blob: abc924c9467cd596fc53da8612c1783068162ffd [file] [log] [blame]
Josef Bacik0f9dd462008-09-23 13:14:11 -04001/*
2 * Copyright (C) 2008 Red Hat. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
Josef Bacik96303082009-07-13 21:29:25 -040019#include <linux/pagemap.h>
Josef Bacik0f9dd462008-09-23 13:14:11 -040020#include <linux/sched.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090021#include <linux/slab.h>
Josef Bacik96303082009-07-13 21:29:25 -040022#include <linux/math64.h>
Josef Bacik6ab60602011-08-08 08:24:46 -040023#include <linux/ratelimit.h>
Josef Bacik0f9dd462008-09-23 13:14:11 -040024#include "ctree.h"
Chris Masonfa9c0d792009-04-03 09:47:43 -040025#include "free-space-cache.h"
26#include "transaction.h"
Josef Bacik0af3d002010-06-21 14:48:16 -040027#include "disk-io.h"
Josef Bacik43be2142011-04-01 14:55:00 +000028#include "extent_io.h"
Li Zefan581bb052011-04-20 10:06:11 +080029#include "inode-map.h"
Chris Masonfa9c0d792009-04-03 09:47:43 -040030
Josef Bacik96303082009-07-13 21:29:25 -040031#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
32#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
33
Li Zefan34d52cb2011-03-29 13:46:06 +080034static int link_free_space(struct btrfs_free_space_ctl *ctl,
Josef Bacik0cb59c92010-07-02 12:14:14 -040035 struct btrfs_free_space *info);
36
/*
 * Look up the inode backing the free space cache for the block group at
 * @offset.
 *
 * The inode's location is stored in the free space header item keyed
 * (BTRFS_FREE_SPACE_OBJECTID, 0, @offset).  On success a referenced inode
 * is returned and the caller is responsible for iput().  Returns an
 * ERR_PTR on search error, or ERR_PTR(-ENOENT) when no usable inode
 * exists.
 */
static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
					       struct btrfs_path *path,
					       u64 offset)
{
	struct btrfs_key key;
	struct btrfs_key location;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode = NULL;
	int ret;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret > 0) {
		/* No header item, so this block group has no cache inode. */
		btrfs_release_path(path);
		return ERR_PTR(-ENOENT);
	}

	/* Pull the inode's key out of the free space header item. */
	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_free_space_key(leaf, header, &disk_key);
	btrfs_disk_key_to_cpu(&location, &disk_key);
	btrfs_release_path(path);

	inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
	if (!inode)
		return ERR_PTR(-ENOENT);
	if (IS_ERR(inode))
		return inode;
	if (is_bad_inode(inode)) {
		iput(inode);
		return ERR_PTR(-ENOENT);
	}

	/*
	 * Clear __GFP_FS on the mapping so page cache allocations for this
	 * inode can't recurse back into the filesystem.
	 */
	inode->i_mapping->flags &= ~__GFP_FS;

	return inode;
}
82
/*
 * Get the free space cache inode for @block_group, caching it on the
 * block group the first time it is looked up.
 *
 * Returns a referenced inode (caller must iput()) or an ERR_PTR from
 * __lookup_free_space_inode().  Old style cache inodes that still carry
 * NODATASUM are converted, and the on-disk cache is flagged to be
 * rewritten (BTRFS_DC_CLEAR).
 */
struct inode *lookup_free_space_inode(struct btrfs_root *root,
				      struct btrfs_block_group_cache
				      *block_group, struct btrfs_path *path)
{
	struct inode *inode = NULL;

	/* Fast path: the block group already holds a cached inode ref. */
	spin_lock(&block_group->lock);
	if (block_group->inode)
		inode = igrab(block_group->inode);
	spin_unlock(&block_group->lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(root, path,
					  block_group->key.objectid);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&block_group->lock);
	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) {
		printk(KERN_INFO "Old style space inode found, converting.\n");
		BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODATASUM;
		/* Force the cache to be rewritten in the new format. */
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
	}

	/* Stash a reference on the block group for future lookups. */
	if (!block_group->iref) {
		block_group->inode = igrab(inode);
		block_group->iref = 1;
	}
	spin_unlock(&block_group->lock);

	return inode;
}
116
/*
 * Create a new free space cache inode with objectid @ino and link it to
 * the block group at @offset via a free space header item.
 *
 * The inode is created NOCOMPRESS|PREALLOC so the cache file stays a
 * flat, uncompressed, preallocated region on disk.  Returns 0 on success
 * or a negative errno.
 */
int __create_free_space_inode(struct btrfs_root *root,
			      struct btrfs_trans_handle *trans,
			      struct btrfs_path *path, u64 ino, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	int ret;

	ret = btrfs_insert_empty_inode(trans, root, path, ino);
	if (ret)
		return ret;

	/* Fill in the freshly inserted inode item in place. */
	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	btrfs_item_key(leaf, &disk_key, path->slots[0]);
	memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
			     sizeof(*inode_item));
	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
	btrfs_set_inode_size(leaf, inode_item, 0);
	btrfs_set_inode_nbytes(leaf, inode_item, 0);
	btrfs_set_inode_uid(leaf, inode_item, 0);
	btrfs_set_inode_gid(leaf, inode_item, 0);
	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
			      BTRFS_INODE_PREALLOC);
	btrfs_set_inode_nlink(leaf, inode_item, 1);
	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
	btrfs_set_inode_block_group(leaf, inode_item, offset);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	/* Insert the header item that points the block group at the inode. */
	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(struct btrfs_free_space_header));
	if (ret < 0) {
		btrfs_release_path(path);
		return ret;
	}
	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
	btrfs_set_free_space_key(leaf, header, &disk_key);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	return 0;
}
172
Li Zefan0414efa2011-04-20 10:20:14 +0800173int create_free_space_inode(struct btrfs_root *root,
174 struct btrfs_trans_handle *trans,
175 struct btrfs_block_group_cache *block_group,
176 struct btrfs_path *path)
177{
178 int ret;
179 u64 ino;
180
181 ret = btrfs_find_free_objectid(root, &ino);
182 if (ret < 0)
183 return ret;
184
185 return __create_free_space_inode(root, trans, path, ino,
186 block_group->key.objectid);
187}
188
Josef Bacik0af3d002010-06-21 14:48:16 -0400189int btrfs_truncate_free_space_cache(struct btrfs_root *root,
190 struct btrfs_trans_handle *trans,
191 struct btrfs_path *path,
192 struct inode *inode)
193{
Liu Bo65450aa2011-09-11 10:52:24 -0400194 struct btrfs_block_rsv *rsv;
Josef Bacik0af3d002010-06-21 14:48:16 -0400195 loff_t oldsize;
196 int ret = 0;
197
Liu Bo65450aa2011-09-11 10:52:24 -0400198 rsv = trans->block_rsv;
Josef Bacik0af3d002010-06-21 14:48:16 -0400199 trans->block_rsv = root->orphan_block_rsv;
Josef Bacik4a92b1b2011-08-30 12:34:28 -0400200 ret = btrfs_block_rsv_check(root, root->orphan_block_rsv, 0, 5, 0);
Josef Bacik0af3d002010-06-21 14:48:16 -0400201 if (ret)
202 return ret;
203
204 oldsize = i_size_read(inode);
205 btrfs_i_size_write(inode, 0);
206 truncate_pagecache(inode, oldsize, 0);
207
208 /*
209 * We don't need an orphan item because truncating the free space cache
210 * will never be split across transactions.
211 */
212 ret = btrfs_truncate_inode_items(trans, root, inode,
213 0, BTRFS_EXTENT_DATA_KEY);
Liu Bo65450aa2011-09-11 10:52:24 -0400214
215 trans->block_rsv = rsv;
Josef Bacik0af3d002010-06-21 14:48:16 -0400216 if (ret) {
217 WARN_ON(1);
218 return ret;
219 }
220
Li Zefan82d59022011-04-20 10:33:24 +0800221 ret = btrfs_update_inode(trans, root, inode);
222 return ret;
Josef Bacik0af3d002010-06-21 14:48:16 -0400223}
224
Josef Bacik9d66e232010-08-25 16:54:15 -0400225static int readahead_cache(struct inode *inode)
226{
227 struct file_ra_state *ra;
228 unsigned long last_index;
229
230 ra = kzalloc(sizeof(*ra), GFP_NOFS);
231 if (!ra)
232 return -ENOMEM;
233
234 file_ra_state_init(ra, inode->i_mapping);
235 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
236
237 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
238
239 kfree(ra);
240
241 return 0;
242}
243
/*
 * State for streaming the free space cache file one page at a time.
 * A single page is kmap()ed at any moment; entries are packed into it
 * sequentially and the next page is mapped when the current one fills.
 */
struct io_ctl {
	void *cur, *orig;	/* write/read cursor and start of mapped page */
	struct page *page;	/* the page currently kmap()ed */
	struct page **pages;	/* all pages backing the cache file */
	struct btrfs_root *root;
	unsigned long size;	/* bytes remaining in the current page */
	int index;		/* index of the next page to map */
	int num_pages;
};
253
254static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
255 struct btrfs_root *root)
256{
257 memset(io_ctl, 0, sizeof(struct io_ctl));
258 io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
259 PAGE_CACHE_SHIFT;
260 io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages,
261 GFP_NOFS);
262 if (!io_ctl->pages)
263 return -ENOMEM;
264 io_ctl->root = root;
265 return 0;
266}
267
/*
 * Release the page pointer array allocated by io_ctl_init().  The pages
 * themselves are released separately via io_ctl_drop_pages().
 */
static void io_ctl_free(struct io_ctl *io_ctl)
{
	kfree(io_ctl->pages);
}
272
273static void io_ctl_unmap_page(struct io_ctl *io_ctl)
274{
275 if (io_ctl->cur) {
276 kunmap(io_ctl->page);
277 io_ctl->cur = NULL;
278 io_ctl->orig = NULL;
279 }
280}
281
/*
 * Map the next page of the cache file and point the cursor at its start.
 * @clear zeroes the page first (used on the write path).  The previous
 * page must already have been unmapped (WARN_ON enforces this).
 */
static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
{
	WARN_ON(io_ctl->cur);
	BUG_ON(io_ctl->index >= io_ctl->num_pages);
	io_ctl->page = io_ctl->pages[io_ctl->index++];
	io_ctl->cur = kmap(io_ctl->page);
	io_ctl->orig = io_ctl->cur;
	io_ctl->size = PAGE_CACHE_SIZE;
	if (clear)
		memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
}
293
294static void io_ctl_drop_pages(struct io_ctl *io_ctl)
295{
296 int i;
297
298 io_ctl_unmap_page(io_ctl);
299
300 for (i = 0; i < io_ctl->num_pages; i++) {
301 ClearPageChecked(io_ctl->pages[i]);
302 unlock_page(io_ctl->pages[i]);
303 page_cache_release(io_ctl->pages[i]);
304 }
305}
306
/*
 * Find or create (and lock) every page of the cache file.  When
 * @uptodate is set the pages are read in so existing contents can be
 * parsed.  On failure all pages grabbed so far are unlocked and
 * released.  Returns 0, -ENOMEM, or -EIO.
 */
static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
				int uptodate)
{
	struct page *page;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
	int i;

	for (i = 0; i < io_ctl->num_pages; i++) {
		page = find_or_create_page(inode->i_mapping, i, mask);
		if (!page) {
			io_ctl_drop_pages(io_ctl);
			return -ENOMEM;
		}
		io_ctl->pages[i] = page;
		if (uptodate && !PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			/* Re-take the lock to wait for the read to finish. */
			lock_page(page);
			if (!PageUptodate(page)) {
				printk(KERN_ERR "btrfs: error reading free "
				       "space cache\n");
				io_ctl_drop_pages(io_ctl);
				return -EIO;
			}
		}
	}

	return 0;
}
335
/*
 * Begin a fresh cache stream: map page 0 and record @generation right
 * after the crc slot at the start of the page.
 */
static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
{
	u64 *val;

	io_ctl_map_page(io_ctl, 1);

	/*
	 * Skip the first 64 bits to make sure there's a bogus crc for old
	 * kernels, so they reject the file instead of trusting stale data.
	 */
	io_ctl->cur += sizeof(u64);

	/* Generation is stored little-endian on disk. */
	val = io_ctl->cur;
	*val = cpu_to_le64(generation);
	io_ctl->cur += sizeof(u64);
	io_ctl->size -= sizeof(u64) * 2;
}
353
354static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
355{
356 u64 *gen;
357
358 io_ctl_map_page(io_ctl, 0);
359
360 /* Skip the bogus crc area */
361 io_ctl->cur += sizeof(u64);
362 gen = io_ctl->cur;
363 if (le64_to_cpu(*gen) != generation) {
364 printk_ratelimited(KERN_ERR "btrfs: space cache generation "
365 "(%Lu) does not match inode (%Lu)\n", *gen,
366 generation);
367 io_ctl_unmap_page(io_ctl);
368 return -EIO;
369 }
370 io_ctl->cur += sizeof(u64);
371 io_ctl->size -= sizeof(u64) * 2;
372 return 0;
373}
374
/*
 * Append one free space entry (extent or bitmap marker) to the stream.
 * When the current page fills, a crc is written for page 0 (see below)
 * and the next page is mapped.  Returns 0 on success, -ENOSPC when no
 * page is mapped (stream exhausted).
 */
static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
			    void *bitmap)
{
	struct btrfs_free_space_entry *entry;

	if (!io_ctl->cur)
		return -ENOSPC;

	entry = io_ctl->cur;
	entry->offset = cpu_to_le64(offset);
	entry->bytes = cpu_to_le64(bytes);
	entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
		BTRFS_FREE_SPACE_EXTENT;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	/* Still room for another entry on this page; stay mapped. */
	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	/*
	 * index == 1 means the current page is 0, we need to generate a bogus
	 * crc for older kernels.
	 */
	if (io_ctl->index == 1) {
		u32 *tmp;
		u32 crc = ~(u32)0;

		/* Checksum everything after the crc slot... */
		crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + sizeof(u64),
				      crc, PAGE_CACHE_SIZE - sizeof(u64));
		btrfs_csum_final(crc, (char *)&crc);
		/* ...then deliberately corrupt it so old kernels bail. */
		crc++;
		tmp = io_ctl->orig;
		*tmp = crc;
	}
	io_ctl_unmap_page(io_ctl);

	/* No more pages to map */
	if (io_ctl->index >= io_ctl->num_pages)
		return 0;

	/* map the next page */
	io_ctl_map_page(io_ctl, 1);
	return 0;
}
419
/*
 * Write a full page-sized bitmap into the stream.  Bitmaps always
 * occupy a whole page of their own, so the stream is advanced to a page
 * boundary first if needed.  Returns 0 on success, -ENOSPC when the
 * stream has no page left for the bitmap.
 */
static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
{
	if (!io_ctl->cur)
		return -ENOSPC;

	/*
	 * If we aren't at the start of the current page, unmap this one and
	 * map the next one if there is any left.
	 */
	if (io_ctl->cur != io_ctl->orig) {
		io_ctl_unmap_page(io_ctl);
		if (io_ctl->index >= io_ctl->num_pages)
			return -ENOSPC;
		io_ctl_map_page(io_ctl, 0);
	}

	memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE);
	io_ctl_unmap_page(io_ctl);
	/* Leave the next page mapped for the caller, if one remains. */
	if (io_ctl->index < io_ctl->num_pages)
		io_ctl_map_page(io_ctl, 0);
	return 0;
}
442
/*
 * Zero every page of the cache file that hasn't been written yet, so no
 * stale data survives past the last entry.  Mapping with clear == 1
 * zeroes each page; the loop advances via io_ctl->index.
 */
static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
{
	io_ctl_unmap_page(io_ctl);

	while (io_ctl->index < io_ctl->num_pages) {
		io_ctl_map_page(io_ctl, 1);
		io_ctl_unmap_page(io_ctl);
	}
}
452
/*
 * Read the next free space entry from the stream into @entry and return
 * its type (BTRFS_FREE_SPACE_EXTENT or BTRFS_FREE_SPACE_BITMAP).
 * Advances to the next page when the current one is exhausted.
 */
static u8 io_ctl_read_entry(struct io_ctl *io_ctl,
			    struct btrfs_free_space *entry)
{
	struct btrfs_free_space_entry *e;
	u8 type;

	e = io_ctl->cur;
	entry->offset = le64_to_cpu(e->offset);
	entry->bytes = le64_to_cpu(e->bytes);
	type = e->type;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	/* More entries still fit on this page. */
	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return type;

	io_ctl_unmap_page(io_ctl);

	if (io_ctl->index >= io_ctl->num_pages)
		return type;

	io_ctl_map_page(io_ctl, 0);
	return type;
}
477
/*
 * Read one page-sized bitmap from the stream into @entry->bitmap.
 * Bitmaps are stored on page boundaries, so skip to the start of the
 * next page if the cursor is mid-page.
 */
static void io_ctl_read_bitmap(struct io_ctl *io_ctl,
			       struct btrfs_free_space *entry)
{
	BUG_ON(!io_ctl->cur);
	if (io_ctl->cur != io_ctl->orig) {
		io_ctl_unmap_page(io_ctl);
		io_ctl_map_page(io_ctl, 0);
	}
	memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE);
	io_ctl_unmap_page(io_ctl);
	/* Leave the next page mapped for the caller, if one remains. */
	if (io_ctl->index < io_ctl->num_pages)
		io_ctl_map_page(io_ctl, 0);
}
491
Li Zefan0414efa2011-04-20 10:20:14 +0800492int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
493 struct btrfs_free_space_ctl *ctl,
494 struct btrfs_path *path, u64 offset)
Josef Bacik9d66e232010-08-25 16:54:15 -0400495{
Josef Bacik9d66e232010-08-25 16:54:15 -0400496 struct btrfs_free_space_header *header;
497 struct extent_buffer *leaf;
Josef Bacika67509c2011-10-05 15:18:58 -0400498 struct io_ctl io_ctl;
Josef Bacik9d66e232010-08-25 16:54:15 -0400499 struct btrfs_key key;
Josef Bacika67509c2011-10-05 15:18:58 -0400500 struct btrfs_free_space *e, *n;
Josef Bacik9d66e232010-08-25 16:54:15 -0400501 struct list_head bitmaps;
502 u64 num_entries;
503 u64 num_bitmaps;
504 u64 generation;
Josef Bacika67509c2011-10-05 15:18:58 -0400505 u8 type;
Josef Bacikf6a39822011-06-06 10:50:35 -0400506 int ret = 0;
Josef Bacik9d66e232010-08-25 16:54:15 -0400507
508 INIT_LIST_HEAD(&bitmaps);
509
Josef Bacik9d66e232010-08-25 16:54:15 -0400510 /* Nothing in the space cache, goodbye */
Li Zefan0414efa2011-04-20 10:20:14 +0800511 if (!i_size_read(inode))
Josef Bacika67509c2011-10-05 15:18:58 -0400512 return 0;
Josef Bacik9d66e232010-08-25 16:54:15 -0400513
514 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
Li Zefan0414efa2011-04-20 10:20:14 +0800515 key.offset = offset;
Josef Bacik9d66e232010-08-25 16:54:15 -0400516 key.type = 0;
517
518 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
Li Zefan0414efa2011-04-20 10:20:14 +0800519 if (ret < 0)
Josef Bacika67509c2011-10-05 15:18:58 -0400520 return 0;
Li Zefan0414efa2011-04-20 10:20:14 +0800521 else if (ret > 0) {
Chris Mason945d8962011-05-22 12:33:42 -0400522 btrfs_release_path(path);
Josef Bacika67509c2011-10-05 15:18:58 -0400523 return 0;
Josef Bacik9d66e232010-08-25 16:54:15 -0400524 }
525
Li Zefan0414efa2011-04-20 10:20:14 +0800526 ret = -1;
527
Josef Bacik9d66e232010-08-25 16:54:15 -0400528 leaf = path->nodes[0];
529 header = btrfs_item_ptr(leaf, path->slots[0],
530 struct btrfs_free_space_header);
531 num_entries = btrfs_free_space_entries(leaf, header);
532 num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
533 generation = btrfs_free_space_generation(leaf, header);
Chris Mason945d8962011-05-22 12:33:42 -0400534 btrfs_release_path(path);
Josef Bacik9d66e232010-08-25 16:54:15 -0400535
536 if (BTRFS_I(inode)->generation != generation) {
537 printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
Li Zefan0414efa2011-04-20 10:20:14 +0800538 " not match free space cache generation (%llu)\n",
Josef Bacik9d66e232010-08-25 16:54:15 -0400539 (unsigned long long)BTRFS_I(inode)->generation,
Li Zefan0414efa2011-04-20 10:20:14 +0800540 (unsigned long long)generation);
Josef Bacika67509c2011-10-05 15:18:58 -0400541 return 0;
Josef Bacik9d66e232010-08-25 16:54:15 -0400542 }
543
544 if (!num_entries)
Josef Bacika67509c2011-10-05 15:18:58 -0400545 return 0;
Josef Bacik9d66e232010-08-25 16:54:15 -0400546
Josef Bacika67509c2011-10-05 15:18:58 -0400547 io_ctl_init(&io_ctl, inode, root);
Josef Bacik9d66e232010-08-25 16:54:15 -0400548 ret = readahead_cache(inode);
Li Zefan0414efa2011-04-20 10:20:14 +0800549 if (ret)
Josef Bacik9d66e232010-08-25 16:54:15 -0400550 goto out;
Josef Bacik9d66e232010-08-25 16:54:15 -0400551
Josef Bacika67509c2011-10-05 15:18:58 -0400552 ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
553 if (ret)
554 goto out;
Josef Bacik9d66e232010-08-25 16:54:15 -0400555
Josef Bacika67509c2011-10-05 15:18:58 -0400556 ret = io_ctl_check_generation(&io_ctl, generation);
557 if (ret)
558 goto free_cache;
Josef Bacik9d66e232010-08-25 16:54:15 -0400559
Josef Bacika67509c2011-10-05 15:18:58 -0400560 while (num_entries) {
561 e = kmem_cache_zalloc(btrfs_free_space_cachep,
562 GFP_NOFS);
563 if (!e)
Josef Bacik9d66e232010-08-25 16:54:15 -0400564 goto free_cache;
Josef Bacik9d66e232010-08-25 16:54:15 -0400565
Josef Bacika67509c2011-10-05 15:18:58 -0400566 type = io_ctl_read_entry(&io_ctl, e);
567 if (!e->bytes) {
568 kmem_cache_free(btrfs_free_space_cachep, e);
569 goto free_cache;
Josef Bacik9d66e232010-08-25 16:54:15 -0400570 }
Josef Bacik9d66e232010-08-25 16:54:15 -0400571
Josef Bacika67509c2011-10-05 15:18:58 -0400572 if (type == BTRFS_FREE_SPACE_EXTENT) {
573 spin_lock(&ctl->tree_lock);
574 ret = link_free_space(ctl, e);
575 spin_unlock(&ctl->tree_lock);
576 if (ret) {
577 printk(KERN_ERR "Duplicate entries in "
578 "free space cache, dumping\n");
Josef Bacikdc89e982011-01-28 17:05:48 -0500579 kmem_cache_free(btrfs_free_space_cachep, e);
Josef Bacik9d66e232010-08-25 16:54:15 -0400580 goto free_cache;
581 }
Josef Bacika67509c2011-10-05 15:18:58 -0400582 } else {
583 BUG_ON(!num_bitmaps);
584 num_bitmaps--;
585 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
586 if (!e->bitmap) {
587 kmem_cache_free(
588 btrfs_free_space_cachep, e);
589 goto free_cache;
Josef Bacik9d66e232010-08-25 16:54:15 -0400590 }
Josef Bacika67509c2011-10-05 15:18:58 -0400591 spin_lock(&ctl->tree_lock);
592 ret = link_free_space(ctl, e);
593 ctl->total_bitmaps++;
594 ctl->op->recalc_thresholds(ctl);
595 spin_unlock(&ctl->tree_lock);
596 if (ret) {
597 printk(KERN_ERR "Duplicate entries in "
598 "free space cache, dumping\n");
599 kmem_cache_free(btrfs_free_space_cachep, e);
600 goto free_cache;
601 }
602 list_add_tail(&e->list, &bitmaps);
Josef Bacik9d66e232010-08-25 16:54:15 -0400603 }
604
Josef Bacika67509c2011-10-05 15:18:58 -0400605 num_entries--;
Josef Bacik9d66e232010-08-25 16:54:15 -0400606 }
607
Josef Bacika67509c2011-10-05 15:18:58 -0400608 /*
609 * We add the bitmaps at the end of the entries in order that
610 * the bitmap entries are added to the cache.
611 */
612 list_for_each_entry_safe(e, n, &bitmaps, list) {
613 list_del_init(&e->list);
614 io_ctl_read_bitmap(&io_ctl, e);
615 }
616
617 io_ctl_drop_pages(&io_ctl);
Josef Bacik9d66e232010-08-25 16:54:15 -0400618 ret = 1;
619out:
Josef Bacika67509c2011-10-05 15:18:58 -0400620 io_ctl_free(&io_ctl);
Josef Bacik9d66e232010-08-25 16:54:15 -0400621 return ret;
Josef Bacik9d66e232010-08-25 16:54:15 -0400622free_cache:
Josef Bacika67509c2011-10-05 15:18:58 -0400623 io_ctl_drop_pages(&io_ctl);
Li Zefan0414efa2011-04-20 10:20:14 +0800624 __btrfs_remove_free_space_cache(ctl);
Josef Bacik9d66e232010-08-25 16:54:15 -0400625 goto out;
626}
627
/*
 * Load the free space cache for @block_group if it is trustworthy.
 *
 * Returns 1 when the cache was loaded and its free space total matches
 * what the block group item says, 0 otherwise.  A mismatched or corrupt
 * cache is discarded and the block group is flagged BTRFS_DC_CLEAR so
 * the cache gets rewritten.
 */
int load_free_space_cache(struct btrfs_fs_info *fs_info,
			  struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_root *root = fs_info->tree_root;
	struct inode *inode;
	struct btrfs_path *path;
	int ret;
	bool matched;
	u64 used = btrfs_block_group_used(&block_group->item);

	/*
	 * If we're unmounting then just return, since this does a search on the
	 * normal root and not the commit root and we could deadlock.
	 */
	if (btrfs_fs_closing(fs_info))
		return 0;

	/*
	 * If this block group has been marked to be cleared for one reason or
	 * another then we can't trust the on disk cache, so just return.
	 */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	path = btrfs_alloc_path();
	if (!path)
		return 0;

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode)) {
		btrfs_free_path(path);
		return 0;
	}

	ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
				      path, block_group->key.objectid);
	btrfs_free_path(path);
	if (ret <= 0)
		goto out;

	/* Cross-check the loaded total against the block group item. */
	spin_lock(&ctl->tree_lock);
	matched = (ctl->free_space == (block_group->key.offset - used -
				       block_group->bytes_super));
	spin_unlock(&ctl->tree_lock);

	if (!matched) {
		__btrfs_remove_free_space_cache(ctl);
		printk(KERN_ERR "block group %llu has an wrong amount of free "
		       "space\n", block_group->key.objectid);
		ret = -1;
	}
out:
	if (ret < 0) {
		/* This cache is bogus, make sure it gets cleared */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		/* Failure to load is not fatal; rebuild from disk instead. */
		ret = 0;

		printk(KERN_ERR "btrfs: failed to load free space cache "
		       "for block group %llu\n", block_group->key.objectid);
	}

	iput(inode);
	return ret;
}
699
Josef Bacikc09544e2011-08-30 10:19:10 -0400700/**
701 * __btrfs_write_out_cache - write out cached info to an inode
702 * @root - the root the inode belongs to
703 * @ctl - the free space cache we are going to write out
704 * @block_group - the block_group for this cache if it belongs to a block_group
705 * @trans - the trans handle
706 * @path - the path to use
707 * @offset - the offset for the key we'll insert
708 *
709 * This function writes out a free space cache struct to disk for quick recovery
 * on mount. This will return 0 if it was successful in writing the cache out,
711 * and -1 if it was not.
712 */
Li Zefan0414efa2011-04-20 10:20:14 +0800713int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
714 struct btrfs_free_space_ctl *ctl,
715 struct btrfs_block_group_cache *block_group,
716 struct btrfs_trans_handle *trans,
717 struct btrfs_path *path, u64 offset)
Josef Bacik0cb59c92010-07-02 12:14:14 -0400718{
719 struct btrfs_free_space_header *header;
720 struct extent_buffer *leaf;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400721 struct rb_node *node;
722 struct list_head *pos, *n;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400723 struct extent_state *cached_state = NULL;
Josef Bacik43be2142011-04-01 14:55:00 +0000724 struct btrfs_free_cluster *cluster = NULL;
725 struct extent_io_tree *unpin = NULL;
Josef Bacika67509c2011-10-05 15:18:58 -0400726 struct io_ctl io_ctl;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400727 struct list_head bitmap_list;
728 struct btrfs_key key;
Josef Bacik43be2142011-04-01 14:55:00 +0000729 u64 start, end, len;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400730 int entries = 0;
731 int bitmaps = 0;
Josef Bacikc09544e2011-08-30 10:19:10 -0400732 int ret;
733 int err = -1;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400734
Josef Bacik0cb59c92010-07-02 12:14:14 -0400735 INIT_LIST_HEAD(&bitmap_list);
736
Li Zefan0414efa2011-04-20 10:20:14 +0800737 if (!i_size_read(inode))
738 return -1;
Josef Bacik2b209822010-12-03 13:17:53 -0500739
Josef Bacik0cb59c92010-07-02 12:14:14 -0400740 filemap_write_and_wait(inode->i_mapping);
741 btrfs_wait_ordered_range(inode, inode->i_size &
742 ~(root->sectorsize - 1), (u64)-1);
743
Josef Bacika67509c2011-10-05 15:18:58 -0400744 io_ctl_init(&io_ctl, inode, root);
Josef Bacikbe1a12a2011-04-06 13:05:22 -0400745
Josef Bacik43be2142011-04-01 14:55:00 +0000746 /* Get the cluster for this block_group if it exists */
Li Zefan0414efa2011-04-20 10:20:14 +0800747 if (block_group && !list_empty(&block_group->cluster_list))
Josef Bacik43be2142011-04-01 14:55:00 +0000748 cluster = list_entry(block_group->cluster_list.next,
749 struct btrfs_free_cluster,
750 block_group_list);
751
752 /*
753 * We shouldn't have switched the pinned extents yet so this is the
754 * right one
755 */
756 unpin = root->fs_info->pinned_extents;
757
Josef Bacika67509c2011-10-05 15:18:58 -0400758 /* Lock all pages first so we can lock the extent safely. */
759 io_ctl_prepare_pages(&io_ctl, inode, 0);
Josef Bacik0cb59c92010-07-02 12:14:14 -0400760
Josef Bacik0cb59c92010-07-02 12:14:14 -0400761 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
762 0, &cached_state, GFP_NOFS);
763
Josef Bacik43be2142011-04-01 14:55:00 +0000764 /*
765 * When searching for pinned extents, we need to start at our start
766 * offset.
767 */
Li Zefan0414efa2011-04-20 10:20:14 +0800768 if (block_group)
769 start = block_group->key.objectid;
Josef Bacik43be2142011-04-01 14:55:00 +0000770
Josef Bacikf75b1302011-10-05 10:00:18 -0400771 node = rb_first(&ctl->free_space_offset);
772 if (!node && cluster) {
773 node = rb_first(&cluster->root);
774 cluster = NULL;
775 }
776
Josef Bacika67509c2011-10-05 15:18:58 -0400777 io_ctl_set_generation(&io_ctl, trans->transid);
778
Josef Bacik0cb59c92010-07-02 12:14:14 -0400779 /* Write out the extent entries */
Josef Bacika67509c2011-10-05 15:18:58 -0400780 while (node) {
781 struct btrfs_free_space *e;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400782
Josef Bacika67509c2011-10-05 15:18:58 -0400783 e = rb_entry(node, struct btrfs_free_space, offset_index);
784 entries++;
Josef Bacik43be2142011-04-01 14:55:00 +0000785
Josef Bacika67509c2011-10-05 15:18:58 -0400786 ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes,
787 e->bitmap);
788 if (ret)
789 goto out_nospc;
790
791 if (e->bitmap) {
792 list_add_tail(&e->list, &bitmap_list);
793 bitmaps++;
794 }
795 node = rb_next(node);
796 if (!node && cluster) {
797 node = rb_first(&cluster->root);
798 cluster = NULL;
799 }
800 }
801
802 /*
803 * We want to add any pinned extents to our free space cache
804 * so we don't leak the space
805 */
806 while (block_group && (start < block_group->key.objectid +
807 block_group->key.offset)) {
808 ret = find_first_extent_bit(unpin, start, &start, &end,
809 EXTENT_DIRTY);
810 if (ret) {
811 ret = 0;
Josef Bacikbe1a12a2011-04-06 13:05:22 -0400812 break;
813 }
814
Josef Bacika67509c2011-10-05 15:18:58 -0400815 /* This pinned extent is out of our range */
816 if (start >= block_group->key.objectid +
817 block_group->key.offset)
818 break;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400819
Josef Bacika67509c2011-10-05 15:18:58 -0400820 len = block_group->key.objectid +
821 block_group->key.offset - start;
822 len = min(len, end + 1 - start);
Josef Bacik0cb59c92010-07-02 12:14:14 -0400823
Josef Bacika67509c2011-10-05 15:18:58 -0400824 entries++;
825 ret = io_ctl_add_entry(&io_ctl, start, len, NULL);
826 if (ret)
827 goto out_nospc;
Josef Bacik2f356122011-06-10 15:31:13 -0400828
Josef Bacika67509c2011-10-05 15:18:58 -0400829 start = end + 1;
830 }
Josef Bacik0cb59c92010-07-02 12:14:14 -0400831
832 /* Write out the bitmaps */
833 list_for_each_safe(pos, n, &bitmap_list) {
Josef Bacik0cb59c92010-07-02 12:14:14 -0400834 struct btrfs_free_space *entry =
835 list_entry(pos, struct btrfs_free_space, list);
836
Josef Bacika67509c2011-10-05 15:18:58 -0400837 ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap);
838 if (ret)
839 goto out_nospc;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400840 list_del_init(&entry->list);
Josef Bacikbe1a12a2011-04-06 13:05:22 -0400841 }
842
Josef Bacik0cb59c92010-07-02 12:14:14 -0400843 /* Zero out the rest of the pages just to make sure */
Josef Bacika67509c2011-10-05 15:18:58 -0400844 io_ctl_zero_remaining_pages(&io_ctl);
Josef Bacik0cb59c92010-07-02 12:14:14 -0400845
Josef Bacika67509c2011-10-05 15:18:58 -0400846 ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
847 0, i_size_read(inode), &cached_state);
848 io_ctl_drop_pages(&io_ctl);
Josef Bacik0cb59c92010-07-02 12:14:14 -0400849 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
850 i_size_read(inode) - 1, &cached_state, GFP_NOFS);
851
Josef Bacikc09544e2011-08-30 10:19:10 -0400852 if (ret)
Josef Bacik2f356122011-06-10 15:31:13 -0400853 goto out;
Josef Bacikbe1a12a2011-04-06 13:05:22 -0400854
Josef Bacikbe1a12a2011-04-06 13:05:22 -0400855
Josef Bacik549b4fd2011-10-05 16:33:53 -0400856 ret = filemap_write_and_wait(inode->i_mapping);
857 if (ret)
858 goto out;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400859
860 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
Li Zefan0414efa2011-04-20 10:20:14 +0800861 key.offset = offset;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400862 key.type = 0;
863
Josef Bacika9b5fcd2011-08-19 12:06:12 -0400864 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
Josef Bacik0cb59c92010-07-02 12:14:14 -0400865 if (ret < 0) {
Josef Bacika67509c2011-10-05 15:18:58 -0400866 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
Josef Bacik0cb59c92010-07-02 12:14:14 -0400867 EXTENT_DIRTY | EXTENT_DELALLOC |
868 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
Josef Bacik2f356122011-06-10 15:31:13 -0400869 goto out;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400870 }
871 leaf = path->nodes[0];
872 if (ret > 0) {
873 struct btrfs_key found_key;
874 BUG_ON(!path->slots[0]);
875 path->slots[0]--;
876 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
877 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
Li Zefan0414efa2011-04-20 10:20:14 +0800878 found_key.offset != offset) {
Josef Bacika67509c2011-10-05 15:18:58 -0400879 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
880 inode->i_size - 1,
Josef Bacik0cb59c92010-07-02 12:14:14 -0400881 EXTENT_DIRTY | EXTENT_DELALLOC |
882 EXTENT_DO_ACCOUNTING, 0, 0, NULL,
883 GFP_NOFS);
David Sterbab3b4aa72011-04-21 01:20:15 +0200884 btrfs_release_path(path);
Josef Bacik2f356122011-06-10 15:31:13 -0400885 goto out;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400886 }
887 }
Josef Bacik549b4fd2011-10-05 16:33:53 -0400888
889 BTRFS_I(inode)->generation = trans->transid;
Josef Bacik0cb59c92010-07-02 12:14:14 -0400890 header = btrfs_item_ptr(leaf, path->slots[0],
891 struct btrfs_free_space_header);
892 btrfs_set_free_space_entries(leaf, header, entries);
893 btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
894 btrfs_set_free_space_generation(leaf, header, trans->transid);
895 btrfs_mark_buffer_dirty(leaf);
David Sterbab3b4aa72011-04-21 01:20:15 +0200896 btrfs_release_path(path);
Josef Bacik0cb59c92010-07-02 12:14:14 -0400897
Josef Bacikc09544e2011-08-30 10:19:10 -0400898 err = 0;
Josef Bacik2f356122011-06-10 15:31:13 -0400899out:
Josef Bacika67509c2011-10-05 15:18:58 -0400900 io_ctl_free(&io_ctl);
Josef Bacikc09544e2011-08-30 10:19:10 -0400901 if (err) {
Josef Bacika67509c2011-10-05 15:18:58 -0400902 invalidate_inode_pages2(inode->i_mapping);
Josef Bacik0cb59c92010-07-02 12:14:14 -0400903 BTRFS_I(inode)->generation = 0;
904 }
Josef Bacik0cb59c92010-07-02 12:14:14 -0400905 btrfs_update_inode(trans, root, inode);
Josef Bacikc09544e2011-08-30 10:19:10 -0400906 return err;
Josef Bacika67509c2011-10-05 15:18:58 -0400907
908out_nospc:
909 list_for_each_safe(pos, n, &bitmap_list) {
910 struct btrfs_free_space *entry =
911 list_entry(pos, struct btrfs_free_space, list);
912 list_del_init(&entry->list);
913 }
914 io_ctl_drop_pages(&io_ctl);
915 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
916 i_size_read(inode) - 1, &cached_state, GFP_NOFS);
917 goto out;
Li Zefan0414efa2011-04-20 10:20:14 +0800918}
919
920int btrfs_write_out_cache(struct btrfs_root *root,
921 struct btrfs_trans_handle *trans,
922 struct btrfs_block_group_cache *block_group,
923 struct btrfs_path *path)
924{
925 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
926 struct inode *inode;
927 int ret = 0;
928
929 root = root->fs_info->tree_root;
930
931 spin_lock(&block_group->lock);
932 if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
933 spin_unlock(&block_group->lock);
934 return 0;
935 }
936 spin_unlock(&block_group->lock);
937
938 inode = lookup_free_space_inode(root, block_group, path);
939 if (IS_ERR(inode))
940 return 0;
941
942 ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
943 path, block_group->key.objectid);
Josef Bacikc09544e2011-08-30 10:19:10 -0400944 if (ret) {
945 btrfs_delalloc_release_metadata(inode, inode->i_size);
Li Zefan0414efa2011-04-20 10:20:14 +0800946 spin_lock(&block_group->lock);
947 block_group->disk_cache_state = BTRFS_DC_ERROR;
948 spin_unlock(&block_group->lock);
Li Zefan82d59022011-04-20 10:33:24 +0800949 ret = 0;
Josef Bacikc09544e2011-08-30 10:19:10 -0400950#ifdef DEBUG
Li Zefan0414efa2011-04-20 10:20:14 +0800951 printk(KERN_ERR "btrfs: failed to write free space cace "
952 "for block group %llu\n", block_group->key.objectid);
Josef Bacikc09544e2011-08-30 10:19:10 -0400953#endif
Li Zefan0414efa2011-04-20 10:20:14 +0800954 }
955
Josef Bacik0cb59c92010-07-02 12:14:14 -0400956 iput(inode);
957 return ret;
958}
959
Li Zefan34d52cb2011-03-29 13:46:06 +0800960static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
Josef Bacik96303082009-07-13 21:29:25 -0400961 u64 offset)
962{
963 BUG_ON(offset < bitmap_start);
964 offset -= bitmap_start;
Li Zefan34d52cb2011-03-29 13:46:06 +0800965 return (unsigned long)(div_u64(offset, unit));
Josef Bacik96303082009-07-13 21:29:25 -0400966}
967
/* Convert a byte count into a number of bitmap bits (one bit per @unit bytes). */
static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
{
	return (unsigned long)(div_u64(bytes, unit));
}
972
Li Zefan34d52cb2011-03-29 13:46:06 +0800973static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
Josef Bacik96303082009-07-13 21:29:25 -0400974 u64 offset)
975{
976 u64 bitmap_start;
977 u64 bytes_per_bitmap;
978
Li Zefan34d52cb2011-03-29 13:46:06 +0800979 bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
980 bitmap_start = offset - ctl->start;
Josef Bacik96303082009-07-13 21:29:25 -0400981 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
982 bitmap_start *= bytes_per_bitmap;
Li Zefan34d52cb2011-03-29 13:46:06 +0800983 bitmap_start += ctl->start;
Josef Bacik96303082009-07-13 21:29:25 -0400984
985 return bitmap_start;
986}
Josef Bacik0f9dd462008-09-23 13:14:11 -0400987
988static int tree_insert_offset(struct rb_root *root, u64 offset,
Josef Bacik96303082009-07-13 21:29:25 -0400989 struct rb_node *node, int bitmap)
Josef Bacik0f9dd462008-09-23 13:14:11 -0400990{
991 struct rb_node **p = &root->rb_node;
992 struct rb_node *parent = NULL;
993 struct btrfs_free_space *info;
994
995 while (*p) {
996 parent = *p;
997 info = rb_entry(parent, struct btrfs_free_space, offset_index);
998
Josef Bacik96303082009-07-13 21:29:25 -0400999 if (offset < info->offset) {
Josef Bacik0f9dd462008-09-23 13:14:11 -04001000 p = &(*p)->rb_left;
Josef Bacik96303082009-07-13 21:29:25 -04001001 } else if (offset > info->offset) {
Josef Bacik0f9dd462008-09-23 13:14:11 -04001002 p = &(*p)->rb_right;
Josef Bacik96303082009-07-13 21:29:25 -04001003 } else {
1004 /*
1005 * we could have a bitmap entry and an extent entry
1006 * share the same offset. If this is the case, we want
1007 * the extent entry to always be found first if we do a
1008 * linear search through the tree, since we want to have
1009 * the quickest allocation time, and allocating from an
1010 * extent is faster than allocating from a bitmap. So
1011 * if we're inserting a bitmap and we find an entry at
1012 * this offset, we want to go right, or after this entry
1013 * logically. If we are inserting an extent and we've
1014 * found a bitmap, we want to go left, or before
1015 * logically.
1016 */
1017 if (bitmap) {
Josef Bacik207dde82011-05-13 14:49:23 -04001018 if (info->bitmap) {
1019 WARN_ON_ONCE(1);
1020 return -EEXIST;
1021 }
Josef Bacik96303082009-07-13 21:29:25 -04001022 p = &(*p)->rb_right;
1023 } else {
Josef Bacik207dde82011-05-13 14:49:23 -04001024 if (!info->bitmap) {
1025 WARN_ON_ONCE(1);
1026 return -EEXIST;
1027 }
Josef Bacik96303082009-07-13 21:29:25 -04001028 p = &(*p)->rb_left;
1029 }
1030 }
Josef Bacik0f9dd462008-09-23 13:14:11 -04001031 }
1032
1033 rb_link_node(node, parent, p);
1034 rb_insert_color(node, root);
1035
1036 return 0;
1037}
1038
/*
 * searches the tree for the given offset.
 *
 * bitmap_only - If this is set, only a bitmap entry at exactly 'offset' is
 * acceptable; extent entries are skipped.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.
 *
 * Otherwise, returns the entry (extent or bitmap) whose range covers
 * 'offset', preferring an extent entry over a bitmap covering the same
 * space, or NULL if nothing covers it.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_free_space_ctl *ctl,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = ctl->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		/* exact-offset hit */
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = &entry->offset_index;
			while (1) {
				n = rb_prev(n);
				if (!n)
					break;
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap) {
					if (prev->offset + prev->bytes > offset)
						entry = prev;
					break;
				}
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			/* nothing before 'offset'; fuzzy may still take it */
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		/* prefer a preceding extent entry that still covers 'offset' */
		n = &entry->offset_index;
		while (1) {
			n = rb_prev(n);
			if (!n)
				break;
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap) {
				if (prev->offset + prev->bytes > offset)
					return prev;
				break;
			}
		}
		if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	/* fuzzy: walk forward to the first entry whose range ends past 'offset' */
	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    ctl->unit > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}
1171
Li Zefanf333adb2010-11-09 14:57:39 +08001172static inline void
Li Zefan34d52cb2011-03-29 13:46:06 +08001173__unlink_free_space(struct btrfs_free_space_ctl *ctl,
Li Zefanf333adb2010-11-09 14:57:39 +08001174 struct btrfs_free_space *info)
Josef Bacik0f9dd462008-09-23 13:14:11 -04001175{
Li Zefan34d52cb2011-03-29 13:46:06 +08001176 rb_erase(&info->offset_index, &ctl->free_space_offset);
1177 ctl->free_extents--;
Li Zefanf333adb2010-11-09 14:57:39 +08001178}
1179
Li Zefan34d52cb2011-03-29 13:46:06 +08001180static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
Li Zefanf333adb2010-11-09 14:57:39 +08001181 struct btrfs_free_space *info)
1182{
Li Zefan34d52cb2011-03-29 13:46:06 +08001183 __unlink_free_space(ctl, info);
1184 ctl->free_space -= info->bytes;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001185}
1186
Li Zefan34d52cb2011-03-29 13:46:06 +08001187static int link_free_space(struct btrfs_free_space_ctl *ctl,
Josef Bacik0f9dd462008-09-23 13:14:11 -04001188 struct btrfs_free_space *info)
1189{
1190 int ret = 0;
1191
Josef Bacik96303082009-07-13 21:29:25 -04001192 BUG_ON(!info->bitmap && !info->bytes);
Li Zefan34d52cb2011-03-29 13:46:06 +08001193 ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
Josef Bacik96303082009-07-13 21:29:25 -04001194 &info->offset_index, (info->bitmap != NULL));
Josef Bacik0f9dd462008-09-23 13:14:11 -04001195 if (ret)
1196 return ret;
1197
Li Zefan34d52cb2011-03-29 13:46:06 +08001198 ctl->free_space += info->bytes;
1199 ctl->free_extents++;
Josef Bacik96303082009-07-13 21:29:25 -04001200 return ret;
1201}
1202
/*
 * Recompute ctl->extents_thresh, the number of extent entries allowed before
 * new free space starts going into bitmaps.  The budget is
 * MAX_CACHE_BYTES_PER_GIG of cache memory per GiB of block group.
 */
static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_block_group_cache *block_group = ctl->private;
	u64 max_bytes;
	u64 bitmap_bytes;
	u64 extent_bytes;
	u64 size = block_group->key.offset;	/* block group length in bytes */
	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
	int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);

	BUG_ON(ctl->total_bitmaps > max_bitmaps);

	/*
	 * The goal is to keep the total amount of memory used per 1gb of space
	 * at or below 32k, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
	if (size < 1024 * 1024 * 1024)
		max_bytes = MAX_CACHE_BYTES_PER_GIG;
	else
		max_bytes = MAX_CACHE_BYTES_PER_GIG *
			div64_u64(size, 1024 * 1024 * 1024);

	/*
	 * we want to account for 1 more bitmap than what we have so we can make
	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
	 * we add more bitmaps.
	 */
	bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;

	/* bitmaps already consume the whole budget: no extent entries allowed */
	if (bitmap_bytes >= max_bytes) {
		ctl->extents_thresh = 0;
		return;
	}

	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));

	ctl->extents_thresh =
		div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}
1248
Miao Xiebb3ac5a2011-08-05 09:32:35 +00001249static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1250 struct btrfs_free_space *info,
1251 u64 offset, u64 bytes)
Josef Bacik96303082009-07-13 21:29:25 -04001252{
Li Zefanf38b6e72011-03-14 13:40:51 +08001253 unsigned long start, count;
Josef Bacik96303082009-07-13 21:29:25 -04001254
Li Zefan34d52cb2011-03-29 13:46:06 +08001255 start = offset_to_bit(info->offset, ctl->unit, offset);
1256 count = bytes_to_bits(bytes, ctl->unit);
Li Zefanf38b6e72011-03-14 13:40:51 +08001257 BUG_ON(start + count > BITS_PER_BITMAP);
Josef Bacik96303082009-07-13 21:29:25 -04001258
Li Zefanf38b6e72011-03-14 13:40:51 +08001259 bitmap_clear(info->bitmap, start, count);
Josef Bacik96303082009-07-13 21:29:25 -04001260
1261 info->bytes -= bytes;
Miao Xiebb3ac5a2011-08-05 09:32:35 +00001262}
1263
1264static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1265 struct btrfs_free_space *info, u64 offset,
1266 u64 bytes)
1267{
1268 __bitmap_clear_bits(ctl, info, offset, bytes);
Li Zefan34d52cb2011-03-29 13:46:06 +08001269 ctl->free_space -= bytes;
Josef Bacik96303082009-07-13 21:29:25 -04001270}
1271
Li Zefan34d52cb2011-03-29 13:46:06 +08001272static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
Josef Bacik817d52f2009-07-13 21:29:25 -04001273 struct btrfs_free_space *info, u64 offset,
1274 u64 bytes)
Josef Bacik96303082009-07-13 21:29:25 -04001275{
Li Zefanf38b6e72011-03-14 13:40:51 +08001276 unsigned long start, count;
Josef Bacik96303082009-07-13 21:29:25 -04001277
Li Zefan34d52cb2011-03-29 13:46:06 +08001278 start = offset_to_bit(info->offset, ctl->unit, offset);
1279 count = bytes_to_bits(bytes, ctl->unit);
Li Zefanf38b6e72011-03-14 13:40:51 +08001280 BUG_ON(start + count > BITS_PER_BITMAP);
Josef Bacik96303082009-07-13 21:29:25 -04001281
Li Zefanf38b6e72011-03-14 13:40:51 +08001282 bitmap_set(info->bitmap, start, count);
Josef Bacik96303082009-07-13 21:29:25 -04001283
1284 info->bytes += bytes;
Li Zefan34d52cb2011-03-29 13:46:06 +08001285 ctl->free_space += bytes;
Josef Bacik96303082009-07-13 21:29:25 -04001286}
1287
/*
 * Scan @bitmap_info for a run of set (free) bits of at least *bytes bytes,
 * starting at or after *offset.
 *
 * On success returns 0, sets *offset to the start of the run and *bytes to
 * the full length of the run found — note this may be larger than the
 * amount requested.  Returns -1 if no large-enough run exists.
 */
static int search_bitmap(struct btrfs_free_space_ctl *ctl,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	/* clamp the search start to the beginning of this bitmap */
	i = offset_to_bit(bitmap_info->offset, ctl->unit,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, ctl->unit);

	/* walk runs of set bits; each run ends at the next zero bit */
	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		/* run too small; resume the scan after it */
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * ctl->unit) + bitmap_info->offset;
		*bytes = (u64)(found_bits) * ctl->unit;
		return 0;
	}

	return -1;
}
1320
Li Zefan34d52cb2011-03-29 13:46:06 +08001321static struct btrfs_free_space *
1322find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
Josef Bacik96303082009-07-13 21:29:25 -04001323{
1324 struct btrfs_free_space *entry;
1325 struct rb_node *node;
1326 int ret;
1327
Li Zefan34d52cb2011-03-29 13:46:06 +08001328 if (!ctl->free_space_offset.rb_node)
Josef Bacik96303082009-07-13 21:29:25 -04001329 return NULL;
1330
Li Zefan34d52cb2011-03-29 13:46:06 +08001331 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
Josef Bacik96303082009-07-13 21:29:25 -04001332 if (!entry)
1333 return NULL;
1334
1335 for (node = &entry->offset_index; node; node = rb_next(node)) {
1336 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1337 if (entry->bytes < *bytes)
1338 continue;
1339
1340 if (entry->bitmap) {
Li Zefan34d52cb2011-03-29 13:46:06 +08001341 ret = search_bitmap(ctl, entry, offset, bytes);
Josef Bacik96303082009-07-13 21:29:25 -04001342 if (!ret)
1343 return entry;
1344 continue;
1345 }
1346
1347 *offset = entry->offset;
1348 *bytes = entry->bytes;
1349 return entry;
1350 }
1351
1352 return NULL;
1353}
1354
Li Zefan34d52cb2011-03-29 13:46:06 +08001355static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
Josef Bacik96303082009-07-13 21:29:25 -04001356 struct btrfs_free_space *info, u64 offset)
1357{
Li Zefan34d52cb2011-03-29 13:46:06 +08001358 info->offset = offset_to_bitmap(ctl, offset);
Josef Bacikf019f422009-09-11 16:11:20 -04001359 info->bytes = 0;
Li Zefan34d52cb2011-03-29 13:46:06 +08001360 link_free_space(ctl, info);
1361 ctl->total_bitmaps++;
Josef Bacik96303082009-07-13 21:29:25 -04001362
Li Zefan34d52cb2011-03-29 13:46:06 +08001363 ctl->op->recalc_thresholds(ctl);
Josef Bacik96303082009-07-13 21:29:25 -04001364}
1365
Li Zefan34d52cb2011-03-29 13:46:06 +08001366static void free_bitmap(struct btrfs_free_space_ctl *ctl,
Li Zefanedf6e2d2010-11-09 14:50:07 +08001367 struct btrfs_free_space *bitmap_info)
1368{
Li Zefan34d52cb2011-03-29 13:46:06 +08001369 unlink_free_space(ctl, bitmap_info);
Li Zefanedf6e2d2010-11-09 14:50:07 +08001370 kfree(bitmap_info->bitmap);
Josef Bacikdc89e982011-01-28 17:05:48 -05001371 kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
Li Zefan34d52cb2011-03-29 13:46:06 +08001372 ctl->total_bitmaps--;
1373 ctl->op->recalc_thresholds(ctl);
Li Zefanedf6e2d2010-11-09 14:50:07 +08001374}
1375
/*
 * Remove [*offset, *offset + *bytes) from bitmap entries, walking forward
 * across consecutive bitmaps until the whole range is cleared.
 *
 * Returns 0 when done, -EINVAL if bytes remain but no entry follows, and
 * -EAGAIN when the next entry is an extent (or does not hold the space) so
 * the caller must retry via the extent path.  *offset/*bytes are advanced
 * past whatever was removed.
 */
static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *bitmap_info,
			       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	/* last byte covered by this bitmap entry */
	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;

	/*
	 * XXX - this can go away after a few releases.
	 *
	 * since the only user of btrfs_remove_free_space is the tree logging
	 * stuff, and the only way to test that is under crash conditions, we
	 * want to have this debug stuff here just in case somethings not
	 * working. Search the bitmap for the space we are trying to use to
	 * make sure its actually there. If its not there then we need to stop
	 * because something has gone wrong.
	 */
	search_start = *offset;
	search_bytes = *bytes;
	search_bytes = min(search_bytes, end - search_start + 1);
	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
	BUG_ON(ret < 0 || search_start != *offset);

	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
		/* range spills past this bitmap: clear up to 'end', keep going */
		bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
		*bytes -= end - *offset + 1;
		*offset = end + 1;
	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
		/* range fits entirely inside this bitmap: done after this */
		bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
		*bytes = 0;
	}

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes)
			free_bitmap(ctl, bitmap_info);

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = *bytes;
		ret = search_bitmap(ctl, bitmap_info, &search_start,
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes)
		free_bitmap(ctl, bitmap_info);

	return 0;
}
1453
Josef Bacik2cdc3422011-05-27 14:07:49 -04001454static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1455 struct btrfs_free_space *info, u64 offset,
1456 u64 bytes)
1457{
1458 u64 bytes_to_set = 0;
1459 u64 end;
1460
1461 end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1462
1463 bytes_to_set = min(end - offset, bytes);
1464
1465 bitmap_set_bits(ctl, info, offset, bytes_to_set);
1466
1467 return bytes_to_set;
1468
1469}
1470
Li Zefan34d52cb2011-03-29 13:46:06 +08001471static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1472 struct btrfs_free_space *info)
Josef Bacik96303082009-07-13 21:29:25 -04001473{
Li Zefan34d52cb2011-03-29 13:46:06 +08001474 struct btrfs_block_group_cache *block_group = ctl->private;
Josef Bacik96303082009-07-13 21:29:25 -04001475
1476 /*
1477 * If we are below the extents threshold then we can add this as an
1478 * extent, and don't have to deal with the bitmap
1479 */
Li Zefan34d52cb2011-03-29 13:46:06 +08001480 if (ctl->free_extents < ctl->extents_thresh) {
Josef Bacik32cb0842011-03-18 16:16:21 -04001481 /*
1482 * If this block group has some small extents we don't want to
1483 * use up all of our free slots in the cache with them, we want
1484 * to reserve them to larger extents, however if we have plent
1485 * of cache left then go ahead an dadd them, no sense in adding
1486 * the overhead of a bitmap if we don't have to.
1487 */
1488 if (info->bytes <= block_group->sectorsize * 4) {
Li Zefan34d52cb2011-03-29 13:46:06 +08001489 if (ctl->free_extents * 2 <= ctl->extents_thresh)
1490 return false;
Josef Bacik32cb0842011-03-18 16:16:21 -04001491 } else {
Li Zefan34d52cb2011-03-29 13:46:06 +08001492 return false;
Josef Bacik32cb0842011-03-18 16:16:21 -04001493 }
1494 }
Josef Bacik96303082009-07-13 21:29:25 -04001495
1496 /*
1497 * some block groups are so tiny they can't be enveloped by a bitmap, so
1498 * don't even bother to create a bitmap for this
1499 */
1500 if (BITS_PER_BITMAP * block_group->sectorsize >
1501 block_group->key.offset)
Li Zefan34d52cb2011-03-29 13:46:06 +08001502 return false;
1503
1504 return true;
1505}
1506
/* Default ops for block-group free-space ctls: threshold recalculation and
 * the extent-vs-bitmap placement policy defined above. */
static struct btrfs_free_space_op free_space_op = {
	.recalc_thresholds	= recalculate_thresholds,
	.use_bitmap		= use_bitmap,
};
1511
/*
 * Try to record @info's free space in bitmap form.
 *
 * Returns 0 if the policy says this space should stay an extent entry
 * (caller keeps ownership of @info), 1 if the space was absorbed into
 * bitmaps (@info consumed/freed here), or a negative errno.
 *
 * NOTE(review): ctl->tree_lock appears to be held on entry — it is dropped
 * and retaken around the GFP_NOFS allocations below; confirm at call sites.
 */
static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	struct btrfs_block_group_cache *block_group = NULL;
	int added = 0;
	u64 bytes, offset, bytes_added;
	int ret;

	bytes = info->bytes;
	offset = info->offset;

	if (!ctl->op->use_bitmap(ctl, info))
		return 0;

	/* only block-group ctls (not the inode-map ctl) have clusters */
	if (ctl->op == &free_space_op)
		block_group = ctl->private;
again:
	/*
	 * Since we link bitmaps right into the cluster we need to see if we
	 * have a cluster here, and if so and it has our bitmap we need to add
	 * the free space to that bitmap.
	 */
	if (block_group && !list_empty(&block_group->cluster_list)) {
		struct btrfs_free_cluster *cluster;
		struct rb_node *node;
		struct btrfs_free_space *entry;

		cluster = list_entry(block_group->cluster_list.next,
				     struct btrfs_free_cluster,
				     block_group_list);
		spin_lock(&cluster->lock);
		node = rb_first(&cluster->root);
		if (!node) {
			spin_unlock(&cluster->lock);
			goto no_cluster_bitmap;
		}

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (!entry->bitmap) {
			spin_unlock(&cluster->lock);
			goto no_cluster_bitmap;
		}

		if (entry->offset == offset_to_bitmap(ctl, offset)) {
			bytes_added = add_bytes_to_bitmap(ctl, entry,
							  offset, bytes);
			bytes -= bytes_added;
			offset += bytes_added;
		}
		spin_unlock(&cluster->lock);
		if (!bytes) {
			ret = 1;
			goto out;
		}
	}

no_cluster_bitmap:
	/* look for an existing bitmap entry covering 'offset' */
	bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					 1, 0);
	if (!bitmap_info) {
		/* a bitmap added this pass must be findable immediately */
		BUG_ON(added);
		goto new_bitmap;
	}

	bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
	bytes -= bytes_added;
	offset += bytes_added;
	added = 0;

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;

new_bitmap:
	if (info && info->bitmap) {
		/* reuse the caller's entry (its bitmap page is already allocated) */
		add_new_bitmap(ctl, info, offset);
		added = 1;
		info = NULL;
		goto again;
	} else {
		/* must drop the tree lock to allocate */
		spin_unlock(&ctl->tree_lock);

		/* no pre-allocated info, allocate a new one */
		if (!info) {
			info = kmem_cache_zalloc(btrfs_free_space_cachep,
						 GFP_NOFS);
			if (!info) {
				spin_lock(&ctl->tree_lock);
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		spin_lock(&ctl->tree_lock);
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	/* free any entry we still own (unused pre-allocation or error path) */
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
		kmem_cache_free(btrfs_free_space_cachep, info);
	}

	return ret;
}
1627
Chris Mason945d8962011-05-22 12:33:42 -04001628static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
Li Zefanf333adb2010-11-09 14:57:39 +08001629 struct btrfs_free_space *info, bool update_stat)
Josef Bacik0f9dd462008-09-23 13:14:11 -04001630{
Li Zefan120d66e2010-11-09 14:56:50 +08001631 struct btrfs_free_space *left_info;
1632 struct btrfs_free_space *right_info;
1633 bool merged = false;
1634 u64 offset = info->offset;
1635 u64 bytes = info->bytes;
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001636
Josef Bacik0f9dd462008-09-23 13:14:11 -04001637 /*
1638 * first we want to see if there is free space adjacent to the range we
1639 * are adding, if there is remove that struct and add a new one to
1640 * cover the entire range
1641 */
Li Zefan34d52cb2011-03-29 13:46:06 +08001642 right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
Josef Bacik96303082009-07-13 21:29:25 -04001643 if (right_info && rb_prev(&right_info->offset_index))
1644 left_info = rb_entry(rb_prev(&right_info->offset_index),
1645 struct btrfs_free_space, offset_index);
1646 else
Li Zefan34d52cb2011-03-29 13:46:06 +08001647 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
Josef Bacik0f9dd462008-09-23 13:14:11 -04001648
Josef Bacik96303082009-07-13 21:29:25 -04001649 if (right_info && !right_info->bitmap) {
Li Zefanf333adb2010-11-09 14:57:39 +08001650 if (update_stat)
Li Zefan34d52cb2011-03-29 13:46:06 +08001651 unlink_free_space(ctl, right_info);
Li Zefanf333adb2010-11-09 14:57:39 +08001652 else
Li Zefan34d52cb2011-03-29 13:46:06 +08001653 __unlink_free_space(ctl, right_info);
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001654 info->bytes += right_info->bytes;
Josef Bacikdc89e982011-01-28 17:05:48 -05001655 kmem_cache_free(btrfs_free_space_cachep, right_info);
Li Zefan120d66e2010-11-09 14:56:50 +08001656 merged = true;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001657 }
1658
Josef Bacik96303082009-07-13 21:29:25 -04001659 if (left_info && !left_info->bitmap &&
1660 left_info->offset + left_info->bytes == offset) {
Li Zefanf333adb2010-11-09 14:57:39 +08001661 if (update_stat)
Li Zefan34d52cb2011-03-29 13:46:06 +08001662 unlink_free_space(ctl, left_info);
Li Zefanf333adb2010-11-09 14:57:39 +08001663 else
Li Zefan34d52cb2011-03-29 13:46:06 +08001664 __unlink_free_space(ctl, left_info);
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001665 info->offset = left_info->offset;
1666 info->bytes += left_info->bytes;
Josef Bacikdc89e982011-01-28 17:05:48 -05001667 kmem_cache_free(btrfs_free_space_cachep, left_info);
Li Zefan120d66e2010-11-09 14:56:50 +08001668 merged = true;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001669 }
1670
Li Zefan120d66e2010-11-09 14:56:50 +08001671 return merged;
1672}
1673
Li Zefan581bb052011-04-20 10:06:11 +08001674int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
1675 u64 offset, u64 bytes)
Li Zefan120d66e2010-11-09 14:56:50 +08001676{
1677 struct btrfs_free_space *info;
1678 int ret = 0;
1679
Josef Bacikdc89e982011-01-28 17:05:48 -05001680 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
Li Zefan120d66e2010-11-09 14:56:50 +08001681 if (!info)
1682 return -ENOMEM;
1683
1684 info->offset = offset;
1685 info->bytes = bytes;
1686
Li Zefan34d52cb2011-03-29 13:46:06 +08001687 spin_lock(&ctl->tree_lock);
Li Zefan120d66e2010-11-09 14:56:50 +08001688
Li Zefan34d52cb2011-03-29 13:46:06 +08001689 if (try_merge_free_space(ctl, info, true))
Li Zefan120d66e2010-11-09 14:56:50 +08001690 goto link;
1691
1692 /*
1693 * There was no extent directly to the left or right of this new
1694 * extent then we know we're going to have to allocate a new extent, so
1695 * before we do that see if we need to drop this into a bitmap
1696 */
Li Zefan34d52cb2011-03-29 13:46:06 +08001697 ret = insert_into_bitmap(ctl, info);
Li Zefan120d66e2010-11-09 14:56:50 +08001698 if (ret < 0) {
1699 goto out;
1700 } else if (ret) {
1701 ret = 0;
1702 goto out;
1703 }
1704link:
Li Zefan34d52cb2011-03-29 13:46:06 +08001705 ret = link_free_space(ctl, info);
Josef Bacik0f9dd462008-09-23 13:14:11 -04001706 if (ret)
Josef Bacikdc89e982011-01-28 17:05:48 -05001707 kmem_cache_free(btrfs_free_space_cachep, info);
Josef Bacik96303082009-07-13 21:29:25 -04001708out:
Li Zefan34d52cb2011-03-29 13:46:06 +08001709 spin_unlock(&ctl->tree_lock);
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001710
Josef Bacik0f9dd462008-09-23 13:14:11 -04001711 if (ret) {
Josef Bacik96303082009-07-13 21:29:25 -04001712 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
Stoyan Gaydarovc2934982009-04-02 17:05:11 -04001713 BUG_ON(ret == -EEXIST);
Josef Bacik0f9dd462008-09-23 13:14:11 -04001714 }
1715
Josef Bacik0f9dd462008-09-23 13:14:11 -04001716 return ret;
1717}
1718
/*
 * Remove [offset, offset + bytes) from the free space cache of
 * @block_group.
 *
 * The range may live in a plain extent entry, a bitmap entry, or straddle
 * into the following entry (this happens during tree log replay), so
 * several cases are handled under ctl->tree_lock:
 *   - exact-size match: drop the whole entry
 *   - front of an extent: shrink it in place
 *   - interior of an extent: split it, re-adding the leading piece via
 *     btrfs_add_free_space() after dropping the lock
 *   - bitmap-backed space: delegate to remove_from_bitmap(), retrying the
 *     lookup from the top on -EAGAIN
 *
 * Returns 0 on success, -EINVAL when the cache does not actually cover
 * the requested range.
 */
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *info;
	struct btrfs_free_space *next_info = NULL;
	int ret = 0;

	spin_lock(&ctl->tree_lock);

again:
	info = tree_search_offset(ctl, offset, 0, 0);
	if (!info) {
		/*
		 * oops didn't find an extent that matched the space we wanted
		 * to remove, look for a bitmap instead
		 */
		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					  1, 0);
		if (!info) {
			WARN_ON(1);
			goto out_lock;
		}
	}

	/*
	 * The found entry is too small to hold the whole range; the
	 * remainder must live in the next entry, so validate that it
	 * actually covers @offset before switching to it.
	 */
	if (info->bytes < bytes && rb_next(&info->offset_index)) {
		u64 end;
		next_info = rb_entry(rb_next(&info->offset_index),
					     struct btrfs_free_space,
					     offset_index);

		if (next_info->bitmap)
			end = next_info->offset +
			      BITS_PER_BITMAP * ctl->unit - 1;
		else
			end = next_info->offset + next_info->bytes;

		if (next_info->bytes < bytes ||
		    next_info->offset > offset || offset > end) {
			printk(KERN_CRIT "Found free space at %llu, size %llu,"
			      " trying to use %llu\n",
			      (unsigned long long)info->offset,
			      (unsigned long long)info->bytes,
			      (unsigned long long)bytes);
			WARN_ON(1);
			ret = -EINVAL;
			goto out_lock;
		}

		info = next_info;
	}

	/* exact fit: remove the entire entry (and its bitmap, if any) */
	if (info->bytes == bytes) {
		unlink_free_space(ctl, info);
		if (info->bitmap) {
			kfree(info->bitmap);
			ctl->total_bitmaps--;
		}
		kmem_cache_free(btrfs_free_space_cachep, info);
		goto out_lock;
	}

	/* removing from the front of an extent entry: shrink in place */
	if (!info->bitmap && info->offset == offset) {
		unlink_free_space(ctl, info);
		info->offset += bytes;
		info->bytes -= bytes;
		link_free_space(ctl, info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset <= offset &&
	    info->offset + info->bytes >= offset + bytes) {
		u64 old_start = info->offset;
		/*
		 * we're freeing space in the middle of the info,
		 * this can happen during tree log replay
		 *
		 * first unlink the old info and then
		 * insert it again after the hole we're creating
		 */
		unlink_free_space(ctl, info);
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;

			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
			ret = link_free_space(ctl, info);
			WARN_ON(ret);
			if (ret)
				goto out_lock;
		} else {
			/* the hole we're creating ends at the end
			 * of the info struct, just free the info
			 */
			kmem_cache_free(btrfs_free_space_cachep, info);
		}
		/* drop the lock: btrfs_add_free_space takes it itself */
		spin_unlock(&ctl->tree_lock);

		/* step two, insert a new info struct to cover
		 * anything before the hole
		 */
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
		WARN_ON(ret);
		goto out;
	}

	/*
	 * Bitmap-backed space; remove_from_bitmap() advances offset/bytes
	 * and returns -EAGAIN when the rest of the range spills into
	 * another entry, in which case we search again from the top.
	 */
	ret = remove_from_bitmap(ctl, info, &offset, &bytes);
	if (ret == -EAGAIN)
		goto again;
	BUG_ON(ret);
out_lock:
	spin_unlock(&ctl->tree_lock);
out:
	return ret;
}
1835
Josef Bacik0f9dd462008-09-23 13:14:11 -04001836void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1837 u64 bytes)
1838{
Li Zefan34d52cb2011-03-29 13:46:06 +08001839 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001840 struct btrfs_free_space *info;
1841 struct rb_node *n;
1842 int count = 0;
1843
Li Zefan34d52cb2011-03-29 13:46:06 +08001844 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
Josef Bacik0f9dd462008-09-23 13:14:11 -04001845 info = rb_entry(n, struct btrfs_free_space, offset_index);
1846 if (info->bytes >= bytes)
1847 count++;
Josef Bacik96303082009-07-13 21:29:25 -04001848 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
Joel Becker21380932009-04-21 12:38:29 -07001849 (unsigned long long)info->offset,
Josef Bacik96303082009-07-13 21:29:25 -04001850 (unsigned long long)info->bytes,
1851 (info->bitmap) ? "yes" : "no");
Josef Bacik0f9dd462008-09-23 13:14:11 -04001852 }
Josef Bacik96303082009-07-13 21:29:25 -04001853 printk(KERN_INFO "block group has cluster?: %s\n",
1854 list_empty(&block_group->cluster_list) ? "no" : "yes");
Josef Bacik0f9dd462008-09-23 13:14:11 -04001855 printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
1856 "\n", count);
1857}
1858
Li Zefan34d52cb2011-03-29 13:46:06 +08001859void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
Josef Bacik0f9dd462008-09-23 13:14:11 -04001860{
Li Zefan34d52cb2011-03-29 13:46:06 +08001861 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001862
Li Zefan34d52cb2011-03-29 13:46:06 +08001863 spin_lock_init(&ctl->tree_lock);
1864 ctl->unit = block_group->sectorsize;
1865 ctl->start = block_group->key.objectid;
1866 ctl->private = block_group;
1867 ctl->op = &free_space_op;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001868
Li Zefan34d52cb2011-03-29 13:46:06 +08001869 /*
1870 * we only want to have 32k of ram per block group for keeping
1871 * track of free space, and if we pass 1/2 of that we want to
1872 * start converting things over to using bitmaps
1873 */
1874 ctl->extents_thresh = ((1024 * 32) / 2) /
1875 sizeof(struct btrfs_free_space);
Josef Bacik0f9dd462008-09-23 13:14:11 -04001876}
1877
/*
 * for a given cluster, put all of its extents back into the free
 * space cache. If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already. In that case, we just return without changing anything
 *
 * Callers hold ctl->tree_lock (see btrfs_remove_free_space_cache and
 * btrfs_return_cluster_to_free_space); cluster->lock is taken here.
 * Always drops one reference on @block_group and returns 0.
 */
static int
__btrfs_return_cluster_to_free_space(
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	struct rb_node *node;

	spin_lock(&cluster->lock);
	if (cluster->block_group != block_group)
		goto out;

	/* detach the cluster from the block group */
	cluster->block_group = NULL;
	cluster->window_start = 0;
	list_del_init(&cluster->block_group_list);

	/* move every entry from the cluster rbtree back into ctl's tree */
	node = rb_first(&cluster->root);
	while (node) {
		bool bitmap;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		rb_erase(&entry->offset_index, &cluster->root);

		bitmap = (entry->bitmap != NULL);
		if (!bitmap)
			/* coalesce with neighbours; raw variant, stats kept */
			try_merge_free_space(ctl, entry, false);
		tree_insert_offset(&ctl->free_space_offset,
				   entry->offset, &entry->offset_index, bitmap);
	}
	cluster->root = RB_ROOT;

out:
	spin_unlock(&cluster->lock);
	btrfs_put_block_group(block_group);
	return 0;
}
1922
Chris Mason09655372011-05-21 09:27:38 -04001923void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
Josef Bacik0f9dd462008-09-23 13:14:11 -04001924{
1925 struct btrfs_free_space *info;
1926 struct rb_node *node;
Li Zefan581bb052011-04-20 10:06:11 +08001927
Li Zefan581bb052011-04-20 10:06:11 +08001928 while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
1929 info = rb_entry(node, struct btrfs_free_space, offset_index);
Josef Bacik9b90f512011-06-24 16:02:51 +00001930 if (!info->bitmap) {
1931 unlink_free_space(ctl, info);
1932 kmem_cache_free(btrfs_free_space_cachep, info);
1933 } else {
1934 free_bitmap(ctl, info);
1935 }
Li Zefan581bb052011-04-20 10:06:11 +08001936 if (need_resched()) {
1937 spin_unlock(&ctl->tree_lock);
1938 cond_resched();
1939 spin_lock(&ctl->tree_lock);
1940 }
1941 }
Chris Mason09655372011-05-21 09:27:38 -04001942}
1943
/*
 * Locking wrapper: take ctl->tree_lock and empty the free-space tree via
 * __btrfs_remove_free_space_cache_locked().
 */
void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
{
	spin_lock(&ctl->tree_lock);
	__btrfs_remove_free_space_cache_locked(ctl);
	spin_unlock(&ctl->tree_lock);
}
1950
Josef Bacik0f9dd462008-09-23 13:14:11 -04001951void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
1952{
Li Zefan34d52cb2011-03-29 13:46:06 +08001953 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Chris Masonfa9c0d792009-04-03 09:47:43 -04001954 struct btrfs_free_cluster *cluster;
Josef Bacik96303082009-07-13 21:29:25 -04001955 struct list_head *head;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001956
Li Zefan34d52cb2011-03-29 13:46:06 +08001957 spin_lock(&ctl->tree_lock);
Josef Bacik96303082009-07-13 21:29:25 -04001958 while ((head = block_group->cluster_list.next) !=
1959 &block_group->cluster_list) {
1960 cluster = list_entry(head, struct btrfs_free_cluster,
1961 block_group_list);
Chris Masonfa9c0d792009-04-03 09:47:43 -04001962
1963 WARN_ON(cluster->block_group != block_group);
1964 __btrfs_return_cluster_to_free_space(block_group, cluster);
Josef Bacik96303082009-07-13 21:29:25 -04001965 if (need_resched()) {
Li Zefan34d52cb2011-03-29 13:46:06 +08001966 spin_unlock(&ctl->tree_lock);
Josef Bacik96303082009-07-13 21:29:25 -04001967 cond_resched();
Li Zefan34d52cb2011-03-29 13:46:06 +08001968 spin_lock(&ctl->tree_lock);
Josef Bacik96303082009-07-13 21:29:25 -04001969 }
Chris Masonfa9c0d792009-04-03 09:47:43 -04001970 }
Chris Mason09655372011-05-21 09:27:38 -04001971 __btrfs_remove_free_space_cache_locked(ctl);
Li Zefan34d52cb2011-03-29 13:46:06 +08001972 spin_unlock(&ctl->tree_lock);
Chris Masonfa9c0d792009-04-03 09:47:43 -04001973
Josef Bacik0f9dd462008-09-23 13:14:11 -04001974}
1975
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001976u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
1977 u64 offset, u64 bytes, u64 empty_size)
Josef Bacik0f9dd462008-09-23 13:14:11 -04001978{
Li Zefan34d52cb2011-03-29 13:46:06 +08001979 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001980 struct btrfs_free_space *entry = NULL;
Josef Bacik96303082009-07-13 21:29:25 -04001981 u64 bytes_search = bytes + empty_size;
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001982 u64 ret = 0;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001983
Li Zefan34d52cb2011-03-29 13:46:06 +08001984 spin_lock(&ctl->tree_lock);
1985 entry = find_free_space(ctl, &offset, &bytes_search);
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001986 if (!entry)
Josef Bacik96303082009-07-13 21:29:25 -04001987 goto out;
1988
1989 ret = offset;
1990 if (entry->bitmap) {
Li Zefan34d52cb2011-03-29 13:46:06 +08001991 bitmap_clear_bits(ctl, entry, offset, bytes);
Li Zefanedf6e2d2010-11-09 14:50:07 +08001992 if (!entry->bytes)
Li Zefan34d52cb2011-03-29 13:46:06 +08001993 free_bitmap(ctl, entry);
Josef Bacik96303082009-07-13 21:29:25 -04001994 } else {
Li Zefan34d52cb2011-03-29 13:46:06 +08001995 unlink_free_space(ctl, entry);
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001996 entry->offset += bytes;
1997 entry->bytes -= bytes;
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001998 if (!entry->bytes)
Josef Bacikdc89e982011-01-28 17:05:48 -05001999 kmem_cache_free(btrfs_free_space_cachep, entry);
Josef Bacik6226cb0a2009-04-03 10:14:18 -04002000 else
Li Zefan34d52cb2011-03-29 13:46:06 +08002001 link_free_space(ctl, entry);
Josef Bacik6226cb0a2009-04-03 10:14:18 -04002002 }
Josef Bacik0f9dd462008-09-23 13:14:11 -04002003
Josef Bacik96303082009-07-13 21:29:25 -04002004out:
Li Zefan34d52cb2011-03-29 13:46:06 +08002005 spin_unlock(&ctl->tree_lock);
Josef Bacik817d52f2009-07-13 21:29:25 -04002006
Josef Bacik0f9dd462008-09-23 13:14:11 -04002007 return ret;
2008}
Chris Masonfa9c0d792009-04-03 09:47:43 -04002009
/*
 * given a cluster, put all of its extents back into the free space
 * cache.  If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
			       struct btrfs_block_group_cache *block_group,
			       struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space_ctl *ctl;
	int ret;

	/* first, get a safe pointer to the block group */
	spin_lock(&cluster->lock);
	if (!block_group) {
		block_group = cluster->block_group;
		if (!block_group) {
			/* cluster is already detached; nothing to do */
			spin_unlock(&cluster->lock);
			return 0;
		}
	} else if (cluster->block_group != block_group) {
		/* someone else has already freed it don't redo their work */
		spin_unlock(&cluster->lock);
		return 0;
	}
	/* pin the block group; the ref is dropped below */
	atomic_inc(&block_group->count);
	spin_unlock(&cluster->lock);

	ctl = block_group->free_space_ctl;

	/* now return any extents the cluster had on it */
	spin_lock(&ctl->tree_lock);
	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&ctl->tree_lock);

	/* finally drop our ref */
	btrfs_put_block_group(block_group);
	return ret;
}
2052
Josef Bacik96303082009-07-13 21:29:25 -04002053static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
2054 struct btrfs_free_cluster *cluster,
Josef Bacik4e69b592011-03-21 10:11:24 -04002055 struct btrfs_free_space *entry,
Josef Bacik96303082009-07-13 21:29:25 -04002056 u64 bytes, u64 min_start)
2057{
Li Zefan34d52cb2011-03-29 13:46:06 +08002058 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik96303082009-07-13 21:29:25 -04002059 int err;
2060 u64 search_start = cluster->window_start;
2061 u64 search_bytes = bytes;
2062 u64 ret = 0;
2063
Josef Bacik96303082009-07-13 21:29:25 -04002064 search_start = min_start;
2065 search_bytes = bytes;
2066
Li Zefan34d52cb2011-03-29 13:46:06 +08002067 err = search_bitmap(ctl, entry, &search_start, &search_bytes);
Josef Bacik96303082009-07-13 21:29:25 -04002068 if (err)
Josef Bacik4e69b592011-03-21 10:11:24 -04002069 return 0;
Josef Bacik96303082009-07-13 21:29:25 -04002070
2071 ret = search_start;
Miao Xiebb3ac5a2011-08-05 09:32:35 +00002072 __bitmap_clear_bits(ctl, entry, ret, bytes);
Josef Bacik96303082009-07-13 21:29:25 -04002073
2074 return ret;
2075}
2076
/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 *
 * The cluster rbtree is walked under cluster->lock; once an allocation
 * succeeded the ctl accounting (free_space, entry teardown) is done in a
 * second step under ctl->tree_lock.
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	u64 ret = 0;

	spin_lock(&cluster->lock);
	if (bytes > cluster->max_size)
		goto out;

	/* the cluster may have been stolen by another block group */
	if (cluster->block_group != block_group)
		goto out;

	node = rb_first(&cluster->root);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_free_space, offset_index);
	while(1) {
		/* skip entries that are too small or start below min_start */
		if (entry->bytes < bytes ||
		    (!entry->bitmap && entry->offset < min_start)) {
			node = rb_next(&entry->offset_index);
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		}

		if (entry->bitmap) {
			ret = btrfs_alloc_from_bitmap(block_group,
						      cluster, entry, bytes,
						      min_start);
			if (ret == 0) {
				/* no usable run in this bitmap, try the next */
				node = rb_next(&entry->offset_index);
				if (!node)
					break;
				entry = rb_entry(node, struct btrfs_free_space,
						 offset_index);
				continue;
			}
		} else {
			/* extent entry: allocate from its front */
			ret = entry->offset;

			entry->offset += bytes;
			entry->bytes -= bytes;
		}

		if (entry->bytes == 0)
			rb_erase(&entry->offset_index, &cluster->root);
		break;
	}
out:
	spin_unlock(&cluster->lock);

	if (!ret)
		return 0;

	spin_lock(&ctl->tree_lock);

	/*
	 * account the allocation here (the bitmap path used the raw
	 * __bitmap_clear_bits, so free_space has not been adjusted yet)
	 */
	ctl->free_space -= bytes;
	if (entry->bytes == 0) {
		/* the cluster entry is exhausted; release it for good */
		ctl->free_extents--;
		if (entry->bitmap) {
			kfree(entry->bitmap);
			ctl->total_bitmaps--;
			ctl->op->recalc_thresholds(ctl);
		}
		kmem_cache_free(btrfs_free_space_cachep, entry);
	}

	spin_unlock(&ctl->tree_lock);

	return ret;
}
2160
/*
 * Try to build the cluster out of the bitmap entry @entry: look for a
 * window of set bits totalling at least min_bytes, where each contiguous
 * run is at least @bytes long.  On success the bitmap entry is moved from
 * ctl's tree into the cluster rbtree and cluster->window_start/max_size
 * are set.
 *
 * Returns 0 on success, -ENOSPC when no suitable window exists.
 */
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
				struct btrfs_free_space *entry,
				struct btrfs_free_cluster *cluster,
				u64 offset, u64 bytes, u64 min_bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	unsigned long next_zero;
	unsigned long i;
	unsigned long search_bits;	/* min run length, in bits */
	unsigned long total_bits;	/* min window total, in bits */
	unsigned long found_bits;
	unsigned long start = 0;
	unsigned long total_found = 0;
	int ret;
	bool found = false;

	/* start scanning at @offset, but never before the bitmap's base */
	i = offset_to_bit(entry->offset, block_group->sectorsize,
			  max_t(u64, offset, entry->offset));
	search_bits = bytes_to_bits(bytes, block_group->sectorsize);
	total_bits = bytes_to_bits(min_bytes, block_group->sectorsize);

again:
	/* find the next run of set bits that is at least search_bits long */
	found_bits = 0;
	for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(entry->bitmap,
					       BITS_PER_BITMAP, i);
		if (next_zero - i >= search_bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (!found_bits)
		return -ENOSPC;

	/* remember where the current window began */
	if (!found) {
		start = i;
		found = true;
	}

	total_found += found_bits;

	if (cluster->max_size < found_bits * block_group->sectorsize)
		cluster->max_size = found_bits * block_group->sectorsize;

	if (total_found < total_bits) {
		i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
		/* window got too spread out: reset and start a new one */
		if (i - start > total_bits * 2) {
			total_found = 0;
			cluster->max_size = 0;
			found = false;
		}
		goto again;
	}

	/* success: hand the whole bitmap entry over to the cluster */
	cluster->window_start = start * block_group->sectorsize +
		entry->offset;
	rb_erase(&entry->offset_index, &ctl->free_space_offset);
	ret = tree_insert_offset(&cluster->root, entry->offset,
				 &entry->offset_index, 1);
	BUG_ON(ret);

	return 0;
}
2228
/*
 * This searches the block group for just extents to fill the cluster with.
 *
 * Slides a window over consecutive extent entries until the window holds
 * more than min_bytes of free space, then moves every extent entry in the
 * window from ctl's tree into the cluster rbtree.  Bitmap entries that
 * are skipped along the way are collected on @bitmaps so the bitmap
 * fallback can try them without searching again.
 *
 * Returns 0 on success, -ENOSPC when the entries run out first.
 */
static noinline int
setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
			struct btrfs_free_cluster *cluster,
			struct list_head *bitmaps, u64 offset, u64 bytes,
			u64 min_bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *first = NULL;
	struct btrfs_free_space *entry = NULL;
	struct btrfs_free_space *prev = NULL;
	struct btrfs_free_space *last;
	struct rb_node *node;
	u64 window_start;
	u64 window_free;
	u64 max_extent;
	u64 max_gap = 128 * 1024;	/* max hole allowed between entries */

	entry = tree_search_offset(ctl, offset, 0, 1);
	if (!entry)
		return -ENOSPC;

	/*
	 * We don't want bitmaps, so just move along until we find a normal
	 * extent entry.
	 */
	while (entry->bitmap) {
		if (list_empty(&entry->list))
			list_add_tail(&entry->list, bitmaps);
		node = rb_next(&entry->offset_index);
		if (!node)
			return -ENOSPC;
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	/* seed the window with the first extent entry */
	window_start = entry->offset;
	window_free = entry->bytes;
	max_extent = entry->bytes;
	first = entry;
	last = entry;
	prev = entry;

	/* grow the window until it holds more than min_bytes */
	while (window_free <= min_bytes) {
		node = rb_next(&entry->offset_index);
		if (!node)
			return -ENOSPC;
		entry = rb_entry(node, struct btrfs_free_space, offset_index);

		if (entry->bitmap) {
			/* stash skipped bitmaps for the bitmap fallback */
			if (list_empty(&entry->list))
				list_add_tail(&entry->list, bitmaps);
			continue;
		}

		/*
		 * we haven't filled the empty size and the window is
		 * very large. reset and try again
		 */
		if (entry->offset - (prev->offset + prev->bytes) > max_gap ||
		    entry->offset - window_start > (min_bytes * 2)) {
			first = entry;
			window_start = entry->offset;
			window_free = entry->bytes;
			last = entry;
			max_extent = entry->bytes;
		} else {
			last = entry;
			window_free += entry->bytes;
			if (entry->bytes > max_extent)
				max_extent = entry->bytes;
		}
		prev = entry;
	}

	cluster->window_start = first->offset;

	node = &first->offset_index;

	/*
	 * now we've found our entries, pull them out of the free space
	 * cache and put them into the cluster rbtree
	 */
	do {
		int ret;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		if (entry->bitmap)
			continue;

		rb_erase(&entry->offset_index, &ctl->free_space_offset);
		ret = tree_insert_offset(&cluster->root, entry->offset,
					 &entry->offset_index, 0);
		BUG_ON(ret);
	} while (node && entry != last);

	cluster->max_size = max_extent;

	return 0;
}
2331
2332/*
2333 * This specifically looks for bitmaps that may work in the cluster, we assume
2334 * that we have already failed to find extents that will work.
2335 */
/*
 * This specifically looks for bitmaps that may work in the cluster, we assume
 * that we have already failed to find extents that will work.
 *
 * Called with ctl->tree_lock held (see btrfs_find_space_cluster).  @bitmaps
 * is the list of bitmap entries the extent-based pass already walked past;
 * we try those first before scanning the offset tree.  Returns 0 and fills
 * in @cluster on success, -ENOSPC if no bitmap can satisfy @min_bytes.
 */
static noinline int
setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
		     struct btrfs_free_cluster *cluster,
		     struct list_head *bitmaps, u64 offset, u64 bytes,
		     u64 min_bytes)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret = -ENOSPC;

	/* No bitmaps at all in this block group: nothing to scan */
	if (ctl->total_bitmaps == 0)
		return -ENOSPC;

	/*
	 * First check our cached list of bitmaps and see if there is an entry
	 * here that will work.
	 */
	list_for_each_entry(entry, bitmaps, list) {
		if (entry->bytes < min_bytes)
			continue;
		ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
					   bytes, min_bytes);
		if (!ret)
			return 0;
	}

	/*
	 * If we do have entries on our list and we are here then we didn't find
	 * anything, so go ahead and get the next entry after the last entry in
	 * this list and start the search from there.
	 */
	if (!list_empty(bitmaps)) {
		entry = list_entry(bitmaps->prev, struct btrfs_free_space,
				   list);
		node = rb_next(&entry->offset_index);
		if (!node)
			return -ENOSPC;
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		goto search;
	}

	/* Empty cache list: start from the bitmap that would cover @offset */
	entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
	if (!entry)
		return -ENOSPC;

search:
	/*
	 * Walk the offset tree in order, skipping extent entries and
	 * too-small bitmaps, until a bitmap yields a cluster or we run
	 * out of nodes.
	 */
	node = &entry->offset_index;
	do {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		if (!entry->bitmap)
			continue;
		if (entry->bytes < min_bytes)
			continue;
		ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
					   bytes, min_bytes);
	} while (ret && node);

	return ret;
}
2397
2398/*
Chris Masonfa9c0d792009-04-03 09:47:43 -04002399 * here we try to find a cluster of blocks in a block group. The goal
2400 * is to find at least bytes free and up to empty_size + bytes free.
2401 * We might not find them all in one contiguous area.
2402 *
2403 * returns zero and sets up cluster if things worked out, otherwise
2404 * it returns -enospc
2405 */
/*
 * here we try to find a cluster of blocks in a block group. The goal
 * is to find at least bytes free and up to empty_size + bytes free.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -enospc
 *
 * Lock order: ctl->tree_lock is taken before cluster->lock and both are
 * held across the setup_cluster_* calls below.
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster,
			     u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct list_head bitmaps;
	struct btrfs_free_space *entry, *tmp;
	u64 min_bytes;
	int ret;

	/* for metadata, allow allocates with more holes */
	if (btrfs_test_opt(root, SSD_SPREAD)) {
		/* ssd_spread: demand the whole request contiguously */
		min_bytes = bytes + empty_size;
	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
		/*
		 * we want to do larger allocations when we are
		 * flushing out the delayed refs, it helps prevent
		 * making more work as we go along.
		 */
		if (trans->transaction->delayed_refs.flushing)
			min_bytes = max(bytes, (bytes + empty_size) >> 1);
		else
			min_bytes = max(bytes, (bytes + empty_size) >> 4);
	} else
		min_bytes = max(bytes, (bytes + empty_size) >> 2);

	spin_lock(&ctl->tree_lock);

	/*
	 * If we know we don't have enough space to make a cluster don't even
	 * bother doing all the work to try and find one.
	 */
	if (ctl->free_space < min_bytes) {
		spin_unlock(&ctl->tree_lock);
		return -ENOSPC;
	}

	spin_lock(&cluster->lock);

	/* someone already found a cluster, hooray */
	if (cluster->block_group) {
		ret = 0;
		goto out;
	}

	/* Try plain extents first; fall back to bitmap entries on failure */
	INIT_LIST_HEAD(&bitmaps);
	ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
				      bytes, min_bytes);
	if (ret)
		ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
					   offset, bytes, min_bytes);

	/* Clear our temporary list */
	list_for_each_entry_safe(entry, tmp, &bitmaps, list)
		list_del_init(&entry->list);

	if (!ret) {
		/* Pin the block group while the cluster points at it */
		atomic_inc(&block_group->count);
		list_add_tail(&cluster->block_group_list,
			      &block_group->cluster_list);
		cluster->block_group = block_group;
	}
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&ctl->tree_lock);

	return ret;
}
2476
2477/*
2478 * simple code to zero out a cluster
2479 */
2480void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2481{
2482 spin_lock_init(&cluster->lock);
2483 spin_lock_init(&cluster->refill_lock);
Eric Paris6bef4d32010-02-23 19:43:04 +00002484 cluster->root = RB_ROOT;
Chris Masonfa9c0d792009-04-03 09:47:43 -04002485 cluster->max_size = 0;
2486 INIT_LIST_HEAD(&cluster->block_group_list);
2487 cluster->block_group = NULL;
2488}
2489
/*
 * Discard (trim) free space in the range [start, end) of a block group.
 *
 * @trimmed returns the total number of bytes actually discarded.  Ranges
 * smaller than @minlen are skipped.  Free space is pulled out of the free
 * space cache, discarded, then re-added; while it is out of the cache it
 * is accounted as reserved so the allocator cannot hand it out.
 *
 * Returns 0 on success, -ERESTARTSYS if interrupted by a fatal signal,
 * or the error from btrfs_error_discard_extent.
 */
int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
			   u64 *trimmed, u64 start, u64 end, u64 minlen)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_free_space *entry = NULL;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	u64 bytes = 0;
	u64 actually_trimmed;
	int ret = 0;

	*trimmed = 0;

	while (start < end) {
		spin_lock(&ctl->tree_lock);

		/* Not enough free space left to be worth trimming */
		if (ctl->free_space < minlen) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		/* Look for an extent entry at start, else the covering bitmap */
		entry = tree_search_offset(ctl, start, 0, 1);
		if (!entry)
			entry = tree_search_offset(ctl,
						   offset_to_bitmap(ctl, start),
						   1, 1);

		if (!entry || entry->offset >= end) {
			spin_unlock(&ctl->tree_lock);
			break;
		}

		if (entry->bitmap) {
			/* search_bitmap advances start to the next set run */
			ret = search_bitmap(ctl, entry, &start, &bytes);
			if (!ret) {
				if (start >= end) {
					spin_unlock(&ctl->tree_lock);
					break;
				}
				bytes = min(bytes, end - start);
				bitmap_clear_bits(ctl, entry, start, bytes);
				if (entry->bytes == 0)
					free_bitmap(ctl, entry);
			} else {
				/* Empty bitmap: skip past its whole range */
				start = entry->offset + BITS_PER_BITMAP *
					block_group->sectorsize;
				spin_unlock(&ctl->tree_lock);
				ret = 0;
				continue;
			}
		} else {
			/* Plain extent: consume it entirely up to end */
			start = entry->offset;
			bytes = min(entry->bytes, end - start);
			unlink_free_space(ctl, entry);
			kmem_cache_free(btrfs_free_space_cachep, entry);
		}

		spin_unlock(&ctl->tree_lock);

		if (bytes >= minlen) {
			struct btrfs_space_info *space_info;
			int update = 0;

			/*
			 * Mark the range reserved while we discard it without
			 * holding tree_lock, so the allocator can't reuse it.
			 */
			space_info = block_group->space_info;
			spin_lock(&space_info->lock);
			spin_lock(&block_group->lock);
			if (!block_group->ro) {
				block_group->reserved += bytes;
				space_info->bytes_reserved += bytes;
				update = 1;
			}
			spin_unlock(&block_group->lock);
			spin_unlock(&space_info->lock);

			ret = btrfs_error_discard_extent(fs_info->extent_root,
							 start,
							 bytes,
							 &actually_trimmed);

			/* Give the space back to the free space cache */
			btrfs_add_free_space(block_group, start, bytes);
			if (update) {
				spin_lock(&space_info->lock);
				spin_lock(&block_group->lock);
				if (block_group->ro)
					space_info->bytes_readonly += bytes;
				block_group->reserved -= bytes;
				space_info->bytes_reserved -= bytes;
				/*
				 * NOTE(review): unlocks here are in acquisition
				 * order (space_info before block_group), unlike
				 * the reverse order used above — harmless for
				 * spinlocks but worth confirming it's intended.
				 */
				spin_unlock(&space_info->lock);
				spin_unlock(&block_group->lock);
			}

			if (ret)
				break;
			*trimmed += actually_trimmed;
		}
		start += bytes;
		bytes = 0;

		if (fatal_signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		cond_resched();
	}

	return ret;
}
Li Zefan581bb052011-04-20 10:06:11 +08002597
2598/*
2599 * Find the left-most item in the cache tree, and then return the
2600 * smallest inode number in the item.
2601 *
2602 * Note: the returned inode number may not be the smallest one in
2603 * the tree, if the left-most item is a bitmap.
2604 */
/*
 * Find the left-most item in the cache tree, and then return the
 * smallest inode number in the item.
 *
 * Note: the returned inode number may not be the smallest one in
 * the tree, if the left-most item is a bitmap.
 *
 * The returned number is removed from the free-ino cache; returns 0 if
 * the cache is empty (0 doubles as "nothing found" here).
 */
u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
{
	struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
	struct btrfs_free_space *entry = NULL;
	u64 ino = 0;

	spin_lock(&ctl->tree_lock);

	if (RB_EMPTY_ROOT(&ctl->free_space_offset))
		goto out;

	entry = rb_entry(rb_first(&ctl->free_space_offset),
			 struct btrfs_free_space, offset_index);

	if (!entry->bitmap) {
		/*
		 * Extent entry: take the first number and shrink the entry
		 * by one.  Unlink/relink so the ctl accounting stays right.
		 */
		ino = entry->offset;

		unlink_free_space(ctl, entry);
		entry->offset++;
		entry->bytes--;
		if (!entry->bytes)
			kmem_cache_free(btrfs_free_space_cachep, entry);
		else
			link_free_space(ctl, entry);
	} else {
		/* Bitmap entry: find the first set bit and clear it */
		u64 offset = 0;
		u64 count = 1;
		int ret;

		/* A bitmap in the tree always has at least one bit set */
		ret = search_bitmap(ctl, entry, &offset, &count);
		BUG_ON(ret);

		ino = offset;
		bitmap_clear_bits(ctl, entry, offset, 1);
		if (entry->bytes == 0)
			free_bitmap(ctl, entry);
	}
out:
	spin_unlock(&ctl->tree_lock);

	return ino;
}
Li Zefan82d59022011-04-20 10:33:24 +08002647
2648struct inode *lookup_free_ino_inode(struct btrfs_root *root,
2649 struct btrfs_path *path)
2650{
2651 struct inode *inode = NULL;
2652
2653 spin_lock(&root->cache_lock);
2654 if (root->cache_inode)
2655 inode = igrab(root->cache_inode);
2656 spin_unlock(&root->cache_lock);
2657 if (inode)
2658 return inode;
2659
2660 inode = __lookup_free_space_inode(root, path, 0);
2661 if (IS_ERR(inode))
2662 return inode;
2663
2664 spin_lock(&root->cache_lock);
David Sterba7841cb22011-05-31 18:07:27 +02002665 if (!btrfs_fs_closing(root->fs_info))
Li Zefan82d59022011-04-20 10:33:24 +08002666 root->cache_inode = igrab(inode);
2667 spin_unlock(&root->cache_lock);
2668
2669 return inode;
2670}
2671
2672int create_free_ino_inode(struct btrfs_root *root,
2673 struct btrfs_trans_handle *trans,
2674 struct btrfs_path *path)
2675{
2676 return __create_free_space_inode(root, trans, path,
2677 BTRFS_FREE_INO_OBJECTID, 0);
2678}
2679
2680int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2681{
2682 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2683 struct btrfs_path *path;
2684 struct inode *inode;
2685 int ret = 0;
2686 u64 root_gen = btrfs_root_generation(&root->root_item);
2687
Chris Mason4b9465c2011-06-03 09:36:29 -04002688 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2689 return 0;
2690
Li Zefan82d59022011-04-20 10:33:24 +08002691 /*
2692 * If we're unmounting then just return, since this does a search on the
2693 * normal root and not the commit root and we could deadlock.
2694 */
David Sterba7841cb22011-05-31 18:07:27 +02002695 if (btrfs_fs_closing(fs_info))
Li Zefan82d59022011-04-20 10:33:24 +08002696 return 0;
2697
2698 path = btrfs_alloc_path();
2699 if (!path)
2700 return 0;
2701
2702 inode = lookup_free_ino_inode(root, path);
2703 if (IS_ERR(inode))
2704 goto out;
2705
2706 if (root_gen != BTRFS_I(inode)->generation)
2707 goto out_put;
2708
2709 ret = __load_free_space_cache(root, inode, ctl, path, 0);
2710
2711 if (ret < 0)
2712 printk(KERN_ERR "btrfs: failed to load free ino cache for "
2713 "root %llu\n", root->root_key.objectid);
2714out_put:
2715 iput(inode);
2716out:
2717 btrfs_free_path(path);
2718 return ret;
2719}
2720
2721int btrfs_write_out_ino_cache(struct btrfs_root *root,
2722 struct btrfs_trans_handle *trans,
2723 struct btrfs_path *path)
2724{
2725 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2726 struct inode *inode;
2727 int ret;
2728
Chris Mason4b9465c2011-06-03 09:36:29 -04002729 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2730 return 0;
2731
Li Zefan82d59022011-04-20 10:33:24 +08002732 inode = lookup_free_ino_inode(root, path);
2733 if (IS_ERR(inode))
2734 return 0;
2735
2736 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
Josef Bacikc09544e2011-08-30 10:19:10 -04002737 if (ret) {
2738 btrfs_delalloc_release_metadata(inode, inode->i_size);
2739#ifdef DEBUG
Li Zefan82d59022011-04-20 10:33:24 +08002740 printk(KERN_ERR "btrfs: failed to write free ino cache "
2741 "for root %llu\n", root->root_key.objectid);
Josef Bacikc09544e2011-08-30 10:19:10 -04002742#endif
2743 }
Li Zefan82d59022011-04-20 10:33:24 +08002744
2745 iput(inode);
2746 return ret;
2747}