/*
 * Copyright (C) 2008 Red Hat. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include <linux/ratelimit.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
#include "disk-io.h"
#include "extent_io.h"
#include "inode-map.h"

#define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)

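/*
 * Sizing sketch (assuming 4KiB pages and a 4KiB sectorsize; other
 * configurations scale accordingly): a bitmap occupies one full page,
 * so it covers BITS_PER_BITMAP = 4096 * 8 = 32768 units, i.e.
 * 32768 * 4KiB = 128MiB of space per bitmap.  MAX_CACHE_BYTES_PER_GIG
 * caps the cache's in-memory footprint at roughly 32KiB per GiB of
 * block group space; recalculate_thresholds() below applies that cap.
 */
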
static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info);

static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
					       struct btrfs_path *path,
					       u64 offset)
{
	struct btrfs_key key;
	struct btrfs_key location;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode = NULL;
	int ret;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret > 0) {
		btrfs_release_path(path);
		return ERR_PTR(-ENOENT);
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_free_space_key(leaf, header, &disk_key);
	btrfs_disk_key_to_cpu(&location, &disk_key);
	btrfs_release_path(path);

	inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
	if (!inode)
		return ERR_PTR(-ENOENT);
	if (IS_ERR(inode))
		return inode;
	if (is_bad_inode(inode)) {
		iput(inode);
		return ERR_PTR(-ENOENT);
	}

	inode->i_mapping->flags &= ~__GFP_FS;

	return inode;
}

struct inode *lookup_free_space_inode(struct btrfs_root *root,
				      struct btrfs_block_group_cache
				      *block_group, struct btrfs_path *path)
{
	struct inode *inode = NULL;

	spin_lock(&block_group->lock);
	if (block_group->inode)
		inode = igrab(block_group->inode);
	spin_unlock(&block_group->lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(root, path,
					  block_group->key.objectid);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&block_group->lock);
	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) {
		printk(KERN_INFO "Old style space inode found, converting.\n");
		BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODATASUM;
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
	}

	if (!block_group->iref) {
		block_group->inode = igrab(inode);
		block_group->iref = 1;
	}
	spin_unlock(&block_group->lock);

	return inode;
}

int __create_free_space_inode(struct btrfs_root *root,
			      struct btrfs_trans_handle *trans,
			      struct btrfs_path *path, u64 ino, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	int ret;

	ret = btrfs_insert_empty_inode(trans, root, path, ino);
	if (ret)
		return ret;

	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	btrfs_item_key(leaf, &disk_key, path->slots[0]);
	memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
			     sizeof(*inode_item));
	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
	btrfs_set_inode_size(leaf, inode_item, 0);
	btrfs_set_inode_nbytes(leaf, inode_item, 0);
	btrfs_set_inode_uid(leaf, inode_item, 0);
	btrfs_set_inode_gid(leaf, inode_item, 0);
	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
			      BTRFS_INODE_PREALLOC);
	btrfs_set_inode_nlink(leaf, inode_item, 1);
	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
	btrfs_set_inode_block_group(leaf, inode_item, offset);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(struct btrfs_free_space_header));
	if (ret < 0) {
		btrfs_release_path(path);
		return ret;
	}
	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
	btrfs_set_free_space_key(leaf, header, &disk_key);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	return 0;
}

int create_free_space_inode(struct btrfs_root *root,
			    struct btrfs_trans_handle *trans,
			    struct btrfs_block_group_cache *block_group,
			    struct btrfs_path *path)
{
	int ret;
	u64 ino;

	ret = btrfs_find_free_objectid(root, &ino);
	if (ret < 0)
		return ret;

	return __create_free_space_inode(root, trans, path, ino,
					 block_group->key.objectid);
}

int btrfs_truncate_free_space_cache(struct btrfs_root *root,
				    struct btrfs_trans_handle *trans,
				    struct btrfs_path *path,
				    struct inode *inode)
{
	struct btrfs_block_rsv *rsv;
	loff_t oldsize;
	int ret = 0;

	rsv = trans->block_rsv;
	trans->block_rsv = root->orphan_block_rsv;
	ret = btrfs_block_rsv_check(root, root->orphan_block_rsv, 0, 5, 0);
	if (ret)
		return ret;

	oldsize = i_size_read(inode);
	btrfs_i_size_write(inode, 0);
	truncate_pagecache(inode, oldsize, 0);

	/*
	 * We don't need an orphan item because truncating the free space cache
	 * will never be split across transactions.
	 */
	ret = btrfs_truncate_inode_items(trans, root, inode,
					 0, BTRFS_EXTENT_DATA_KEY);

	trans->block_rsv = rsv;
	if (ret) {
		WARN_ON(1);
		return ret;
	}

	ret = btrfs_update_inode(trans, root, inode);
	return ret;
}

static int readahead_cache(struct inode *inode)
{
	struct file_ra_state *ra;
	unsigned long last_index;

	ra = kzalloc(sizeof(*ra), GFP_NOFS);
	if (!ra)
		return -ENOMEM;

	file_ra_state_init(ra, inode->i_mapping);
	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;

	page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);

	kfree(ra);

	return 0;
}

struct io_ctl {
	void *cur, *orig;
	struct page *page;
	struct page **pages;
	struct btrfs_root *root;
	unsigned long size;
	int index;
	int num_pages;
};

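/*
 * A summary of the on-disk layout the io_ctl helpers below implement,
 * reconstructed from the code rather than from a separate format
 * specification: the first page starts with two u64s, a crc slot that
 * is deliberately kept bogus so that older kernels reject the cache,
 * then the cache generation, followed by packed
 * btrfs_free_space_entry records.  Each bitmap referenced by an entry
 * is stored in a page of its own after the entry pages.
 */
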
static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
		       struct btrfs_root *root)
{
	memset(io_ctl, 0, sizeof(struct io_ctl));
	io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
		PAGE_CACHE_SHIFT;
	io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages,
				GFP_NOFS);
	if (!io_ctl->pages)
		return -ENOMEM;
	io_ctl->root = root;
	return 0;
}

static void io_ctl_free(struct io_ctl *io_ctl)
{
	kfree(io_ctl->pages);
}

static void io_ctl_unmap_page(struct io_ctl *io_ctl)
{
	if (io_ctl->cur) {
		kunmap(io_ctl->page);
		io_ctl->cur = NULL;
		io_ctl->orig = NULL;
	}
}

static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
{
	WARN_ON(io_ctl->cur);
	BUG_ON(io_ctl->index >= io_ctl->num_pages);
	io_ctl->page = io_ctl->pages[io_ctl->index++];
	io_ctl->cur = kmap(io_ctl->page);
	io_ctl->orig = io_ctl->cur;
	io_ctl->size = PAGE_CACHE_SIZE;
	if (clear)
		memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
}

static void io_ctl_drop_pages(struct io_ctl *io_ctl)
{
	int i;

	io_ctl_unmap_page(io_ctl);

	for (i = 0; i < io_ctl->num_pages; i++) {
		ClearPageChecked(io_ctl->pages[i]);
		unlock_page(io_ctl->pages[i]);
		page_cache_release(io_ctl->pages[i]);
	}
}

static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
				int uptodate)
{
	struct page *page;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
	int i;

	for (i = 0; i < io_ctl->num_pages; i++) {
		page = find_or_create_page(inode->i_mapping, i, mask);
		if (!page) {
			io_ctl_drop_pages(io_ctl);
			return -ENOMEM;
		}
		io_ctl->pages[i] = page;
		if (uptodate && !PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (!PageUptodate(page)) {
				printk(KERN_ERR "btrfs: error reading free "
				       "space cache\n");
				io_ctl_drop_pages(io_ctl);
				return -EIO;
			}
		}
	}

	return 0;
}

static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
{
	u64 *val;

	io_ctl_map_page(io_ctl, 1);

	/*
	 * Skip the first 64 bits to make sure there's a bogus crc for old
	 * kernels
	 */
	io_ctl->cur += sizeof(u64);

	val = io_ctl->cur;
	*val = cpu_to_le64(generation);
	io_ctl->cur += sizeof(u64);
	io_ctl->size -= sizeof(u64) * 2;
}

static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
{
	u64 *gen;

	io_ctl_map_page(io_ctl, 0);

	/* Skip the bogus crc area */
	io_ctl->cur += sizeof(u64);
	gen = io_ctl->cur;
	if (le64_to_cpu(*gen) != generation) {
		printk_ratelimited(KERN_ERR "btrfs: space cache generation "
				   "(%Lu) does not match inode (%Lu)\n", *gen,
				   generation);
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}
	io_ctl->cur += sizeof(u64);
	io_ctl->size -= sizeof(u64) * 2;
	return 0;
}

static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
			    void *bitmap)
{
	struct btrfs_free_space_entry *entry;

	if (!io_ctl->cur)
		return -ENOSPC;

	entry = io_ctl->cur;
	entry->offset = cpu_to_le64(offset);
	entry->bytes = cpu_to_le64(bytes);
	entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
		BTRFS_FREE_SPACE_EXTENT;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	/*
	 * index == 1 means the current page is 0, we need to generate a bogus
	 * crc for older kernels.
	 */
	if (io_ctl->index == 1) {
		u32 *tmp;
		u32 crc = ~(u32)0;

		crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + sizeof(u64),
				      crc, PAGE_CACHE_SIZE - sizeof(u64));
		btrfs_csum_final(crc, (char *)&crc);
		crc++;
		tmp = io_ctl->orig;
		*tmp = crc;
	}
	io_ctl_unmap_page(io_ctl);

	/* No more pages to map */
	if (io_ctl->index >= io_ctl->num_pages)
		return 0;

	/* map the next page */
	io_ctl_map_page(io_ctl, 1);
	return 0;
}

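/*
 * The crc twist above, spelled out (an inference from the code): the
 * checksum of page 0 minus its first u64 is computed, finalized and
 * then incremented, so the value stored in the crc slot can never
 * match.  A kernel that verifies a checksum there will therefore
 * discard the cache instead of misparsing the newer layout.
 */
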
static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
{
	if (!io_ctl->cur)
		return -ENOSPC;

	/*
	 * If we aren't at the start of the current page, unmap this one and
	 * map the next one if there is any left.
	 */
	if (io_ctl->cur != io_ctl->orig) {
		io_ctl_unmap_page(io_ctl);
		if (io_ctl->index >= io_ctl->num_pages)
			return -ENOSPC;
		io_ctl_map_page(io_ctl, 0);
	}

	memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE);
	io_ctl_unmap_page(io_ctl);
	if (io_ctl->index < io_ctl->num_pages)
		io_ctl_map_page(io_ctl, 0);
	return 0;
}

static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
{
	io_ctl_unmap_page(io_ctl);

	while (io_ctl->index < io_ctl->num_pages) {
		io_ctl_map_page(io_ctl, 1);
		io_ctl_unmap_page(io_ctl);
	}
}

static u8 io_ctl_read_entry(struct io_ctl *io_ctl,
			    struct btrfs_free_space *entry)
{
	struct btrfs_free_space_entry *e;
	u8 type;

	e = io_ctl->cur;
	entry->offset = le64_to_cpu(e->offset);
	entry->bytes = le64_to_cpu(e->bytes);
	type = e->type;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return type;

	io_ctl_unmap_page(io_ctl);

	if (io_ctl->index >= io_ctl->num_pages)
		return type;

	io_ctl_map_page(io_ctl, 0);
	return type;
}

static void io_ctl_read_bitmap(struct io_ctl *io_ctl,
			       struct btrfs_free_space *entry)
{
	BUG_ON(!io_ctl->cur);
	if (io_ctl->cur != io_ctl->orig) {
		io_ctl_unmap_page(io_ctl);
		io_ctl_map_page(io_ctl, 0);
	}
	memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE);
	io_ctl_unmap_page(io_ctl);
	if (io_ctl->index < io_ctl->num_pages)
		io_ctl_map_page(io_ctl, 0);
}

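/*
 * The read helpers above mirror the write helpers: they skip the same
 * bogus-crc slot, consume entries page by page, and pull each bitmap
 * out of its dedicated page.  __load_free_space_cache() below drives
 * them to rebuild the in-memory rbtree.
 */
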
int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
			    struct btrfs_free_space_ctl *ctl,
			    struct btrfs_path *path, u64 offset)
{
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct io_ctl io_ctl;
	struct btrfs_key key;
	struct btrfs_free_space *e, *n;
	struct list_head bitmaps;
	u64 num_entries;
	u64 num_bitmaps;
	u64 generation;
	u8 type;
	int ret = 0;

	INIT_LIST_HEAD(&bitmaps);

	/* Nothing in the space cache, goodbye */
	if (!i_size_read(inode))
		return 0;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return 0;
	else if (ret > 0) {
		btrfs_release_path(path);
		return 0;
	}

	ret = -1;

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	num_entries = btrfs_free_space_entries(leaf, header);
	num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
	generation = btrfs_free_space_generation(leaf, header);
	btrfs_release_path(path);

	if (BTRFS_I(inode)->generation != generation) {
		printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
		       " not match free space cache generation (%llu)\n",
		       (unsigned long long)BTRFS_I(inode)->generation,
		       (unsigned long long)generation);
		return 0;
	}

	if (!num_entries)
		return 0;

	io_ctl_init(&io_ctl, inode, root);
	ret = readahead_cache(inode);
	if (ret)
		goto out;

	ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
	if (ret)
		goto out;

	ret = io_ctl_check_generation(&io_ctl, generation);
	if (ret)
		goto free_cache;

	while (num_entries) {
		e = kmem_cache_zalloc(btrfs_free_space_cachep,
				      GFP_NOFS);
		if (!e)
			goto free_cache;

		type = io_ctl_read_entry(&io_ctl, e);
		if (!e->bytes) {
			kmem_cache_free(btrfs_free_space_cachep, e);
			goto free_cache;
		}

		if (type == BTRFS_FREE_SPACE_EXTENT) {
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			spin_unlock(&ctl->tree_lock);
			if (ret) {
				printk(KERN_ERR "Duplicate entries in "
				       "free space cache, dumping\n");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
		} else {
			BUG_ON(!num_bitmaps);
			num_bitmaps--;
			e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
			if (!e->bitmap) {
				kmem_cache_free(
					btrfs_free_space_cachep, e);
				goto free_cache;
			}
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			ctl->total_bitmaps++;
			ctl->op->recalc_thresholds(ctl);
			spin_unlock(&ctl->tree_lock);
			if (ret) {
				printk(KERN_ERR "Duplicate entries in "
				       "free space cache, dumping\n");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
			list_add_tail(&e->list, &bitmaps);
		}

		num_entries--;
	}

	/*
	 * The bitmap pages come after all of the entries on disk, so
	 * only read them back in once every entry has been consumed.
	 */
	list_for_each_entry_safe(e, n, &bitmaps, list) {
		list_del_init(&e->list);
		io_ctl_read_bitmap(&io_ctl, e);
	}

	io_ctl_drop_pages(&io_ctl);
	ret = 1;
out:
	io_ctl_free(&io_ctl);
	return ret;
free_cache:
	io_ctl_drop_pages(&io_ctl);
	__btrfs_remove_free_space_cache(ctl);
	goto out;
}

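/*
 * Return convention of the loader, as read from the paths above: 1
 * means the cache was fully loaded into the ctl, 0 means there was no
 * usable cache and the caller should fall back to scanning, and a
 * stale or corrupt-looking cache is torn back down via
 * __btrfs_remove_free_space_cache().  load_free_space_cache() below
 * additionally cross-checks the loaded totals against the block group
 * item before trusting the result.
 */
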
int load_free_space_cache(struct btrfs_fs_info *fs_info,
			  struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct btrfs_root *root = fs_info->tree_root;
	struct inode *inode;
	struct btrfs_path *path;
	int ret;
	bool matched;
	u64 used = btrfs_block_group_used(&block_group->item);

	/*
	 * If we're unmounting then just return, since this does a search on the
	 * normal root and not the commit root and we could deadlock.
	 */
	if (btrfs_fs_closing(fs_info))
		return 0;

	/*
	 * If this block group has been marked to be cleared for one reason or
	 * another then we can't trust the on disk cache, so just return.
	 */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	path = btrfs_alloc_path();
	if (!path)
		return 0;

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode)) {
		btrfs_free_path(path);
		return 0;
	}

	ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
				      path, block_group->key.objectid);
	btrfs_free_path(path);
	if (ret <= 0)
		goto out;

	spin_lock(&ctl->tree_lock);
	matched = (ctl->free_space == (block_group->key.offset - used -
				       block_group->bytes_super));
	spin_unlock(&ctl->tree_lock);

	if (!matched) {
		__btrfs_remove_free_space_cache(ctl);
		printk(KERN_ERR "block group %llu has a wrong amount of free "
		       "space\n", block_group->key.objectid);
		ret = -1;
	}
out:
	if (ret < 0) {
		/* This cache is bogus, make sure it gets cleared */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		ret = 0;

		printk(KERN_ERR "btrfs: failed to load free space cache "
		       "for block group %llu\n", block_group->key.objectid);
	}

	iput(inode);
	return ret;
}

/**
 * __btrfs_write_out_cache - write out cached info to an inode
 * @root - the root the inode belongs to
 * @ctl - the free space cache we are going to write out
 * @block_group - the block_group for this cache if it belongs to a block_group
 * @trans - the trans handle
 * @path - the path to use
 * @offset - the offset for the key we'll insert
 *
 * This function writes out a free space cache struct to disk for quick recovery
 * on mount.  This will return 0 if it was successful in writing the cache out,
 * and -1 if it was not.
 */
int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
			    struct btrfs_free_space_ctl *ctl,
			    struct btrfs_block_group_cache *block_group,
			    struct btrfs_trans_handle *trans,
			    struct btrfs_path *path, u64 offset)
{
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct rb_node *node;
	struct list_head *pos, *n;
	struct extent_state *cached_state = NULL;
	struct btrfs_free_cluster *cluster = NULL;
	struct extent_io_tree *unpin = NULL;
	struct io_ctl io_ctl;
	struct list_head bitmap_list;
	struct btrfs_key key;
	u64 start, end, len;
	int entries = 0;
	int bitmaps = 0;
	int ret;
	int err = -1;

	INIT_LIST_HEAD(&bitmap_list);

	if (!i_size_read(inode))
		return -1;

	filemap_write_and_wait(inode->i_mapping);
	btrfs_wait_ordered_range(inode, inode->i_size &
				 ~(root->sectorsize - 1), (u64)-1);

	io_ctl_init(&io_ctl, inode, root);

	/* Get the cluster for this block_group if it exists */
	if (block_group && !list_empty(&block_group->cluster_list))
		cluster = list_entry(block_group->cluster_list.next,
				     struct btrfs_free_cluster,
				     block_group_list);

	/*
	 * We shouldn't have switched the pinned extents yet so this is the
	 * right one
	 */
	unpin = root->fs_info->pinned_extents;

	/* Lock all pages first so we can lock the extent safely. */
	io_ctl_prepare_pages(&io_ctl, inode, 0);

	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
			 0, &cached_state, GFP_NOFS);

	/*
	 * When searching for pinned extents, we need to start at our start
	 * offset.
	 */
	if (block_group)
		start = block_group->key.objectid;

	node = rb_first(&ctl->free_space_offset);
	if (!node && cluster) {
		node = rb_first(&cluster->root);
		cluster = NULL;
	}

	io_ctl_set_generation(&io_ctl, trans->transid);

	/* Write out the extent entries */
	while (node) {
		struct btrfs_free_space *e;

		e = rb_entry(node, struct btrfs_free_space, offset_index);
		entries++;

		ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes,
				       e->bitmap);
		if (ret)
			goto out_nospc;

		if (e->bitmap) {
			list_add_tail(&e->list, &bitmap_list);
			bitmaps++;
		}
		node = rb_next(node);
		if (!node && cluster) {
			node = rb_first(&cluster->root);
			cluster = NULL;
		}
	}

	/*
	 * We want to add any pinned extents to our free space cache
	 * so we don't leak the space
	 */
	while (block_group && (start < block_group->key.objectid +
			       block_group->key.offset)) {
		ret = find_first_extent_bit(unpin, start, &start, &end,
					    EXTENT_DIRTY);
		if (ret) {
			ret = 0;
			break;
		}

		/* This pinned extent is out of our range */
		if (start >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		len = block_group->key.objectid +
			block_group->key.offset - start;
		len = min(len, end + 1 - start);

		entries++;
		ret = io_ctl_add_entry(&io_ctl, start, len, NULL);
		if (ret)
			goto out_nospc;

		start = end + 1;
	}

	/* Write out the bitmaps */
	list_for_each_safe(pos, n, &bitmap_list) {
		struct btrfs_free_space *entry =
			list_entry(pos, struct btrfs_free_space, list);

		ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap);
		if (ret)
			goto out_nospc;
		list_del_init(&entry->list);
	}

	/* Zero out the rest of the pages just to make sure */
	io_ctl_zero_remaining_pages(&io_ctl);

	ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
				0, i_size_read(inode), &cached_state);
	io_ctl_drop_pages(&io_ctl);
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);

	if (ret)
		goto out;

	BTRFS_I(inode)->generation = trans->transid;

	filemap_write_and_wait(inode->i_mapping);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0) {
		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
				 EXTENT_DIRTY | EXTENT_DELALLOC |
				 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
		goto out;
	}
	leaf = path->nodes[0];
	if (ret > 0) {
		struct btrfs_key found_key;
		BUG_ON(!path->slots[0]);
		path->slots[0]--;
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
		    found_key.offset != offset) {
			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
					 inode->i_size - 1,
					 EXTENT_DIRTY | EXTENT_DELALLOC |
					 EXTENT_DO_ACCOUNTING, 0, 0, NULL,
					 GFP_NOFS);
			btrfs_release_path(path);
			goto out;
		}
	}
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_set_free_space_entries(leaf, header, entries);
	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
	btrfs_set_free_space_generation(leaf, header, trans->transid);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	err = 0;
out:
	io_ctl_free(&io_ctl);
	if (err) {
		invalidate_inode_pages2(inode->i_mapping);
		BTRFS_I(inode)->generation = 0;
	}
	btrfs_update_inode(trans, root, inode);
	return err;

out_nospc:
	list_for_each_safe(pos, n, &bitmap_list) {
		struct btrfs_free_space *entry =
			list_entry(pos, struct btrfs_free_space, list);
		list_del_init(&entry->list);
	}
	io_ctl_drop_pages(&io_ctl);
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);
	goto out;
}

int btrfs_write_out_cache(struct btrfs_root *root,
			  struct btrfs_trans_handle *trans,
			  struct btrfs_block_group_cache *block_group,
			  struct btrfs_path *path)
{
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct inode *inode;
	int ret = 0;

	root = root->fs_info->tree_root;

	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	inode = lookup_free_space_inode(root, block_group, path);
	if (IS_ERR(inode))
		return 0;

	ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
				      path, block_group->key.objectid);
	if (ret) {
		btrfs_delalloc_release_metadata(inode, inode->i_size);
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_ERROR;
		spin_unlock(&block_group->lock);
		ret = 0;
#ifdef DEBUG
		printk(KERN_ERR "btrfs: failed to write free space cache "
		       "for block group %llu\n", block_group->key.objectid);
#endif
	}

	iput(inode);
	return ret;
}

static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
					  u64 offset)
{
	BUG_ON(offset < bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div_u64(offset, unit));
}

static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
{
	return (unsigned long)(div_u64(bytes, unit));
}

static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
				   u64 offset)
{
	u64 bitmap_start;
	u64 bytes_per_bitmap;

	bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
	bitmap_start = offset - ctl->start;
	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
	bitmap_start *= bytes_per_bitmap;
	bitmap_start += ctl->start;

	return bitmap_start;
}

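/*
 * Worked example for offset_to_bitmap() (assuming ctl->start == 0 and
 * ctl->unit == 4096, so bytes_per_bitmap == 128MiB): an offset of
 * 200MiB divides down to 1 and scales back up to 128MiB, the start of
 * the second bitmap's range.  The helper simply rounds an offset down
 * to the start of the bitmap that covers it.
 */
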
static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset.  If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap.  So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically.  If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before
			 * logically.
			 */
			if (bitmap) {
				if (info->bitmap) {
					WARN_ON_ONCE(1);
					return -EEXIST;
				}
				p = &(*p)->rb_right;
			} else {
				if (!info->bitmap) {
					WARN_ON_ONCE(1);
					return -EEXIST;
				}
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}

/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_free_space_ctl *ctl,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = ctl->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = &entry->offset_index;
			while (1) {
				n = rb_prev(n);
				if (!n)
					break;
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap) {
					if (prev->offset + prev->bytes > offset)
						entry = prev;
					break;
				}
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		n = &entry->offset_index;
		while (1) {
			n = rb_prev(n);
			if (!n)
				break;
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap) {
				if (prev->offset + prev->bytes > offset)
					return prev;
				break;
			}
		}
		if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    ctl->unit > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}

static inline void
__unlink_free_space(struct btrfs_free_space_ctl *ctl,
		    struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &ctl->free_space_offset);
	ctl->free_extents--;
}

static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	__unlink_free_space(ctl, info);
	ctl->free_space -= info->bytes;
}

static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info)
{
	int ret = 0;

	BUG_ON(!info->bitmap && !info->bytes);
	ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
				 &info->offset_index, (info->bitmap != NULL));
	if (ret)
		return ret;

	ctl->free_space += info->bytes;
	ctl->free_extents++;
	return ret;
}

static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_block_group_cache *block_group = ctl->private;
	u64 max_bytes;
	u64 bitmap_bytes;
	u64 extent_bytes;
	u64 size = block_group->key.offset;
	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
	int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);

	BUG_ON(ctl->total_bitmaps > max_bitmaps);

	/*
	 * The goal is to keep the total amount of memory used per 1gb of space
	 * at or below 32k, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
	if (size < 1024 * 1024 * 1024)
		max_bytes = MAX_CACHE_BYTES_PER_GIG;
	else
		max_bytes = MAX_CACHE_BYTES_PER_GIG *
			div64_u64(size, 1024 * 1024 * 1024);

	/*
	 * we want to account for 1 more bitmap than what we have so we can make
	 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
	 * we add more bitmaps.
	 */
	bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;

	if (bitmap_bytes >= max_bytes) {
		ctl->extents_thresh = 0;
		return;
	}

	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));

	ctl->extents_thresh =
		div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}

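/*
 * A worked pass through recalculate_thresholds() (assuming a 1GiB
 * block group, 4KiB pages and no bitmaps yet): max_bytes = 32768,
 * bitmap_bytes = (0 + 1) * 4096 = 4096, extent_bytes =
 * min(32768 - 4096, 32768 / 2) = 16384, so extents_thresh allows
 * 16384 / sizeof(struct btrfs_free_space) extent entries before
 * use_bitmap() starts steering new free space into bitmaps.
 */
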
static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
				       struct btrfs_free_space *info,
				       u64 offset, u64 bytes)
{
	unsigned long start, count;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	BUG_ON(start + count > BITS_PER_BITMAP);

	bitmap_clear(info->bitmap, start, count);

	info->bytes -= bytes;
}

static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	__bitmap_clear_bits(ctl, info, offset, bytes);
	ctl->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, count;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	BUG_ON(start + count > BITS_PER_BITMAP);

	bitmap_set(info->bitmap, start, count);

	info->bytes += bytes;
	ctl->free_space += bytes;
}

static int search_bitmap(struct btrfs_free_space_ctl *ctl,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	i = offset_to_bit(bitmap_info->offset, ctl->unit,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, ctl->unit);

	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * ctl->unit) + bitmap_info->offset;
		*bytes = (u64)(found_bits) * ctl->unit;
		return 0;
	}

	return -1;
}

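/*
 * In other words (a restatement of the loop above, not new behaviour):
 * search_bitmap() walks runs of set bits, hopping from the start of
 * each run to the next zero bit.  The first run at least *bytes long
 * wins, and *offset and *bytes are rewritten to describe the entire
 * run that was found, which may be larger than what was asked for.
 */
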
static struct btrfs_free_space *
find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret;

	if (!ctl->free_space_offset.rb_node)
		return NULL;

	entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
	if (!entry)
		return NULL;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes)
			continue;

		if (entry->bitmap) {
			ret = search_bitmap(ctl, entry, offset, bytes);
			if (!ret)
				return entry;
			continue;
		}

		*offset = entry->offset;
		*bytes = entry->bytes;
		return entry;
	}

	return NULL;
}

static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info, u64 offset)
{
	info->offset = offset_to_bitmap(ctl, offset);
	info->bytes = 0;
	link_free_space(ctl, info);
	ctl->total_bitmaps++;

	ctl->op->recalc_thresholds(ctl);
}

static void free_bitmap(struct btrfs_free_space_ctl *ctl,
			struct btrfs_free_space *bitmap_info)
{
	unlink_free_space(ctl, bitmap_info);
	kfree(bitmap_info->bitmap);
	kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
	ctl->total_bitmaps--;
	ctl->op->recalc_thresholds(ctl);
}

Li Zefan34d52cb2011-03-29 13:46:06 +08001373static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
Josef Bacik96303082009-07-13 21:29:25 -04001374 struct btrfs_free_space *bitmap_info,
1375 u64 *offset, u64 *bytes)
1376{
1377 u64 end;
Josef Bacik6606bb92009-07-31 11:03:58 -04001378 u64 search_start, search_bytes;
1379 int ret;
Josef Bacik96303082009-07-13 21:29:25 -04001380
1381again:
Li Zefan34d52cb2011-03-29 13:46:06 +08001382 end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
Josef Bacik96303082009-07-13 21:29:25 -04001383
Josef Bacik6606bb92009-07-31 11:03:58 -04001384 /*
1385 * XXX - this can go away after a few releases.
1386 *
1387 * since the only user of btrfs_remove_free_space is the tree logging
1388 * stuff, and the only way to test that is under crash conditions, we
1389 * want to have this debug stuff here just in case somethings not
1390 * working. Search the bitmap for the space we are trying to use to
1391 * make sure its actually there. If its not there then we need to stop
1392 * because something has gone wrong.
1393 */
1394 search_start = *offset;
1395 search_bytes = *bytes;
Josef Bacik13dbc082011-02-03 02:39:52 +00001396 search_bytes = min(search_bytes, end - search_start + 1);
Li Zefan34d52cb2011-03-29 13:46:06 +08001397 ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
Josef Bacik6606bb92009-07-31 11:03:58 -04001398 BUG_ON(ret < 0 || search_start != *offset);
1399
Josef Bacik96303082009-07-13 21:29:25 -04001400 if (*offset > bitmap_info->offset && *offset + *bytes > end) {
Li Zefan34d52cb2011-03-29 13:46:06 +08001401 bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
Josef Bacik96303082009-07-13 21:29:25 -04001402 *bytes -= end - *offset + 1;
1403 *offset = end + 1;
1404 } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
Li Zefan34d52cb2011-03-29 13:46:06 +08001405 bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
Josef Bacik96303082009-07-13 21:29:25 -04001406 *bytes = 0;
1407 }
1408
1409 if (*bytes) {
Josef Bacik6606bb92009-07-31 11:03:58 -04001410 struct rb_node *next = rb_next(&bitmap_info->offset_index);
Li Zefanedf6e2d2010-11-09 14:50:07 +08001411 if (!bitmap_info->bytes)
Li Zefan34d52cb2011-03-29 13:46:06 +08001412 free_bitmap(ctl, bitmap_info);
Josef Bacik96303082009-07-13 21:29:25 -04001413
Josef Bacik6606bb92009-07-31 11:03:58 -04001414 /*
1415 * no entry after this bitmap, but we still have bytes to
1416 * remove, so something has gone wrong.
1417 */
1418 if (!next)
Josef Bacik96303082009-07-13 21:29:25 -04001419 return -EINVAL;
1420
Josef Bacik6606bb92009-07-31 11:03:58 -04001421 bitmap_info = rb_entry(next, struct btrfs_free_space,
1422 offset_index);
1423
1424 /*
1425 * if the next entry isn't a bitmap we need to return to let the
1426 * extent stuff do its work.
1427 */
Josef Bacik96303082009-07-13 21:29:25 -04001428 if (!bitmap_info->bitmap)
1429 return -EAGAIN;
1430
Josef Bacik6606bb92009-07-31 11:03:58 -04001431 /*
1432 * Ok the next item is a bitmap, but it may not actually hold
1433 * the information for the rest of this free space stuff, so
1434 * look for it, and if we don't find it return so we can try
1435 * everything over again.
1436 */
1437 search_start = *offset;
1438 search_bytes = *bytes;
Li Zefan34d52cb2011-03-29 13:46:06 +08001439 ret = search_bitmap(ctl, bitmap_info, &search_start,
Josef Bacik6606bb92009-07-31 11:03:58 -04001440 &search_bytes);
1441 if (ret < 0 || search_start != *offset)
1442 return -EAGAIN;
1443
Josef Bacik96303082009-07-13 21:29:25 -04001444 goto again;
Li Zefanedf6e2d2010-11-09 14:50:07 +08001445 } else if (!bitmap_info->bytes)
Li Zefan34d52cb2011-03-29 13:46:06 +08001446 free_bitmap(ctl, bitmap_info);
Josef Bacik96303082009-07-13 21:29:25 -04001447
1448 return 0;
1449}
1450
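/*
 * Set as much of [offset, offset + bytes) in this bitmap entry as fits
 * before the entry's end, and return the number of bytes actually
 * marked; callers loop over successive bitmaps until the whole range
 * has been recorded.
 */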
Josef Bacik2cdc3422011-05-27 14:07:49 -04001451static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1452 struct btrfs_free_space *info, u64 offset,
1453 u64 bytes)
1454{
1455 u64 bytes_to_set = 0;
1456 u64 end;
1457
1458 end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1459
1460 bytes_to_set = min(end - offset, bytes);
1461
1462 bitmap_set_bits(ctl, info, offset, bytes_to_set);
1463
1464 return bytes_to_set;
1465
1466}
1467
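/*
 * The ->use_bitmap() heuristic for block groups: while we are under
 * the extents threshold, plain extent entries are preferred (small
 * extents only until half the slots are used), and bitmaps are never
 * used for block groups smaller than one bitmap's coverage.
 */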
Li Zefan34d52cb2011-03-29 13:46:06 +08001468static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1469 struct btrfs_free_space *info)
Josef Bacik96303082009-07-13 21:29:25 -04001470{
Li Zefan34d52cb2011-03-29 13:46:06 +08001471 struct btrfs_block_group_cache *block_group = ctl->private;
Josef Bacik96303082009-07-13 21:29:25 -04001472
1473 /*
1474 * If we are below the extents threshold then we can add this as an
1475 * extent, and don't have to deal with the bitmap
1476 */
Li Zefan34d52cb2011-03-29 13:46:06 +08001477 if (ctl->free_extents < ctl->extents_thresh) {
Josef Bacik32cb0842011-03-18 16:16:21 -04001478 /*
1479 * If this block group has some small extents we don't want to
1480 * use up all of our free slots in the cache with them, we want
	1481	 * to reserve them for larger extents; however, if we have plenty
	1482	 * of cache left then go ahead and add them, no sense in adding
1483 * the overhead of a bitmap if we don't have to.
1484 */
1485 if (info->bytes <= block_group->sectorsize * 4) {
Li Zefan34d52cb2011-03-29 13:46:06 +08001486 if (ctl->free_extents * 2 <= ctl->extents_thresh)
1487 return false;
Josef Bacik32cb0842011-03-18 16:16:21 -04001488 } else {
Li Zefan34d52cb2011-03-29 13:46:06 +08001489 return false;
Josef Bacik32cb0842011-03-18 16:16:21 -04001490 }
1491 }
Josef Bacik96303082009-07-13 21:29:25 -04001492
1493 /*
	1494	 * some block groups are so tiny that a single bitmap would cover more
	1495	 * than the whole group, so don't even bother to create one
1496 */
1497 if (BITS_PER_BITMAP * block_group->sectorsize >
1498 block_group->key.offset)
Li Zefan34d52cb2011-03-29 13:46:06 +08001499 return false;
1500
1501 return true;
1502}
1503
Josef Bacik2cdc3422011-05-27 14:07:49 -04001504static struct btrfs_free_space_op free_space_op = {
1505 .recalc_thresholds = recalculate_thresholds,
1506 .use_bitmap = use_bitmap,
1507};
1508
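/*
 * Store 'info' in one or more bitmaps, preferring a bitmap that sits
 * at the head of this block group's cluster. Returns 0 when bitmaps
 * shouldn't be used, 1 once every byte has been recorded, or a
 * negative errno on allocation failure.
 */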
Li Zefan34d52cb2011-03-29 13:46:06 +08001509static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1510 struct btrfs_free_space *info)
1511{
1512 struct btrfs_free_space *bitmap_info;
Josef Bacik2cdc3422011-05-27 14:07:49 -04001513 struct btrfs_block_group_cache *block_group = NULL;
Li Zefan34d52cb2011-03-29 13:46:06 +08001514 int added = 0;
Josef Bacik2cdc3422011-05-27 14:07:49 -04001515 u64 bytes, offset, bytes_added;
Li Zefan34d52cb2011-03-29 13:46:06 +08001516 int ret;
Josef Bacik96303082009-07-13 21:29:25 -04001517
1518 bytes = info->bytes;
1519 offset = info->offset;
1520
Li Zefan34d52cb2011-03-29 13:46:06 +08001521 if (!ctl->op->use_bitmap(ctl, info))
1522 return 0;
1523
Josef Bacik2cdc3422011-05-27 14:07:49 -04001524 if (ctl->op == &free_space_op)
1525 block_group = ctl->private;
Chris Mason38e87882011-06-10 16:36:57 -04001526again:
Josef Bacik2cdc3422011-05-27 14:07:49 -04001527 /*
	1528	 * Since we link bitmaps right into the cluster, we need to see if
	1529	 * we have a cluster here; if so, and it has our bitmap, we need to
	1530	 * add the free space to that bitmap.
1531 */
1532 if (block_group && !list_empty(&block_group->cluster_list)) {
1533 struct btrfs_free_cluster *cluster;
1534 struct rb_node *node;
1535 struct btrfs_free_space *entry;
1536
1537 cluster = list_entry(block_group->cluster_list.next,
1538 struct btrfs_free_cluster,
1539 block_group_list);
1540 spin_lock(&cluster->lock);
1541 node = rb_first(&cluster->root);
1542 if (!node) {
1543 spin_unlock(&cluster->lock);
Chris Mason38e87882011-06-10 16:36:57 -04001544 goto no_cluster_bitmap;
Josef Bacik2cdc3422011-05-27 14:07:49 -04001545 }
1546
1547 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1548 if (!entry->bitmap) {
1549 spin_unlock(&cluster->lock);
Chris Mason38e87882011-06-10 16:36:57 -04001550 goto no_cluster_bitmap;
Josef Bacik2cdc3422011-05-27 14:07:49 -04001551 }
1552
1553 if (entry->offset == offset_to_bitmap(ctl, offset)) {
1554 bytes_added = add_bytes_to_bitmap(ctl, entry,
1555 offset, bytes);
1556 bytes -= bytes_added;
1557 offset += bytes_added;
1558 }
1559 spin_unlock(&cluster->lock);
1560 if (!bytes) {
1561 ret = 1;
1562 goto out;
1563 }
1564 }
Chris Mason38e87882011-06-10 16:36:57 -04001565
1566no_cluster_bitmap:
Li Zefan34d52cb2011-03-29 13:46:06 +08001567 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
Josef Bacik96303082009-07-13 21:29:25 -04001568 1, 0);
1569 if (!bitmap_info) {
1570 BUG_ON(added);
1571 goto new_bitmap;
1572 }
1573
Josef Bacik2cdc3422011-05-27 14:07:49 -04001574 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
1575 bytes -= bytes_added;
1576 offset += bytes_added;
1577 added = 0;
Josef Bacik96303082009-07-13 21:29:25 -04001578
1579 if (!bytes) {
1580 ret = 1;
1581 goto out;
1582 } else
1583 goto again;
1584
1585new_bitmap:
1586 if (info && info->bitmap) {
Li Zefan34d52cb2011-03-29 13:46:06 +08001587 add_new_bitmap(ctl, info, offset);
Josef Bacik96303082009-07-13 21:29:25 -04001588 added = 1;
1589 info = NULL;
1590 goto again;
1591 } else {
Li Zefan34d52cb2011-03-29 13:46:06 +08001592 spin_unlock(&ctl->tree_lock);
Josef Bacik96303082009-07-13 21:29:25 -04001593
1594 /* no pre-allocated info, allocate a new one */
1595 if (!info) {
Josef Bacikdc89e982011-01-28 17:05:48 -05001596 info = kmem_cache_zalloc(btrfs_free_space_cachep,
1597 GFP_NOFS);
Josef Bacik96303082009-07-13 21:29:25 -04001598 if (!info) {
Li Zefan34d52cb2011-03-29 13:46:06 +08001599 spin_lock(&ctl->tree_lock);
Josef Bacik96303082009-07-13 21:29:25 -04001600 ret = -ENOMEM;
1601 goto out;
1602 }
1603 }
1604
1605 /* allocate the bitmap */
1606 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
Li Zefan34d52cb2011-03-29 13:46:06 +08001607 spin_lock(&ctl->tree_lock);
Josef Bacik96303082009-07-13 21:29:25 -04001608 if (!info->bitmap) {
1609 ret = -ENOMEM;
1610 goto out;
1611 }
1612 goto again;
1613 }
1614
1615out:
1616 if (info) {
1617 if (info->bitmap)
1618 kfree(info->bitmap);
Josef Bacikdc89e982011-01-28 17:05:48 -05001619 kmem_cache_free(btrfs_free_space_cachep, info);
Josef Bacik96303082009-07-13 21:29:25 -04001620 }
Josef Bacik0f9dd462008-09-23 13:14:11 -04001621
1622 return ret;
1623}
1624
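/*
 * Look for extent entries immediately before and after 'info' and
 * absorb them into it, freeing the old entries; returns true if
 * anything was merged. update_stat picks between the stat-updating
 * and raw unlink helpers.
 */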
Chris Mason945d8962011-05-22 12:33:42 -04001625static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
Li Zefanf333adb2010-11-09 14:57:39 +08001626 struct btrfs_free_space *info, bool update_stat)
Josef Bacik0f9dd462008-09-23 13:14:11 -04001627{
Li Zefan120d66e2010-11-09 14:56:50 +08001628 struct btrfs_free_space *left_info;
1629 struct btrfs_free_space *right_info;
1630 bool merged = false;
1631 u64 offset = info->offset;
1632 u64 bytes = info->bytes;
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001633
Josef Bacik0f9dd462008-09-23 13:14:11 -04001634 /*
	1635	 * first we want to see if there is free space adjacent to the range
	1636	 * we are adding; if there is, remove that struct and add a new one
	1637	 * to cover the entire range
1638 */
Li Zefan34d52cb2011-03-29 13:46:06 +08001639 right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
Josef Bacik96303082009-07-13 21:29:25 -04001640 if (right_info && rb_prev(&right_info->offset_index))
1641 left_info = rb_entry(rb_prev(&right_info->offset_index),
1642 struct btrfs_free_space, offset_index);
1643 else
Li Zefan34d52cb2011-03-29 13:46:06 +08001644 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
Josef Bacik0f9dd462008-09-23 13:14:11 -04001645
Josef Bacik96303082009-07-13 21:29:25 -04001646 if (right_info && !right_info->bitmap) {
Li Zefanf333adb2010-11-09 14:57:39 +08001647 if (update_stat)
Li Zefan34d52cb2011-03-29 13:46:06 +08001648 unlink_free_space(ctl, right_info);
Li Zefanf333adb2010-11-09 14:57:39 +08001649 else
Li Zefan34d52cb2011-03-29 13:46:06 +08001650 __unlink_free_space(ctl, right_info);
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001651 info->bytes += right_info->bytes;
Josef Bacikdc89e982011-01-28 17:05:48 -05001652 kmem_cache_free(btrfs_free_space_cachep, right_info);
Li Zefan120d66e2010-11-09 14:56:50 +08001653 merged = true;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001654 }
1655
Josef Bacik96303082009-07-13 21:29:25 -04001656 if (left_info && !left_info->bitmap &&
1657 left_info->offset + left_info->bytes == offset) {
Li Zefanf333adb2010-11-09 14:57:39 +08001658 if (update_stat)
Li Zefan34d52cb2011-03-29 13:46:06 +08001659 unlink_free_space(ctl, left_info);
Li Zefanf333adb2010-11-09 14:57:39 +08001660 else
Li Zefan34d52cb2011-03-29 13:46:06 +08001661 __unlink_free_space(ctl, left_info);
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001662 info->offset = left_info->offset;
1663 info->bytes += left_info->bytes;
Josef Bacikdc89e982011-01-28 17:05:48 -05001664 kmem_cache_free(btrfs_free_space_cachep, left_info);
Li Zefan120d66e2010-11-09 14:56:50 +08001665 merged = true;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001666 }
1667
Li Zefan120d66e2010-11-09 14:56:50 +08001668 return merged;
1669}
1670
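/*
 * Core path for giving space back to the cache: the range is merged
 * with any adjacent extent entries, dropped into a bitmap when the
 * use_bitmap heuristic says so, or linked into the tree as a new
 * extent entry.
 */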
Li Zefan581bb052011-04-20 10:06:11 +08001671int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
1672 u64 offset, u64 bytes)
Li Zefan120d66e2010-11-09 14:56:50 +08001673{
1674 struct btrfs_free_space *info;
1675 int ret = 0;
1676
Josef Bacikdc89e982011-01-28 17:05:48 -05001677 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
Li Zefan120d66e2010-11-09 14:56:50 +08001678 if (!info)
1679 return -ENOMEM;
1680
1681 info->offset = offset;
1682 info->bytes = bytes;
1683
Li Zefan34d52cb2011-03-29 13:46:06 +08001684 spin_lock(&ctl->tree_lock);
Li Zefan120d66e2010-11-09 14:56:50 +08001685
Li Zefan34d52cb2011-03-29 13:46:06 +08001686 if (try_merge_free_space(ctl, info, true))
Li Zefan120d66e2010-11-09 14:56:50 +08001687 goto link;
1688
1689 /*
	1690	 * There was no extent directly to the left or right of this new
	1691	 * extent, so we know we're going to have to allocate a new extent;
	1692	 * before we do that, see if we need to drop this into a bitmap
1693 */
Li Zefan34d52cb2011-03-29 13:46:06 +08001694 ret = insert_into_bitmap(ctl, info);
Li Zefan120d66e2010-11-09 14:56:50 +08001695 if (ret < 0) {
1696 goto out;
1697 } else if (ret) {
1698 ret = 0;
1699 goto out;
1700 }
1701link:
Li Zefan34d52cb2011-03-29 13:46:06 +08001702 ret = link_free_space(ctl, info);
Josef Bacik0f9dd462008-09-23 13:14:11 -04001703 if (ret)
Josef Bacikdc89e982011-01-28 17:05:48 -05001704 kmem_cache_free(btrfs_free_space_cachep, info);
Josef Bacik96303082009-07-13 21:29:25 -04001705out:
Li Zefan34d52cb2011-03-29 13:46:06 +08001706 spin_unlock(&ctl->tree_lock);
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001707
Josef Bacik0f9dd462008-09-23 13:14:11 -04001708 if (ret) {
Josef Bacik96303082009-07-13 21:29:25 -04001709		printk(KERN_CRIT "btrfs: unable to add free space: %d\n", ret);
Stoyan Gaydarovc2934982009-04-02 17:05:11 -04001710 BUG_ON(ret == -EEXIST);
Josef Bacik0f9dd462008-09-23 13:14:11 -04001711 }
1712
Josef Bacik0f9dd462008-09-23 13:14:11 -04001713 return ret;
1714}
1715
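/*
 * Remove [offset, offset + bytes) from the cache. The range can start
 * in an extent entry or a bitmap and cross into later entries, so the
 * body loops (via 'again' and remove_from_bitmap) until every byte is
 * accounted for.
 */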
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001716int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1717 u64 offset, u64 bytes)
Josef Bacik0f9dd462008-09-23 13:14:11 -04001718{
Li Zefan34d52cb2011-03-29 13:46:06 +08001719 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001720 struct btrfs_free_space *info;
Josef Bacik96303082009-07-13 21:29:25 -04001721 struct btrfs_free_space *next_info = NULL;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001722 int ret = 0;
1723
Li Zefan34d52cb2011-03-29 13:46:06 +08001724 spin_lock(&ctl->tree_lock);
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001725
Josef Bacik96303082009-07-13 21:29:25 -04001726again:
Li Zefan34d52cb2011-03-29 13:46:06 +08001727 info = tree_search_offset(ctl, offset, 0, 0);
Josef Bacik96303082009-07-13 21:29:25 -04001728 if (!info) {
Josef Bacik6606bb92009-07-31 11:03:58 -04001729 /*
	1730	 * oops, we didn't find an extent that matched the space we wanted
	1731	 * to remove; look for a bitmap instead
1732 */
Li Zefan34d52cb2011-03-29 13:46:06 +08001733 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
Josef Bacik6606bb92009-07-31 11:03:58 -04001734 1, 0);
1735 if (!info) {
1736 WARN_ON(1);
1737 goto out_lock;
1738 }
Josef Bacik96303082009-07-13 21:29:25 -04001739 }
1740
1741 if (info->bytes < bytes && rb_next(&info->offset_index)) {
1742 u64 end;
1743 next_info = rb_entry(rb_next(&info->offset_index),
1744 struct btrfs_free_space,
1745 offset_index);
1746
1747 if (next_info->bitmap)
Li Zefan34d52cb2011-03-29 13:46:06 +08001748 end = next_info->offset +
1749 BITS_PER_BITMAP * ctl->unit - 1;
Josef Bacik96303082009-07-13 21:29:25 -04001750 else
1751 end = next_info->offset + next_info->bytes;
1752
1753 if (next_info->bytes < bytes ||
1754 next_info->offset > offset || offset > end) {
1755 printk(KERN_CRIT "Found free space at %llu, size %llu,"
1756 " trying to use %llu\n",
1757 (unsigned long long)info->offset,
1758 (unsigned long long)info->bytes,
1759 (unsigned long long)bytes);
Josef Bacik0f9dd462008-09-23 13:14:11 -04001760 WARN_ON(1);
1761 ret = -EINVAL;
Josef Bacik96303082009-07-13 21:29:25 -04001762 goto out_lock;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001763 }
Josef Bacik96303082009-07-13 21:29:25 -04001764
1765 info = next_info;
1766 }
1767
1768 if (info->bytes == bytes) {
Li Zefan34d52cb2011-03-29 13:46:06 +08001769 unlink_free_space(ctl, info);
Josef Bacik96303082009-07-13 21:29:25 -04001770 if (info->bitmap) {
1771 kfree(info->bitmap);
Li Zefan34d52cb2011-03-29 13:46:06 +08001772 ctl->total_bitmaps--;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001773 }
Josef Bacikdc89e982011-01-28 17:05:48 -05001774 kmem_cache_free(btrfs_free_space_cachep, info);
Josef Bacik96303082009-07-13 21:29:25 -04001775 goto out_lock;
1776 }
Josef Bacik0f9dd462008-09-23 13:14:11 -04001777
Josef Bacik96303082009-07-13 21:29:25 -04001778 if (!info->bitmap && info->offset == offset) {
Li Zefan34d52cb2011-03-29 13:46:06 +08001779 unlink_free_space(ctl, info);
Josef Bacik0f9dd462008-09-23 13:14:11 -04001780 info->offset += bytes;
1781 info->bytes -= bytes;
Li Zefan34d52cb2011-03-29 13:46:06 +08001782 link_free_space(ctl, info);
Josef Bacik96303082009-07-13 21:29:25 -04001783 goto out_lock;
1784 }
Josef Bacik0f9dd462008-09-23 13:14:11 -04001785
Josef Bacik96303082009-07-13 21:29:25 -04001786 if (!info->bitmap && info->offset <= offset &&
1787 info->offset + info->bytes >= offset + bytes) {
Chris Mason9b49c9b2008-09-24 11:23:25 -04001788 u64 old_start = info->offset;
1789 /*
1790 * we're freeing space in the middle of the info,
1791 * this can happen during tree log replay
1792 *
1793 * first unlink the old info and then
1794 * insert it again after the hole we're creating
1795 */
Li Zefan34d52cb2011-03-29 13:46:06 +08001796 unlink_free_space(ctl, info);
Chris Mason9b49c9b2008-09-24 11:23:25 -04001797 if (offset + bytes < info->offset + info->bytes) {
1798 u64 old_end = info->offset + info->bytes;
1799
1800 info->offset = offset + bytes;
1801 info->bytes = old_end - info->offset;
Li Zefan34d52cb2011-03-29 13:46:06 +08001802 ret = link_free_space(ctl, info);
Josef Bacik96303082009-07-13 21:29:25 -04001803 WARN_ON(ret);
1804 if (ret)
1805 goto out_lock;
Chris Mason9b49c9b2008-09-24 11:23:25 -04001806 } else {
1807 /* the hole we're creating ends at the end
1808 * of the info struct, just free the info
1809 */
Josef Bacikdc89e982011-01-28 17:05:48 -05001810 kmem_cache_free(btrfs_free_space_cachep, info);
Chris Mason9b49c9b2008-09-24 11:23:25 -04001811 }
Li Zefan34d52cb2011-03-29 13:46:06 +08001812 spin_unlock(&ctl->tree_lock);
Josef Bacik96303082009-07-13 21:29:25 -04001813
1814 /* step two, insert a new info struct to cover
1815 * anything before the hole
Chris Mason9b49c9b2008-09-24 11:23:25 -04001816 */
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001817 ret = btrfs_add_free_space(block_group, old_start,
1818 offset - old_start);
Josef Bacik96303082009-07-13 21:29:25 -04001819 WARN_ON(ret);
1820 goto out;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001821 }
Josef Bacik96303082009-07-13 21:29:25 -04001822
Li Zefan34d52cb2011-03-29 13:46:06 +08001823 ret = remove_from_bitmap(ctl, info, &offset, &bytes);
Josef Bacik96303082009-07-13 21:29:25 -04001824 if (ret == -EAGAIN)
1825 goto again;
1826 BUG_ON(ret);
1827out_lock:
Li Zefan34d52cb2011-03-29 13:46:06 +08001828 spin_unlock(&ctl->tree_lock);
Josef Bacik0f9dd462008-09-23 13:14:11 -04001829out:
Josef Bacik25179202008-10-29 14:49:05 -04001830 return ret;
1831}
1832
Josef Bacik0f9dd462008-09-23 13:14:11 -04001833void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1834 u64 bytes)
1835{
Li Zefan34d52cb2011-03-29 13:46:06 +08001836 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001837 struct btrfs_free_space *info;
1838 struct rb_node *n;
1839 int count = 0;
1840
Li Zefan34d52cb2011-03-29 13:46:06 +08001841 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
Josef Bacik0f9dd462008-09-23 13:14:11 -04001842 info = rb_entry(n, struct btrfs_free_space, offset_index);
1843 if (info->bytes >= bytes)
1844 count++;
Josef Bacik96303082009-07-13 21:29:25 -04001845 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
Joel Becker21380932009-04-21 12:38:29 -07001846 (unsigned long long)info->offset,
Josef Bacik96303082009-07-13 21:29:25 -04001847 (unsigned long long)info->bytes,
1848 (info->bitmap) ? "yes" : "no");
Josef Bacik0f9dd462008-09-23 13:14:11 -04001849 }
Josef Bacik96303082009-07-13 21:29:25 -04001850 printk(KERN_INFO "block group has cluster?: %s\n",
1851 list_empty(&block_group->cluster_list) ? "no" : "yes");
Josef Bacik0f9dd462008-09-23 13:14:11 -04001852	printk(KERN_INFO "%d blocks of free space at or bigger than %llu "
	1853	       "bytes\n", count, (unsigned long long)bytes);
1854}
1855
Li Zefan34d52cb2011-03-29 13:46:06 +08001856void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
Josef Bacik0f9dd462008-09-23 13:14:11 -04001857{
Li Zefan34d52cb2011-03-29 13:46:06 +08001858 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001859
Li Zefan34d52cb2011-03-29 13:46:06 +08001860 spin_lock_init(&ctl->tree_lock);
1861 ctl->unit = block_group->sectorsize;
1862 ctl->start = block_group->key.objectid;
1863 ctl->private = block_group;
1864 ctl->op = &free_space_op;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001865
Li Zefan34d52cb2011-03-29 13:46:06 +08001866 /*
	1867	 * we only want to have 32KiB of RAM per block group for keeping
1868 * track of free space, and if we pass 1/2 of that we want to
1869 * start converting things over to using bitmaps
1870 */
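	/*
	 * For example, assuming struct btrfs_free_space is roughly 48
	 * bytes on 64-bit (plausible, not guaranteed), that is
	 * 16384 / 48 ~= 341 extent entries before bitmaps kick in.
	 */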
1871 ctl->extents_thresh = ((1024 * 32) / 2) /
1872 sizeof(struct btrfs_free_space);
Josef Bacik0f9dd462008-09-23 13:14:11 -04001873}
1874
Chris Masonfa9c0d792009-04-03 09:47:43 -04001875/*
1876 * for a given cluster, put all of its extents back into the free
1877 * space cache. If the block group passed doesn't match the block group
1878 * pointed to by the cluster, someone else raced in and freed the
1879 * cluster already. In that case, we just return without changing anything
1880 */
1881static int
1882__btrfs_return_cluster_to_free_space(
1883 struct btrfs_block_group_cache *block_group,
1884 struct btrfs_free_cluster *cluster)
1885{
Li Zefan34d52cb2011-03-29 13:46:06 +08001886 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Chris Masonfa9c0d792009-04-03 09:47:43 -04001887 struct btrfs_free_space *entry;
1888 struct rb_node *node;
1889
1890 spin_lock(&cluster->lock);
1891 if (cluster->block_group != block_group)
1892 goto out;
1893
Josef Bacik96303082009-07-13 21:29:25 -04001894 cluster->block_group = NULL;
Chris Masonfa9c0d792009-04-03 09:47:43 -04001895 cluster->window_start = 0;
Josef Bacik96303082009-07-13 21:29:25 -04001896 list_del_init(&cluster->block_group_list);
Josef Bacik96303082009-07-13 21:29:25 -04001897
Chris Masonfa9c0d792009-04-03 09:47:43 -04001898 node = rb_first(&cluster->root);
Josef Bacik96303082009-07-13 21:29:25 -04001899 while (node) {
Josef Bacik4e69b592011-03-21 10:11:24 -04001900 bool bitmap;
1901
Chris Masonfa9c0d792009-04-03 09:47:43 -04001902 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1903 node = rb_next(&entry->offset_index);
1904 rb_erase(&entry->offset_index, &cluster->root);
Josef Bacik4e69b592011-03-21 10:11:24 -04001905
1906 bitmap = (entry->bitmap != NULL);
1907 if (!bitmap)
Li Zefan34d52cb2011-03-29 13:46:06 +08001908 try_merge_free_space(ctl, entry, false);
1909 tree_insert_offset(&ctl->free_space_offset,
Josef Bacik4e69b592011-03-21 10:11:24 -04001910 entry->offset, &entry->offset_index, bitmap);
Chris Masonfa9c0d792009-04-03 09:47:43 -04001911 }
Eric Paris6bef4d32010-02-23 19:43:04 +00001912 cluster->root = RB_ROOT;
Josef Bacik96303082009-07-13 21:29:25 -04001913
Chris Masonfa9c0d792009-04-03 09:47:43 -04001914out:
1915 spin_unlock(&cluster->lock);
Josef Bacik96303082009-07-13 21:29:25 -04001916 btrfs_put_block_group(block_group);
Chris Masonfa9c0d792009-04-03 09:47:43 -04001917 return 0;
1918}
1919
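/*
 * Empty the ctl's rbtree. Called with tree_lock held; the lock is
 * dropped briefly whenever a reschedule is pending so teardown of a
 * large cache can't hog the CPU.
 */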
Chris Mason09655372011-05-21 09:27:38 -04001920void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
Josef Bacik0f9dd462008-09-23 13:14:11 -04001921{
1922 struct btrfs_free_space *info;
1923 struct rb_node *node;
Li Zefan581bb052011-04-20 10:06:11 +08001924
Li Zefan581bb052011-04-20 10:06:11 +08001925 while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
1926 info = rb_entry(node, struct btrfs_free_space, offset_index);
Josef Bacik9b90f512011-06-24 16:02:51 +00001927 if (!info->bitmap) {
1928 unlink_free_space(ctl, info);
1929 kmem_cache_free(btrfs_free_space_cachep, info);
1930 } else {
1931 free_bitmap(ctl, info);
1932 }
Li Zefan581bb052011-04-20 10:06:11 +08001933 if (need_resched()) {
1934 spin_unlock(&ctl->tree_lock);
1935 cond_resched();
1936 spin_lock(&ctl->tree_lock);
1937 }
1938 }
Chris Mason09655372011-05-21 09:27:38 -04001939}
1940
1941void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
1942{
1943 spin_lock(&ctl->tree_lock);
1944 __btrfs_remove_free_space_cache_locked(ctl);
Li Zefan581bb052011-04-20 10:06:11 +08001945 spin_unlock(&ctl->tree_lock);
1946}
1947
Josef Bacik0f9dd462008-09-23 13:14:11 -04001948void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
1949{
Li Zefan34d52cb2011-03-29 13:46:06 +08001950 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Chris Masonfa9c0d792009-04-03 09:47:43 -04001951 struct btrfs_free_cluster *cluster;
Josef Bacik96303082009-07-13 21:29:25 -04001952 struct list_head *head;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001953
Li Zefan34d52cb2011-03-29 13:46:06 +08001954 spin_lock(&ctl->tree_lock);
Josef Bacik96303082009-07-13 21:29:25 -04001955 while ((head = block_group->cluster_list.next) !=
1956 &block_group->cluster_list) {
1957 cluster = list_entry(head, struct btrfs_free_cluster,
1958 block_group_list);
Chris Masonfa9c0d792009-04-03 09:47:43 -04001959
1960 WARN_ON(cluster->block_group != block_group);
1961 __btrfs_return_cluster_to_free_space(block_group, cluster);
Josef Bacik96303082009-07-13 21:29:25 -04001962 if (need_resched()) {
Li Zefan34d52cb2011-03-29 13:46:06 +08001963 spin_unlock(&ctl->tree_lock);
Josef Bacik96303082009-07-13 21:29:25 -04001964 cond_resched();
Li Zefan34d52cb2011-03-29 13:46:06 +08001965 spin_lock(&ctl->tree_lock);
Josef Bacik96303082009-07-13 21:29:25 -04001966 }
Chris Masonfa9c0d792009-04-03 09:47:43 -04001967 }
Chris Mason09655372011-05-21 09:27:38 -04001968 __btrfs_remove_free_space_cache_locked(ctl);
Li Zefan34d52cb2011-03-29 13:46:06 +08001969 spin_unlock(&ctl->tree_lock);
Chris Masonfa9c0d792009-04-03 09:47:43 -04001970
Josef Bacik0f9dd462008-09-23 13:14:11 -04001971}
1972
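/*
 * Hand out 'bytes' of space (searching for bytes + empty_size) from
 * this block group's cache, trimming the front of whichever extent or
 * bitmap entry satisfied the search.
 */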
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001973u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
1974 u64 offset, u64 bytes, u64 empty_size)
Josef Bacik0f9dd462008-09-23 13:14:11 -04001975{
Li Zefan34d52cb2011-03-29 13:46:06 +08001976 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001977 struct btrfs_free_space *entry = NULL;
Josef Bacik96303082009-07-13 21:29:25 -04001978 u64 bytes_search = bytes + empty_size;
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001979 u64 ret = 0;
Josef Bacik0f9dd462008-09-23 13:14:11 -04001980
Li Zefan34d52cb2011-03-29 13:46:06 +08001981 spin_lock(&ctl->tree_lock);
1982 entry = find_free_space(ctl, &offset, &bytes_search);
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001983 if (!entry)
Josef Bacik96303082009-07-13 21:29:25 -04001984 goto out;
1985
1986 ret = offset;
1987 if (entry->bitmap) {
Li Zefan34d52cb2011-03-29 13:46:06 +08001988 bitmap_clear_bits(ctl, entry, offset, bytes);
Li Zefanedf6e2d2010-11-09 14:50:07 +08001989 if (!entry->bytes)
Li Zefan34d52cb2011-03-29 13:46:06 +08001990 free_bitmap(ctl, entry);
Josef Bacik96303082009-07-13 21:29:25 -04001991 } else {
Li Zefan34d52cb2011-03-29 13:46:06 +08001992 unlink_free_space(ctl, entry);
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001993 entry->offset += bytes;
1994 entry->bytes -= bytes;
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001995 if (!entry->bytes)
Josef Bacikdc89e982011-01-28 17:05:48 -05001996 kmem_cache_free(btrfs_free_space_cachep, entry);
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001997 else
Li Zefan34d52cb2011-03-29 13:46:06 +08001998 link_free_space(ctl, entry);
Josef Bacik6226cb0a2009-04-03 10:14:18 -04001999 }
Josef Bacik0f9dd462008-09-23 13:14:11 -04002000
Josef Bacik96303082009-07-13 21:29:25 -04002001out:
Li Zefan34d52cb2011-03-29 13:46:06 +08002002 spin_unlock(&ctl->tree_lock);
Josef Bacik817d52f2009-07-13 21:29:25 -04002003
Josef Bacik0f9dd462008-09-23 13:14:11 -04002004 return ret;
2005}
Chris Masonfa9c0d792009-04-03 09:47:43 -04002006
2007/*
2008 * given a cluster, put all of its extents back into the free space
2009 * cache. If a block group is passed, this function will only free
2010 * a cluster that belongs to the passed block group.
2011 *
2012 * Otherwise, it'll get a reference on the block group pointed to by the
2013 * cluster and remove the cluster from it.
2014 */
2015int btrfs_return_cluster_to_free_space(
2016 struct btrfs_block_group_cache *block_group,
2017 struct btrfs_free_cluster *cluster)
2018{
Li Zefan34d52cb2011-03-29 13:46:06 +08002019 struct btrfs_free_space_ctl *ctl;
Chris Masonfa9c0d792009-04-03 09:47:43 -04002020 int ret;
2021
2022 /* first, get a safe pointer to the block group */
2023 spin_lock(&cluster->lock);
2024 if (!block_group) {
2025 block_group = cluster->block_group;
2026 if (!block_group) {
2027 spin_unlock(&cluster->lock);
2028 return 0;
2029 }
2030 } else if (cluster->block_group != block_group) {
	2031		/* someone else has already freed it, don't redo their work */
2032 spin_unlock(&cluster->lock);
2033 return 0;
2034 }
2035 atomic_inc(&block_group->count);
2036 spin_unlock(&cluster->lock);
2037
Li Zefan34d52cb2011-03-29 13:46:06 +08002038 ctl = block_group->free_space_ctl;
2039
Chris Masonfa9c0d792009-04-03 09:47:43 -04002040 /* now return any extents the cluster had on it */
Li Zefan34d52cb2011-03-29 13:46:06 +08002041 spin_lock(&ctl->tree_lock);
Chris Masonfa9c0d792009-04-03 09:47:43 -04002042 ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
Li Zefan34d52cb2011-03-29 13:46:06 +08002043 spin_unlock(&ctl->tree_lock);
Chris Masonfa9c0d792009-04-03 09:47:43 -04002044
2045 /* finally drop our ref */
2046 btrfs_put_block_group(block_group);
2047 return ret;
2048}
2049
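/*
 * Carve 'bytes' out of a bitmap entry that already belongs to the
 * cluster; returns the start offset of the allocation, or 0 when the
 * bitmap has no large-enough run of set bits at or above min_start.
 */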
Josef Bacik96303082009-07-13 21:29:25 -04002050static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
2051 struct btrfs_free_cluster *cluster,
Josef Bacik4e69b592011-03-21 10:11:24 -04002052 struct btrfs_free_space *entry,
Josef Bacik96303082009-07-13 21:29:25 -04002053 u64 bytes, u64 min_start)
2054{
Li Zefan34d52cb2011-03-29 13:46:06 +08002055 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik96303082009-07-13 21:29:25 -04002056 int err;
2057 u64 search_start = cluster->window_start;
2058 u64 search_bytes = bytes;
2059 u64 ret = 0;
2060
Josef Bacik96303082009-07-13 21:29:25 -04002061 search_start = min_start;
2062 search_bytes = bytes;
2063
Li Zefan34d52cb2011-03-29 13:46:06 +08002064 err = search_bitmap(ctl, entry, &search_start, &search_bytes);
Josef Bacik96303082009-07-13 21:29:25 -04002065 if (err)
Josef Bacik4e69b592011-03-21 10:11:24 -04002066 return 0;
Josef Bacik96303082009-07-13 21:29:25 -04002067
2068 ret = search_start;
Miao Xiebb3ac5a2011-08-05 09:32:35 +00002069 __bitmap_clear_bits(ctl, entry, ret, bytes);
Josef Bacik96303082009-07-13 21:29:25 -04002070
2071 return ret;
2072}
2073
Chris Masonfa9c0d792009-04-03 09:47:43 -04002074/*
	2075 * given a cluster, try to allocate 'bytes' from it; returns 0
2076 * if it couldn't find anything suitably large, or a logical disk offset
2077 * if things worked out
2078 */
2079u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2080 struct btrfs_free_cluster *cluster, u64 bytes,
2081 u64 min_start)
2082{
Li Zefan34d52cb2011-03-29 13:46:06 +08002083 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Chris Masonfa9c0d792009-04-03 09:47:43 -04002084 struct btrfs_free_space *entry = NULL;
2085 struct rb_node *node;
2086 u64 ret = 0;
2087
2088 spin_lock(&cluster->lock);
2089 if (bytes > cluster->max_size)
2090 goto out;
2091
2092 if (cluster->block_group != block_group)
2093 goto out;
2094
2095 node = rb_first(&cluster->root);
2096 if (!node)
2097 goto out;
2098
2099 entry = rb_entry(node, struct btrfs_free_space, offset_index);
Chris Masonfa9c0d792009-04-03 09:47:43 -04002100	while (1) {
Josef Bacik4e69b592011-03-21 10:11:24 -04002101 if (entry->bytes < bytes ||
2102 (!entry->bitmap && entry->offset < min_start)) {
Chris Masonfa9c0d792009-04-03 09:47:43 -04002103 node = rb_next(&entry->offset_index);
2104 if (!node)
2105 break;
2106 entry = rb_entry(node, struct btrfs_free_space,
2107 offset_index);
2108 continue;
2109 }
Chris Masonfa9c0d792009-04-03 09:47:43 -04002110
Josef Bacik4e69b592011-03-21 10:11:24 -04002111 if (entry->bitmap) {
2112 ret = btrfs_alloc_from_bitmap(block_group,
2113 cluster, entry, bytes,
2114 min_start);
2115 if (ret == 0) {
Josef Bacik4e69b592011-03-21 10:11:24 -04002116 node = rb_next(&entry->offset_index);
2117 if (!node)
2118 break;
2119 entry = rb_entry(node, struct btrfs_free_space,
2120 offset_index);
2121 continue;
2122 }
2123 } else {
Josef Bacik4e69b592011-03-21 10:11:24 -04002124 ret = entry->offset;
2125
2126 entry->offset += bytes;
2127 entry->bytes -= bytes;
2128 }
Chris Masonfa9c0d792009-04-03 09:47:43 -04002129
Li Zefan5e71b5d2010-11-09 14:55:34 +08002130 if (entry->bytes == 0)
Chris Masonfa9c0d792009-04-03 09:47:43 -04002131 rb_erase(&entry->offset_index, &cluster->root);
Chris Masonfa9c0d792009-04-03 09:47:43 -04002132 break;
2133 }
2134out:
2135 spin_unlock(&cluster->lock);
Josef Bacik96303082009-07-13 21:29:25 -04002136
Li Zefan5e71b5d2010-11-09 14:55:34 +08002137 if (!ret)
2138 return 0;
2139
Li Zefan34d52cb2011-03-29 13:46:06 +08002140 spin_lock(&ctl->tree_lock);
Li Zefan5e71b5d2010-11-09 14:55:34 +08002141
Li Zefan34d52cb2011-03-29 13:46:06 +08002142 ctl->free_space -= bytes;
Li Zefan5e71b5d2010-11-09 14:55:34 +08002143 if (entry->bytes == 0) {
Li Zefan34d52cb2011-03-29 13:46:06 +08002144 ctl->free_extents--;
Josef Bacik4e69b592011-03-21 10:11:24 -04002145 if (entry->bitmap) {
2146 kfree(entry->bitmap);
Li Zefan34d52cb2011-03-29 13:46:06 +08002147 ctl->total_bitmaps--;
2148 ctl->op->recalc_thresholds(ctl);
Josef Bacik4e69b592011-03-21 10:11:24 -04002149 }
Josef Bacikdc89e982011-01-28 17:05:48 -05002150 kmem_cache_free(btrfs_free_space_cachep, entry);
Li Zefan5e71b5d2010-11-09 14:55:34 +08002151 }
2152
Li Zefan34d52cb2011-03-29 13:46:06 +08002153 spin_unlock(&ctl->tree_lock);
Li Zefan5e71b5d2010-11-09 14:55:34 +08002154
Chris Masonfa9c0d792009-04-03 09:47:43 -04002155 return ret;
2156}
2157
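/*
 * Try to build the cluster from a single bitmap entry: accumulate
 * runs of at least 'bytes' worth of set bits until min_bytes in total
 * is found, then move the bitmap from the free-space tree into the
 * cluster's rbtree.
 */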
Josef Bacik96303082009-07-13 21:29:25 -04002158static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
2159 struct btrfs_free_space *entry,
2160 struct btrfs_free_cluster *cluster,
2161 u64 offset, u64 bytes, u64 min_bytes)
2162{
Li Zefan34d52cb2011-03-29 13:46:06 +08002163 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik96303082009-07-13 21:29:25 -04002164 unsigned long next_zero;
2165 unsigned long i;
2166 unsigned long search_bits;
2167 unsigned long total_bits;
2168 unsigned long found_bits;
2169 unsigned long start = 0;
2170 unsigned long total_found = 0;
Josef Bacik4e69b592011-03-21 10:11:24 -04002171 int ret;
Josef Bacik96303082009-07-13 21:29:25 -04002172 bool found = false;
2173
2174 i = offset_to_bit(entry->offset, block_group->sectorsize,
2175 max_t(u64, offset, entry->offset));
Josef Bacikd0a365e2011-03-18 15:27:43 -04002176 search_bits = bytes_to_bits(bytes, block_group->sectorsize);
2177 total_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
Josef Bacik96303082009-07-13 21:29:25 -04002178
2179again:
2180 found_bits = 0;
2181 for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
2182 i < BITS_PER_BITMAP;
2183 i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
2184 next_zero = find_next_zero_bit(entry->bitmap,
2185 BITS_PER_BITMAP, i);
2186 if (next_zero - i >= search_bits) {
2187 found_bits = next_zero - i;
2188 break;
2189 }
2190 i = next_zero;
2191 }
2192
2193 if (!found_bits)
Josef Bacik4e69b592011-03-21 10:11:24 -04002194 return -ENOSPC;
Josef Bacik96303082009-07-13 21:29:25 -04002195
2196 if (!found) {
2197 start = i;
2198 found = true;
2199 }
2200
2201 total_found += found_bits;
2202
2203 if (cluster->max_size < found_bits * block_group->sectorsize)
2204 cluster->max_size = found_bits * block_group->sectorsize;
2205
2206 if (total_found < total_bits) {
2207 i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
2208 if (i - start > total_bits * 2) {
2209 total_found = 0;
2210 cluster->max_size = 0;
2211 found = false;
2212 }
2213 goto again;
2214 }
2215
2216 cluster->window_start = start * block_group->sectorsize +
2217 entry->offset;
Li Zefan34d52cb2011-03-29 13:46:06 +08002218 rb_erase(&entry->offset_index, &ctl->free_space_offset);
Josef Bacik4e69b592011-03-21 10:11:24 -04002219 ret = tree_insert_offset(&cluster->root, entry->offset,
2220 &entry->offset_index, 1);
2221 BUG_ON(ret);
Josef Bacik96303082009-07-13 21:29:25 -04002222
2223 return 0;
2224}
2225
Chris Masonfa9c0d792009-04-03 09:47:43 -04002226/*
Josef Bacik4e69b592011-03-21 10:11:24 -04002227 * This searches the block group for just extents to fill the cluster with.
2228 */
Josef Bacik3de85bb2011-05-25 13:07:37 -04002229static noinline int
2230setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2231 struct btrfs_free_cluster *cluster,
2232 struct list_head *bitmaps, u64 offset, u64 bytes,
2233 u64 min_bytes)
Josef Bacik4e69b592011-03-21 10:11:24 -04002234{
Li Zefan34d52cb2011-03-29 13:46:06 +08002235 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik4e69b592011-03-21 10:11:24 -04002236 struct btrfs_free_space *first = NULL;
2237 struct btrfs_free_space *entry = NULL;
2238 struct btrfs_free_space *prev = NULL;
2239 struct btrfs_free_space *last;
2240 struct rb_node *node;
2241 u64 window_start;
2242 u64 window_free;
2243 u64 max_extent;
2244 u64 max_gap = 128 * 1024;
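	/*
	 * The window is reset further down when consecutive free extents
	 * sit more than 128KiB apart, or when the window spans more than
	 * twice min_bytes without filling.
	 */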
2245
Li Zefan34d52cb2011-03-29 13:46:06 +08002246 entry = tree_search_offset(ctl, offset, 0, 1);
Josef Bacik4e69b592011-03-21 10:11:24 -04002247 if (!entry)
2248 return -ENOSPC;
2249
2250 /*
2251 * We don't want bitmaps, so just move along until we find a normal
2252 * extent entry.
2253 */
2254 while (entry->bitmap) {
Josef Bacik86d4a772011-05-25 13:03:16 -04002255 if (list_empty(&entry->list))
2256 list_add_tail(&entry->list, bitmaps);
Josef Bacik4e69b592011-03-21 10:11:24 -04002257 node = rb_next(&entry->offset_index);
2258 if (!node)
2259 return -ENOSPC;
2260 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2261 }
2262
2263 window_start = entry->offset;
2264 window_free = entry->bytes;
2265 max_extent = entry->bytes;
2266 first = entry;
2267 last = entry;
2268 prev = entry;
2269
2270 while (window_free <= min_bytes) {
2271 node = rb_next(&entry->offset_index);
2272 if (!node)
2273 return -ENOSPC;
2274 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2275
Josef Bacik86d4a772011-05-25 13:03:16 -04002276 if (entry->bitmap) {
2277 if (list_empty(&entry->list))
2278 list_add_tail(&entry->list, bitmaps);
Josef Bacik4e69b592011-03-21 10:11:24 -04002279 continue;
Josef Bacik86d4a772011-05-25 13:03:16 -04002280 }
2281
Josef Bacik4e69b592011-03-21 10:11:24 -04002282 /*
2283 * we haven't filled the empty size and the window is
	2284	 * very large; reset and try again
2285 */
2286 if (entry->offset - (prev->offset + prev->bytes) > max_gap ||
2287 entry->offset - window_start > (min_bytes * 2)) {
2288 first = entry;
2289 window_start = entry->offset;
2290 window_free = entry->bytes;
2291 last = entry;
2292 max_extent = entry->bytes;
2293 } else {
2294 last = entry;
2295 window_free += entry->bytes;
2296 if (entry->bytes > max_extent)
2297 max_extent = entry->bytes;
2298 }
2299 prev = entry;
2300 }
2301
2302 cluster->window_start = first->offset;
2303
2304 node = &first->offset_index;
2305
2306 /*
2307 * now we've found our entries, pull them out of the free space
2308 * cache and put them into the cluster rbtree
2309 */
2310 do {
2311 int ret;
2312
2313 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2314 node = rb_next(&entry->offset_index);
2315 if (entry->bitmap)
2316 continue;
2317
Li Zefan34d52cb2011-03-29 13:46:06 +08002318 rb_erase(&entry->offset_index, &ctl->free_space_offset);
Josef Bacik4e69b592011-03-21 10:11:24 -04002319 ret = tree_insert_offset(&cluster->root, entry->offset,
2320 &entry->offset_index, 0);
2321 BUG_ON(ret);
2322 } while (node && entry != last);
2323
2324 cluster->max_size = max_extent;
2325
2326 return 0;
2327}
2328
2329/*
	2330 * This specifically looks for bitmaps that may work in the cluster; we assume
2331 * that we have already failed to find extents that will work.
2332 */
Josef Bacik3de85bb2011-05-25 13:07:37 -04002333static noinline int
2334setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2335 struct btrfs_free_cluster *cluster,
2336 struct list_head *bitmaps, u64 offset, u64 bytes,
2337 u64 min_bytes)
Josef Bacik4e69b592011-03-21 10:11:24 -04002338{
Li Zefan34d52cb2011-03-29 13:46:06 +08002339 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik4e69b592011-03-21 10:11:24 -04002340 struct btrfs_free_space *entry;
2341 struct rb_node *node;
2342 int ret = -ENOSPC;
2343
Li Zefan34d52cb2011-03-29 13:46:06 +08002344 if (ctl->total_bitmaps == 0)
Josef Bacik4e69b592011-03-21 10:11:24 -04002345 return -ENOSPC;
2346
Josef Bacik86d4a772011-05-25 13:03:16 -04002347 /*
2348 * First check our cached list of bitmaps and see if there is an entry
2349 * here that will work.
2350 */
2351 list_for_each_entry(entry, bitmaps, list) {
2352 if (entry->bytes < min_bytes)
2353 continue;
2354 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2355 bytes, min_bytes);
2356 if (!ret)
2357 return 0;
2358 }
2359
2360 /*
2361 * If we do have entries on our list and we are here then we didn't find
2362 * anything, so go ahead and get the next entry after the last entry in
2363 * this list and start the search from there.
2364 */
2365 if (!list_empty(bitmaps)) {
2366 entry = list_entry(bitmaps->prev, struct btrfs_free_space,
2367 list);
2368 node = rb_next(&entry->offset_index);
2369 if (!node)
2370 return -ENOSPC;
2371 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2372 goto search;
2373 }
2374
Li Zefan34d52cb2011-03-29 13:46:06 +08002375 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
Josef Bacik4e69b592011-03-21 10:11:24 -04002376 if (!entry)
2377 return -ENOSPC;
2378
Josef Bacik86d4a772011-05-25 13:03:16 -04002379search:
Josef Bacik4e69b592011-03-21 10:11:24 -04002380 node = &entry->offset_index;
2381 do {
2382 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2383 node = rb_next(&entry->offset_index);
2384 if (!entry->bitmap)
2385 continue;
2386 if (entry->bytes < min_bytes)
2387 continue;
2388 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2389 bytes, min_bytes);
2390 } while (ret && node);
2391
2392 return ret;
2393}
2394
2395/*
Chris Masonfa9c0d792009-04-03 09:47:43 -04002396 * here we try to find a cluster of blocks in a block group. The goal
	2397 * is to find at least 'bytes' free and up to empty_size + bytes free.
	2398 * We might not find them all in one contiguous area.
	2399 *
	2400 * returns zero and sets up cluster if things worked out, otherwise
	2401 * it returns -ENOSPC
2402 */
2403int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
Chris Mason451d7582009-06-09 20:28:34 -04002404 struct btrfs_root *root,
Chris Masonfa9c0d792009-04-03 09:47:43 -04002405 struct btrfs_block_group_cache *block_group,
2406 struct btrfs_free_cluster *cluster,
2407 u64 offset, u64 bytes, u64 empty_size)
2408{
Li Zefan34d52cb2011-03-29 13:46:06 +08002409 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Josef Bacik86d4a772011-05-25 13:03:16 -04002410 struct list_head bitmaps;
2411 struct btrfs_free_space *entry, *tmp;
Chris Masonfa9c0d792009-04-03 09:47:43 -04002412 u64 min_bytes;
Chris Masonfa9c0d792009-04-03 09:47:43 -04002413 int ret;
2414
	2415	/* for metadata, allow allocations with more holes */
Chris Mason451d7582009-06-09 20:28:34 -04002416 if (btrfs_test_opt(root, SSD_SPREAD)) {
2417 min_bytes = bytes + empty_size;
2418 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
Chris Masonfa9c0d792009-04-03 09:47:43 -04002419 /*
2420 * we want to do larger allocations when we are
	2421		 * flushing out the delayed refs; it helps prevent
2422 * making more work as we go along.
2423 */
2424 if (trans->transaction->delayed_refs.flushing)
2425 min_bytes = max(bytes, (bytes + empty_size) >> 1);
2426 else
2427 min_bytes = max(bytes, (bytes + empty_size) >> 4);
2428 } else
2429 min_bytes = max(bytes, (bytes + empty_size) >> 2);
2430
Li Zefan34d52cb2011-03-29 13:46:06 +08002431 spin_lock(&ctl->tree_lock);
Josef Bacik7d0d2e82011-03-18 15:13:42 -04002432
2433 /*
	2434	 * If we know we don't have enough space to make a cluster, don't even
2435 * bother doing all the work to try and find one.
2436 */
Li Zefan34d52cb2011-03-29 13:46:06 +08002437 if (ctl->free_space < min_bytes) {
2438 spin_unlock(&ctl->tree_lock);
Josef Bacik7d0d2e82011-03-18 15:13:42 -04002439 return -ENOSPC;
2440 }
2441
Chris Masonfa9c0d792009-04-03 09:47:43 -04002442 spin_lock(&cluster->lock);
2443
2444 /* someone already found a cluster, hooray */
2445 if (cluster->block_group) {
2446 ret = 0;
2447 goto out;
2448 }
Josef Bacik4e69b592011-03-21 10:11:24 -04002449
Josef Bacik86d4a772011-05-25 13:03:16 -04002450 INIT_LIST_HEAD(&bitmaps);
2451 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
2452 bytes, min_bytes);
Josef Bacik4e69b592011-03-21 10:11:24 -04002453 if (ret)
Josef Bacik86d4a772011-05-25 13:03:16 -04002454 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
2455 offset, bytes, min_bytes);
2456
2457 /* Clear our temporary list */
2458 list_for_each_entry_safe(entry, tmp, &bitmaps, list)
2459 list_del_init(&entry->list);
Josef Bacik4e69b592011-03-21 10:11:24 -04002460
2461 if (!ret) {
2462 atomic_inc(&block_group->count);
2463 list_add_tail(&cluster->block_group_list,
2464 &block_group->cluster_list);
2465 cluster->block_group = block_group;
Chris Masonfa9c0d792009-04-03 09:47:43 -04002466 }
Chris Masonfa9c0d792009-04-03 09:47:43 -04002467out:
2468 spin_unlock(&cluster->lock);
Li Zefan34d52cb2011-03-29 13:46:06 +08002469 spin_unlock(&ctl->tree_lock);
Chris Masonfa9c0d792009-04-03 09:47:43 -04002470
2471 return ret;
2472}
2473
2474/*
2475 * simple code to zero out a cluster
2476 */
2477void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2478{
2479 spin_lock_init(&cluster->lock);
2480 spin_lock_init(&cluster->refill_lock);
Eric Paris6bef4d32010-02-23 19:43:04 +00002481 cluster->root = RB_ROOT;
Chris Masonfa9c0d792009-04-03 09:47:43 -04002482 cluster->max_size = 0;
2483 INIT_LIST_HEAD(&cluster->block_group_list);
2484 cluster->block_group = NULL;
2485}
2486
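/*
 * Walk the free space in [start, end), temporarily reserving each
 * chunk of at least minlen bytes, issuing a discard for it and then
 * re-adding it to the cache; *trimmed accumulates the byte count the
 * device reported as discarded.
 */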
Li Dongyangf7039b12011-03-24 10:24:28 +00002487int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2488 u64 *trimmed, u64 start, u64 end, u64 minlen)
2489{
Li Zefan34d52cb2011-03-29 13:46:06 +08002490 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
Li Dongyangf7039b12011-03-24 10:24:28 +00002491 struct btrfs_free_space *entry = NULL;
2492 struct btrfs_fs_info *fs_info = block_group->fs_info;
2493 u64 bytes = 0;
2494 u64 actually_trimmed;
2495 int ret = 0;
2496
2497 *trimmed = 0;
2498
2499 while (start < end) {
Li Zefan34d52cb2011-03-29 13:46:06 +08002500 spin_lock(&ctl->tree_lock);
Li Dongyangf7039b12011-03-24 10:24:28 +00002501
Li Zefan34d52cb2011-03-29 13:46:06 +08002502 if (ctl->free_space < minlen) {
2503 spin_unlock(&ctl->tree_lock);
Li Dongyangf7039b12011-03-24 10:24:28 +00002504 break;
2505 }
2506
Li Zefan34d52cb2011-03-29 13:46:06 +08002507 entry = tree_search_offset(ctl, start, 0, 1);
Li Dongyangf7039b12011-03-24 10:24:28 +00002508 if (!entry)
Li Zefan34d52cb2011-03-29 13:46:06 +08002509 entry = tree_search_offset(ctl,
2510 offset_to_bitmap(ctl, start),
Li Dongyangf7039b12011-03-24 10:24:28 +00002511 1, 1);
2512
2513 if (!entry || entry->offset >= end) {
Li Zefan34d52cb2011-03-29 13:46:06 +08002514 spin_unlock(&ctl->tree_lock);
Li Dongyangf7039b12011-03-24 10:24:28 +00002515 break;
2516 }
2517
2518 if (entry->bitmap) {
Li Zefan34d52cb2011-03-29 13:46:06 +08002519 ret = search_bitmap(ctl, entry, &start, &bytes);
Li Dongyangf7039b12011-03-24 10:24:28 +00002520 if (!ret) {
2521 if (start >= end) {
Li Zefan34d52cb2011-03-29 13:46:06 +08002522 spin_unlock(&ctl->tree_lock);
Li Dongyangf7039b12011-03-24 10:24:28 +00002523 break;
2524 }
2525 bytes = min(bytes, end - start);
Li Zefan34d52cb2011-03-29 13:46:06 +08002526 bitmap_clear_bits(ctl, entry, start, bytes);
Li Dongyangf7039b12011-03-24 10:24:28 +00002527 if (entry->bytes == 0)
Li Zefan34d52cb2011-03-29 13:46:06 +08002528 free_bitmap(ctl, entry);
Li Dongyangf7039b12011-03-24 10:24:28 +00002529 } else {
2530 start = entry->offset + BITS_PER_BITMAP *
2531 block_group->sectorsize;
Li Zefan34d52cb2011-03-29 13:46:06 +08002532 spin_unlock(&ctl->tree_lock);
Li Dongyangf7039b12011-03-24 10:24:28 +00002533 ret = 0;
2534 continue;
2535 }
2536 } else {
2537 start = entry->offset;
2538 bytes = min(entry->bytes, end - start);
Li Zefan34d52cb2011-03-29 13:46:06 +08002539 unlink_free_space(ctl, entry);
Li Zefanf789b682011-04-25 19:43:52 -04002540 kmem_cache_free(btrfs_free_space_cachep, entry);
Li Dongyangf7039b12011-03-24 10:24:28 +00002541 }
2542
Li Zefan34d52cb2011-03-29 13:46:06 +08002543 spin_unlock(&ctl->tree_lock);
Li Dongyangf7039b12011-03-24 10:24:28 +00002544
2545 if (bytes >= minlen) {
Josef Bacikfb25e912011-07-26 17:00:46 -04002546 struct btrfs_space_info *space_info;
2547 int update = 0;
2548
2549 space_info = block_group->space_info;
2550 spin_lock(&space_info->lock);
2551 spin_lock(&block_group->lock);
2552 if (!block_group->ro) {
2553 block_group->reserved += bytes;
2554 space_info->bytes_reserved += bytes;
2555 update = 1;
2556 }
2557 spin_unlock(&block_group->lock);
2558 spin_unlock(&space_info->lock);
Li Dongyangf7039b12011-03-24 10:24:28 +00002559
2560 ret = btrfs_error_discard_extent(fs_info->extent_root,
2561 start,
2562 bytes,
2563 &actually_trimmed);
2564
Li Zefan34d52cb2011-03-29 13:46:06 +08002565 btrfs_add_free_space(block_group, start, bytes);
Josef Bacikfb25e912011-07-26 17:00:46 -04002566 if (update) {
2567 spin_lock(&space_info->lock);
2568 spin_lock(&block_group->lock);
2569 if (block_group->ro)
2570 space_info->bytes_readonly += bytes;
2571 block_group->reserved -= bytes;
2572 space_info->bytes_reserved -= bytes;
2573 spin_unlock(&space_info->lock);
2574 spin_unlock(&block_group->lock);
2575 }
Li Dongyangf7039b12011-03-24 10:24:28 +00002576
2577 if (ret)
2578 break;
2579 *trimmed += actually_trimmed;
2580 }
2581 start += bytes;
2582 bytes = 0;
2583
2584 if (fatal_signal_pending(current)) {
2585 ret = -ERESTARTSYS;
2586 break;
2587 }
2588
2589 cond_resched();
2590 }
2591
2592 return ret;
2593}
Li Zefan581bb052011-04-20 10:06:11 +08002594
2595/*
2596 * Find the left-most item in the cache tree, and then return the
2597 * smallest inode number in the item.
2598 *
2599 * Note: the returned inode number may not be the smallest one in
2600 * the tree, if the left-most item is a bitmap.
2601 */
2602u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
2603{
2604 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
2605 struct btrfs_free_space *entry = NULL;
2606 u64 ino = 0;
2607
2608 spin_lock(&ctl->tree_lock);
2609
2610 if (RB_EMPTY_ROOT(&ctl->free_space_offset))
2611 goto out;
2612
2613 entry = rb_entry(rb_first(&ctl->free_space_offset),
2614 struct btrfs_free_space, offset_index);
2615
2616 if (!entry->bitmap) {
2617 ino = entry->offset;
2618
2619 unlink_free_space(ctl, entry);
2620 entry->offset++;
2621 entry->bytes--;
2622 if (!entry->bytes)
2623 kmem_cache_free(btrfs_free_space_cachep, entry);
2624 else
2625 link_free_space(ctl, entry);
2626 } else {
2627 u64 offset = 0;
2628 u64 count = 1;
2629 int ret;
2630
2631 ret = search_bitmap(ctl, entry, &offset, &count);
2632 BUG_ON(ret);
2633
2634 ino = offset;
2635 bitmap_clear_bits(ctl, entry, offset, 1);
2636 if (entry->bytes == 0)
2637 free_bitmap(ctl, entry);
2638 }
2639out:
2640 spin_unlock(&ctl->tree_lock);
2641
2642 return ino;
2643}
Li Zefan82d59022011-04-20 10:33:24 +08002644
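/*
 * Return the cached free-ino inode for this root, grabbing a
 * reference, or look it up on disk (at offset 0) and cache it unless
 * the filesystem is shutting down.
 */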
2645struct inode *lookup_free_ino_inode(struct btrfs_root *root,
2646 struct btrfs_path *path)
2647{
2648 struct inode *inode = NULL;
2649
2650 spin_lock(&root->cache_lock);
2651 if (root->cache_inode)
2652 inode = igrab(root->cache_inode);
2653 spin_unlock(&root->cache_lock);
2654 if (inode)
2655 return inode;
2656
2657 inode = __lookup_free_space_inode(root, path, 0);
2658 if (IS_ERR(inode))
2659 return inode;
2660
2661 spin_lock(&root->cache_lock);
David Sterba7841cb22011-05-31 18:07:27 +02002662 if (!btrfs_fs_closing(root->fs_info))
Li Zefan82d59022011-04-20 10:33:24 +08002663 root->cache_inode = igrab(inode);
2664 spin_unlock(&root->cache_lock);
2665
2666 return inode;
2667}
2668
2669int create_free_ino_inode(struct btrfs_root *root,
2670 struct btrfs_trans_handle *trans,
2671 struct btrfs_path *path)
2672{
2673 return __create_free_space_inode(root, trans, path,
2674 BTRFS_FREE_INO_OBJECTID, 0);
2675}
2676
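/*
 * Populate the free-ino ctl from the on-disk cache, but only when the
 * inode-map cache is enabled, the fs isn't being unmounted, and the
 * cached generation matches the root's.
 */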
2677int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2678{
2679 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2680 struct btrfs_path *path;
2681 struct inode *inode;
2682 int ret = 0;
2683 u64 root_gen = btrfs_root_generation(&root->root_item);
2684
Chris Mason4b9465c2011-06-03 09:36:29 -04002685 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2686 return 0;
2687
Li Zefan82d59022011-04-20 10:33:24 +08002688 /*
2689 * If we're unmounting then just return, since this does a search on the
2690 * normal root and not the commit root and we could deadlock.
2691 */
David Sterba7841cb22011-05-31 18:07:27 +02002692 if (btrfs_fs_closing(fs_info))
Li Zefan82d59022011-04-20 10:33:24 +08002693 return 0;
2694
2695 path = btrfs_alloc_path();
2696 if (!path)
2697 return 0;
2698
2699 inode = lookup_free_ino_inode(root, path);
2700 if (IS_ERR(inode))
2701 goto out;
2702
2703 if (root_gen != BTRFS_I(inode)->generation)
2704 goto out_put;
2705
2706 ret = __load_free_space_cache(root, inode, ctl, path, 0);
2707
2708 if (ret < 0)
2709 printk(KERN_ERR "btrfs: failed to load free ino cache for "
2710 "root %llu\n", root->root_key.objectid);
2711out_put:
2712 iput(inode);
2713out:
2714 btrfs_free_path(path);
2715 return ret;
2716}
2717
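/*
 * Write the free-ino cache back to its inode; on failure the reserved
 * delalloc metadata is released and the error is only logged when
 * built with DEBUG.
 */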
2718int btrfs_write_out_ino_cache(struct btrfs_root *root,
2719 struct btrfs_trans_handle *trans,
2720 struct btrfs_path *path)
2721{
2722 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2723 struct inode *inode;
2724 int ret;
2725
Chris Mason4b9465c2011-06-03 09:36:29 -04002726 if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2727 return 0;
2728
Li Zefan82d59022011-04-20 10:33:24 +08002729 inode = lookup_free_ino_inode(root, path);
2730 if (IS_ERR(inode))
2731 return 0;
2732
2733 ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
Josef Bacikc09544e2011-08-30 10:19:10 -04002734 if (ret) {
2735 btrfs_delalloc_release_metadata(inode, inode->i_size);
2736#ifdef DEBUG
Li Zefan82d59022011-04-20 10:33:24 +08002737 printk(KERN_ERR "btrfs: failed to write free ino cache "
2738 "for root %llu\n", root->root_key.objectid);
Josef Bacikc09544e2011-08-30 10:19:10 -04002739#endif
2740 }
Li Zefan82d59022011-04-20 10:33:24 +08002741
2742 iput(inode);
2743 return ret;
2744}