blob: f488fac04d99ea45eea93607bbf17c021b5b2207 [file] [log] [blame]
Josef Bacik0f9dd462008-09-23 13:14:11 -04001/*
2 * Copyright (C) 2008 Red Hat. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
Josef Bacik96303082009-07-13 21:29:25 -040019#include <linux/pagemap.h>
Josef Bacik0f9dd462008-09-23 13:14:11 -040020#include <linux/sched.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090021#include <linux/slab.h>
Josef Bacik96303082009-07-13 21:29:25 -040022#include <linux/math64.h>
Josef Bacik0f9dd462008-09-23 13:14:11 -040023#include "ctree.h"
Chris Masonfa9c0d792009-04-03 09:47:43 -040024#include "free-space-cache.h"
25#include "transaction.h"
26
Josef Bacik96303082009-07-13 21:29:25 -040027#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
28#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
29
30static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
31 u64 offset)
32{
33 BUG_ON(offset < bitmap_start);
34 offset -= bitmap_start;
35 return (unsigned long)(div64_u64(offset, sectorsize));
36}
37
38static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize)
39{
40 return (unsigned long)(div64_u64(bytes, sectorsize));
41}
42
43static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
44 u64 offset)
45{
46 u64 bitmap_start;
47 u64 bytes_per_bitmap;
48
49 bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize;
50 bitmap_start = offset - block_group->key.objectid;
51 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
52 bitmap_start *= bytes_per_bitmap;
53 bitmap_start += block_group->key.objectid;
54
55 return bitmap_start;
56}
Josef Bacik0f9dd462008-09-23 13:14:11 -040057
58static int tree_insert_offset(struct rb_root *root, u64 offset,
Josef Bacik96303082009-07-13 21:29:25 -040059 struct rb_node *node, int bitmap)
Josef Bacik0f9dd462008-09-23 13:14:11 -040060{
61 struct rb_node **p = &root->rb_node;
62 struct rb_node *parent = NULL;
63 struct btrfs_free_space *info;
64
65 while (*p) {
66 parent = *p;
67 info = rb_entry(parent, struct btrfs_free_space, offset_index);
68
Josef Bacik96303082009-07-13 21:29:25 -040069 if (offset < info->offset) {
Josef Bacik0f9dd462008-09-23 13:14:11 -040070 p = &(*p)->rb_left;
Josef Bacik96303082009-07-13 21:29:25 -040071 } else if (offset > info->offset) {
Josef Bacik0f9dd462008-09-23 13:14:11 -040072 p = &(*p)->rb_right;
Josef Bacik96303082009-07-13 21:29:25 -040073 } else {
74 /*
75 * we could have a bitmap entry and an extent entry
76 * share the same offset. If this is the case, we want
77 * the extent entry to always be found first if we do a
78 * linear search through the tree, since we want to have
79 * the quickest allocation time, and allocating from an
80 * extent is faster than allocating from a bitmap. So
81 * if we're inserting a bitmap and we find an entry at
82 * this offset, we want to go right, or after this entry
83 * logically. If we are inserting an extent and we've
84 * found a bitmap, we want to go left, or before
85 * logically.
86 */
87 if (bitmap) {
88 WARN_ON(info->bitmap);
89 p = &(*p)->rb_right;
90 } else {
91 WARN_ON(!info->bitmap);
92 p = &(*p)->rb_left;
93 }
94 }
Josef Bacik0f9dd462008-09-23 13:14:11 -040095 }
96
97 rb_link_node(node, parent, p);
98 rb_insert_color(node, root);
99
100 return 0;
101}
102
/*
 * searches the tree for the given offset.
 *
 * bitmap_only - if set, only return a bitmap entry whose offset is
 * exactly the bitmap region containing 'offset' (or NULL).
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.  Without fuzzy, only an entry actually covering 'offset' is
 * returned.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_block_group_cache *block_group,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = block_group->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		/* exact-offset match found */
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = &entry->offset_index;
			while (1) {
				n = rb_prev(n);
				if (!n)
					break;
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap) {
					if (prev->offset + prev->bytes > offset)
						entry = prev;
					break;
				}
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			/* nothing at or before 'offset' at all */
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		/*
		 * same duplicate-offset rule as above: a preceding extent
		 * entry that still covers 'offset' wins over the bitmap.
		 */
		n = &entry->offset_index;
		while (1) {
			n = rb_prev(n);
			if (!n)
				break;
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap) {
				if (prev->offset + prev->bytes > offset)
					return prev;
				break;
			}
		}
		if (entry->offset + BITS_PER_BITMAP *
		    block_group->sectorsize > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	/* fuzzy: walk forward to the first entry ending past 'offset' */
	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    block_group->sectorsize > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}
236
237static void unlink_free_space(struct btrfs_block_group_cache *block_group,
238 struct btrfs_free_space *info)
239{
240 rb_erase(&info->offset_index, &block_group->free_space_offset);
Josef Bacik96303082009-07-13 21:29:25 -0400241 block_group->free_extents--;
Josef Bacik817d52f2009-07-13 21:29:25 -0400242 block_group->free_space -= info->bytes;
Josef Bacik0f9dd462008-09-23 13:14:11 -0400243}
244
245static int link_free_space(struct btrfs_block_group_cache *block_group,
246 struct btrfs_free_space *info)
247{
248 int ret = 0;
249
Josef Bacik96303082009-07-13 21:29:25 -0400250 BUG_ON(!info->bitmap && !info->bytes);
Josef Bacik0f9dd462008-09-23 13:14:11 -0400251 ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
Josef Bacik96303082009-07-13 21:29:25 -0400252 &info->offset_index, (info->bitmap != NULL));
Josef Bacik0f9dd462008-09-23 13:14:11 -0400253 if (ret)
254 return ret;
255
Josef Bacik817d52f2009-07-13 21:29:25 -0400256 block_group->free_space += info->bytes;
Josef Bacik96303082009-07-13 21:29:25 -0400257 block_group->free_extents++;
258 return ret;
259}
260
261static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
262{
Josef Bacik25891f72009-09-11 16:11:20 -0400263 u64 max_bytes;
264 u64 bitmap_bytes;
265 u64 extent_bytes;
Josef Bacik96303082009-07-13 21:29:25 -0400266
267 /*
268 * The goal is to keep the total amount of memory used per 1gb of space
269 * at or below 32k, so we need to adjust how much memory we allow to be
270 * used by extent based free space tracking
271 */
272 max_bytes = MAX_CACHE_BYTES_PER_GIG *
273 (div64_u64(block_group->key.offset, 1024 * 1024 * 1024));
274
Josef Bacik25891f72009-09-11 16:11:20 -0400275 /*
276 * we want to account for 1 more bitmap than what we have so we can make
277 * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
278 * we add more bitmaps.
279 */
280 bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE;
Josef Bacik96303082009-07-13 21:29:25 -0400281
Josef Bacik25891f72009-09-11 16:11:20 -0400282 if (bitmap_bytes >= max_bytes) {
283 block_group->extents_thresh = 0;
284 return;
Josef Bacik96303082009-07-13 21:29:25 -0400285 }
Josef Bacik25891f72009-09-11 16:11:20 -0400286
287 /*
288 * we want the extent entry threshold to always be at most 1/2 the maxw
289 * bytes we can have, or whatever is less than that.
290 */
291 extent_bytes = max_bytes - bitmap_bytes;
292 extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
293
294 block_group->extents_thresh =
295 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
Josef Bacik96303082009-07-13 21:29:25 -0400296}
297
Josef Bacik817d52f2009-07-13 21:29:25 -0400298static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
299 struct btrfs_free_space *info, u64 offset,
300 u64 bytes)
Josef Bacik96303082009-07-13 21:29:25 -0400301{
302 unsigned long start, end;
303 unsigned long i;
304
Josef Bacik817d52f2009-07-13 21:29:25 -0400305 start = offset_to_bit(info->offset, block_group->sectorsize, offset);
306 end = start + bytes_to_bits(bytes, block_group->sectorsize);
Josef Bacik96303082009-07-13 21:29:25 -0400307 BUG_ON(end > BITS_PER_BITMAP);
308
309 for (i = start; i < end; i++)
310 clear_bit(i, info->bitmap);
311
312 info->bytes -= bytes;
Josef Bacik817d52f2009-07-13 21:29:25 -0400313 block_group->free_space -= bytes;
Josef Bacik96303082009-07-13 21:29:25 -0400314}
315
Josef Bacik817d52f2009-07-13 21:29:25 -0400316static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
317 struct btrfs_free_space *info, u64 offset,
318 u64 bytes)
Josef Bacik96303082009-07-13 21:29:25 -0400319{
320 unsigned long start, end;
321 unsigned long i;
322
Josef Bacik817d52f2009-07-13 21:29:25 -0400323 start = offset_to_bit(info->offset, block_group->sectorsize, offset);
324 end = start + bytes_to_bits(bytes, block_group->sectorsize);
Josef Bacik96303082009-07-13 21:29:25 -0400325 BUG_ON(end > BITS_PER_BITMAP);
326
327 for (i = start; i < end; i++)
328 set_bit(i, info->bitmap);
329
330 info->bytes += bytes;
Josef Bacik817d52f2009-07-13 21:29:25 -0400331 block_group->free_space += bytes;
Josef Bacik96303082009-07-13 21:29:25 -0400332}
333
/*
 * Look for a run of at least *bytes of set (free) bits inside a single
 * bitmap entry, starting at or after *offset.
 *
 * On success returns 0 and updates *offset to the start of the run and
 * *bytes to the full length of the run found (which may be larger than
 * requested).  Returns -1 if no run of sufficient length exists.
 */
static int search_bitmap(struct btrfs_block_group_cache *block_group,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	/* start scanning at *offset, clamped to the bitmap's own start */
	i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, block_group->sectorsize);

	/* hop from one run of set bits to the next */
	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		/* run too short, skip past it */
		i = next_zero;
	}

	if (found_bits) {
		/*
		 * NOTE(review): the product is computed in unsigned long
		 * before the u64 cast; fine for BITS_PER_BITMAP-sized i and
		 * common sector sizes, but worth confirming on 32-bit.
		 */
		*offset = (u64)(i * block_group->sectorsize) +
			bitmap_info->offset;
		*bytes = (u64)(found_bits) * block_group->sectorsize;
		return 0;
	}

	return -1;
}
367
/*
 * Walk entries at or after *offset looking for one with at least *bytes
 * of free space — either an extent entry, or a long-enough run inside a
 * bitmap entry.  On success *offset and *bytes are updated to describe
 * what was actually found.  @debug is currently unused.
 */
static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
						*block_group, u64 *offset,
						u64 *bytes, int debug)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret;

	if (!block_group->free_space_offset.rb_node)
		return NULL;

	/* fuzzy search from the bitmap region containing *offset */
	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, *offset),
				   0, 1);
	if (!entry)
		return NULL;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes)
			continue;

		if (entry->bitmap) {
			/* total bytes suffice, but need a contiguous run */
			ret = search_bitmap(block_group, entry, offset, bytes);
			if (!ret)
				return entry;
			continue;
		}

		*offset = entry->offset;
		*bytes = entry->bytes;
		return entry;
	}

	return NULL;
}
404
/*
 * Turn the pre-allocated @info (which must already carry an allocated
 * bitmap page) into a new, empty bitmap entry for the bitmap region
 * around @offset, then re-derive the extent threshold.  Called with
 * tree_lock held (see insert_into_bitmap).
 */
static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info, u64 offset)
{
	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
	/* number of bitmap regions needed to cover the group, rounded up */
	int max_bitmaps = (int)div64_u64(block_group->key.offset +
					 bytes_per_bg - 1, bytes_per_bg);
	BUG_ON(block_group->total_bitmaps >= max_bitmaps);

	info->offset = offset_to_bitmap(block_group, offset);
	info->bytes = 0;	/* bits get set by the caller afterwards */
	link_free_space(block_group, info);
	block_group->total_bitmaps++;

	recalculate_thresholds(block_group);
}
420
/*
 * Clear the range [*offset, *offset + *bytes) starting in the bitmap
 * entry @bitmap_info, continuing into following bitmap entries when the
 * range spans more than one region.  Bitmaps that become empty are
 * unlinked and freed along the way.
 *
 * Returns 0 when the whole range was cleared, -EAGAIN when the caller
 * must retry from the top (the remainder belongs to an extent entry, or
 * the next bitmap doesn't actually contain it), or -EINVAL when bytes
 * remain but there is no entry after this bitmap.
 */
static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
				       struct btrfs_free_space *bitmap_info,
				       u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	/* last byte covered by this bitmap region */
	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;

	/*
	 * XXX - this can go away after a few releases.
	 *
	 * since the only user of btrfs_remove_free_space is the tree logging
	 * stuff, and the only way to test that is under crash conditions, we
	 * want to have this debug stuff here just in case somethings not
	 * working. Search the bitmap for the space we are trying to use to
	 * make sure its actually there. If its not there then we need to stop
	 * because something has gone wrong.
	 */
	search_start = *offset;
	search_bytes = *bytes;
	ret = search_bitmap(block_group, bitmap_info, &search_start,
			    &search_bytes);
	BUG_ON(ret < 0 || search_start != *offset);

	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
		/* range spills past this bitmap: clear up to its end */
		bitmap_clear_bits(block_group, bitmap_info, *offset,
				  end - *offset + 1);
		*bytes -= end - *offset + 1;
		*offset = end + 1;
	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
		/* range fits entirely inside this bitmap */
		bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
		*bytes = 0;
	}

	if (*bytes) {
		/* grab the successor before we potentially free this entry */
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes) {
			unlink_free_space(block_group, bitmap_info);
			kfree(bitmap_info->bitmap);
			kfree(bitmap_info);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = *bytes;
		ret = search_bitmap(block_group, bitmap_info, &search_start,
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes) {
		/* fully cleared and now empty: drop the bitmap entry */
		unlink_free_space(block_group, bitmap_info);
		kfree(bitmap_info->bitmap);
		kfree(bitmap_info);
		block_group->total_bitmaps--;
		recalculate_thresholds(block_group);
	}

	return 0;
}
510
/*
 * Try to record the free range described by @info in bitmap entries.
 *
 * Returns 0 when the range should stay an extent entry instead (below
 * the extents threshold, or the block group is too small for bitmaps),
 * > 0 when the range was fully recorded in bitmaps (in which case @info
 * has been consumed or freed), or a negative errno.
 *
 * Called with tree_lock held; the lock is dropped temporarily around
 * the GFP_NOFS allocations and re-taken before returning.
 */
static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	int added = 0;
	u64 bytes, offset, end;
	int ret;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (block_group->free_extents < block_group->extents_thresh &&
	    info->bytes > block_group->sectorsize * 4)
		return 0;

	/*
	 * some block groups are so tiny they can't be enveloped by a bitmap, so
	 * don't even bother to create a bitmap for this
	 */
	if (BITS_PER_BITMAP * block_group->sectorsize >
	    block_group->key.offset)
		return 0;

	bytes = info->bytes;
	offset = info->offset;

again:
	/* find the bitmap entry covering the region 'offset' falls in */
	bitmap_info = tree_search_offset(block_group,
					 offset_to_bitmap(block_group, offset),
					 1, 0);
	if (!bitmap_info) {
		/* we just added this bitmap, it must be findable */
		BUG_ON(added);
		goto new_bitmap;
	}

	/* first byte past this bitmap region */
	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize);

	if (offset >= bitmap_info->offset && offset + bytes > end) {
		/* fill to the end of this bitmap, continue in the next */
		bitmap_set_bits(block_group, bitmap_info, offset,
				end - offset);
		bytes -= end - offset;
		offset = end;
		added = 0;
	} else if (offset >= bitmap_info->offset && offset + bytes <= end) {
		bitmap_set_bits(block_group, bitmap_info, offset, bytes);
		bytes = 0;
	} else {
		BUG();
	}

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;

new_bitmap:
	if (info && info->bitmap) {
		/* @info already has a bitmap page: promote it in place */
		add_new_bitmap(block_group, info, offset);
		added = 1;
		info = NULL;
		goto again;
	} else {
		/* must allocate; can't hold the spinlock over GFP_NOFS */
		spin_unlock(&block_group->tree_lock);

		/* no pre-allocated info, allocate a new one */
		if (!info) {
			info = kzalloc(sizeof(struct btrfs_free_space),
				       GFP_NOFS);
			if (!info) {
				spin_lock(&block_group->tree_lock);
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		spin_lock(&block_group->tree_lock);
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	/* any leftover temporary entry (and its bitmap) belongs to us */
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
	}

	return ret;
}
608
/*
 * Return the range [offset, offset + bytes) to the free space cache,
 * merging with adjacent extent entries where possible, or folding the
 * range into a bitmap when it has no extent neighbors.
 *
 * Returns 0 on success or a negative errno; -EEXIST from the tree
 * insert is treated as a fatal inconsistency (BUG).
 */
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
			 u64 offset, u64 bytes)
{
	struct btrfs_free_space *right_info = NULL;
	struct btrfs_free_space *left_info = NULL;
	struct btrfs_free_space *info = NULL;
	int ret = 0;

	info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;

	spin_lock(&block_group->tree_lock);

	/*
	 * first we want to see if there is free space adjacent to the range we
	 * are adding, if there is remove that struct and add a new one to
	 * cover the entire range
	 */
	right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
	if (right_info && rb_prev(&right_info->offset_index))
		left_info = rb_entry(rb_prev(&right_info->offset_index),
				     struct btrfs_free_space, offset_index);
	else
		left_info = tree_search_offset(block_group, offset - 1, 0, 0);

	/*
	 * If there was no extent directly to the left or right of this new
	 * extent then we know we're going to have to allocate a new extent, so
	 * before we do that see if we need to drop this into a bitmap
	 */
	if ((!left_info || left_info->bitmap) &&
	    (!right_info || right_info->bitmap)) {
		ret = insert_into_bitmap(block_group, info);

		if (ret < 0) {
			goto out;
		} else if (ret) {
			/* range fully absorbed by bitmaps; info consumed */
			ret = 0;
			goto out;
		}
	}

	/* merge with the free extent starting exactly at our end */
	if (right_info && !right_info->bitmap) {
		unlink_free_space(block_group, right_info);
		info->bytes += right_info->bytes;
		kfree(right_info);
	}

	/* merge with the free extent ending exactly at our start */
	if (left_info && !left_info->bitmap &&
	    left_info->offset + left_info->bytes == offset) {
		unlink_free_space(block_group, left_info);
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
		kfree(left_info);
	}

	ret = link_free_space(block_group, info);
	if (ret)
		kfree(info);
out:
	spin_unlock(&block_group->tree_lock);

	if (ret) {
		printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
		BUG_ON(ret == -EEXIST);
	}

	return ret;
}
682
/*
 * Remove [offset, offset + bytes) from the free space cache.  The range
 * may live in an extent entry, one or more bitmap entries, or a mix of
 * both; used by tree log replay.  Returns 0 on success, -EINVAL when
 * the cache doesn't actually contain the requested space.
 */
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space *info;
	struct btrfs_free_space *next_info = NULL;
	int ret = 0;

	spin_lock(&block_group->tree_lock);

again:
	info = tree_search_offset(block_group, offset, 0, 0);
	if (!info) {
		/*
		 * oops didn't find an extent that matched the space we wanted
		 * to remove, look for a bitmap instead
		 */
		info = tree_search_offset(block_group,
					  offset_to_bitmap(block_group, offset),
					  1, 0);
		if (!info) {
			WARN_ON(1);
			goto out_lock;
		}
	}

	if (info->bytes < bytes && rb_next(&info->offset_index)) {
		u64 end;
		/* current entry is too small; maybe the range continues
		 * into the next entry */
		next_info = rb_entry(rb_next(&info->offset_index),
				     struct btrfs_free_space,
				     offset_index);

		if (next_info->bitmap)
			end = next_info->offset + BITS_PER_BITMAP *
				block_group->sectorsize - 1;
		else
			end = next_info->offset + next_info->bytes;

		if (next_info->bytes < bytes ||
		    next_info->offset > offset || offset > end) {
			printk(KERN_CRIT "Found free space at %llu, size %llu,"
			      " trying to use %llu\n",
			      (unsigned long long)info->offset,
			      (unsigned long long)info->bytes,
			      (unsigned long long)bytes);
			WARN_ON(1);
			ret = -EINVAL;
			goto out_lock;
		}

		info = next_info;
	}

	/* exact-size match: drop the whole entry */
	if (info->bytes == bytes) {
		unlink_free_space(block_group, info);
		if (info->bitmap) {
			kfree(info->bitmap);
			block_group->total_bitmaps--;
		}
		kfree(info);
		goto out_lock;
	}

	/* removal from the front of an extent entry: shrink in place */
	if (!info->bitmap && info->offset == offset) {
		unlink_free_space(block_group, info);
		info->offset += bytes;
		info->bytes -= bytes;
		link_free_space(block_group, info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset <= offset &&
	    info->offset + info->bytes >= offset + bytes) {
		u64 old_start = info->offset;
		/*
		 * we're freeing space in the middle of the info,
		 * this can happen during tree log replay
		 *
		 * first unlink the old info and then
		 * insert it again after the hole we're creating
		 */
		unlink_free_space(block_group, info);
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;

			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
			ret = link_free_space(block_group, info);
			WARN_ON(ret);
			if (ret)
				goto out_lock;
		} else {
			/* the hole we're creating ends at the end
			 * of the info struct, just free the info
			 */
			kfree(info);
		}
		/* must drop the lock: btrfs_add_free_space takes it */
		spin_unlock(&block_group->tree_lock);

		/* step two, insert a new info struct to cover
		 * anything before the hole
		 */
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
		WARN_ON(ret);
		goto out;
	}

	/* range lives (at least partly) in bitmaps */
	ret = remove_from_bitmap(block_group, info, &offset, &bytes);
	if (ret == -EAGAIN)
		goto again;
	BUG_ON(ret);
out_lock:
	spin_unlock(&block_group->tree_lock);
out:
	return ret;
}
799
Josef Bacik0f9dd462008-09-23 13:14:11 -0400800void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
801 u64 bytes)
802{
803 struct btrfs_free_space *info;
804 struct rb_node *n;
805 int count = 0;
806
807 for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
808 info = rb_entry(n, struct btrfs_free_space, offset_index);
809 if (info->bytes >= bytes)
810 count++;
Josef Bacik96303082009-07-13 21:29:25 -0400811 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
Joel Becker21380932009-04-21 12:38:29 -0700812 (unsigned long long)info->offset,
Josef Bacik96303082009-07-13 21:29:25 -0400813 (unsigned long long)info->bytes,
814 (info->bitmap) ? "yes" : "no");
Josef Bacik0f9dd462008-09-23 13:14:11 -0400815 }
Josef Bacik96303082009-07-13 21:29:25 -0400816 printk(KERN_INFO "block group has cluster?: %s\n",
817 list_empty(&block_group->cluster_list) ? "no" : "yes");
Josef Bacik0f9dd462008-09-23 13:14:11 -0400818 printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
819 "\n", count);
820}
821
822u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
823{
824 struct btrfs_free_space *info;
825 struct rb_node *n;
826 u64 ret = 0;
827
828 for (n = rb_first(&block_group->free_space_offset); n;
829 n = rb_next(n)) {
830 info = rb_entry(n, struct btrfs_free_space, offset_index);
831 ret += info->bytes;
832 }
833
834 return ret;
835}
836
/*
 * for a given cluster, put all of its extents back into the free
 * space cache. If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already. In that case, we just return without changing anything
 *
 * In all cases the block group reference is dropped on exit —
 * presumably the reference the cluster held; TODO confirm against
 * callers.
 */
static int
__btrfs_return_cluster_to_free_space(
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	bool bitmap;

	spin_lock(&cluster->lock);
	if (cluster->block_group != block_group)
		goto out;

	/* detach the cluster from the block group */
	bitmap = cluster->points_to_bitmap;
	cluster->block_group = NULL;
	cluster->window_start = 0;
	list_del_init(&cluster->block_group_list);
	cluster->points_to_bitmap = false;

	/* a bitmap cluster has no extent entries queued in cluster->root */
	if (bitmap)
		goto out;

	/* move each extent entry back into the block group's offset tree */
	node = rb_first(&cluster->root);
	while (node) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		rb_erase(&entry->offset_index, &cluster->root);
		BUG_ON(entry->bitmap);
		tree_insert_offset(&block_group->free_space_offset,
				   entry->offset, &entry->offset_index, 0);
	}
	cluster->root = RB_ROOT;

out:
	spin_unlock(&cluster->lock);
	btrfs_put_block_group(block_group);
	return 0;
}
881
/*
 * Tear down the entire free space cache for a block group: give every
 * cluster's extents back first, then free every remaining entry (and
 * any bitmap pages).  The tree_lock is dropped periodically to allow
 * rescheduling on long lists.
 */
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *node;
	struct btrfs_free_cluster *cluster;
	struct list_head *head;

	spin_lock(&block_group->tree_lock);
	/* return every cluster's entries to the tree before freeing */
	while ((head = block_group->cluster_list.next) !=
	       &block_group->cluster_list) {
		cluster = list_entry(head, struct btrfs_free_cluster,
				     block_group_list);

		WARN_ON(cluster->block_group != block_group);
		__btrfs_return_cluster_to_free_space(block_group, cluster);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	/* now free every entry left in the offset tree */
	while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		unlink_free_space(block_group, info);
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	spin_unlock(&block_group->tree_lock);
}
919
Josef Bacik6226cb02009-04-03 10:14:18 -0400920u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
921 u64 offset, u64 bytes, u64 empty_size)
Josef Bacik0f9dd462008-09-23 13:14:11 -0400922{
Josef Bacik6226cb02009-04-03 10:14:18 -0400923 struct btrfs_free_space *entry = NULL;
Josef Bacik96303082009-07-13 21:29:25 -0400924 u64 bytes_search = bytes + empty_size;
Josef Bacik6226cb02009-04-03 10:14:18 -0400925 u64 ret = 0;
Josef Bacik0f9dd462008-09-23 13:14:11 -0400926
Josef Bacik6226cb02009-04-03 10:14:18 -0400927 spin_lock(&block_group->tree_lock);
Josef Bacik96303082009-07-13 21:29:25 -0400928 entry = find_free_space(block_group, &offset, &bytes_search, 0);
Josef Bacik6226cb02009-04-03 10:14:18 -0400929 if (!entry)
Josef Bacik96303082009-07-13 21:29:25 -0400930 goto out;
931
932 ret = offset;
933 if (entry->bitmap) {
Josef Bacik817d52f2009-07-13 21:29:25 -0400934 bitmap_clear_bits(block_group, entry, offset, bytes);
Josef Bacik96303082009-07-13 21:29:25 -0400935 if (!entry->bytes) {
936 unlink_free_space(block_group, entry);
937 kfree(entry->bitmap);
938 kfree(entry);
939 block_group->total_bitmaps--;
940 recalculate_thresholds(block_group);
941 }
942 } else {
Josef Bacik6226cb02009-04-03 10:14:18 -0400943 unlink_free_space(block_group, entry);
Josef Bacik6226cb02009-04-03 10:14:18 -0400944 entry->offset += bytes;
945 entry->bytes -= bytes;
Josef Bacik6226cb02009-04-03 10:14:18 -0400946 if (!entry->bytes)
947 kfree(entry);
948 else
949 link_free_space(block_group, entry);
950 }
Josef Bacik0f9dd462008-09-23 13:14:11 -0400951
Josef Bacik96303082009-07-13 21:29:25 -0400952out:
953 spin_unlock(&block_group->tree_lock);
Josef Bacik817d52f2009-07-13 21:29:25 -0400954
Josef Bacik0f9dd462008-09-23 13:14:11 -0400955 return ret;
956}
Chris Masonfa9c0d792009-04-03 09:47:43 -0400957
958/*
959 * given a cluster, put all of its extents back into the free space
960 * cache. If a block group is passed, this function will only free
961 * a cluster that belongs to the passed block group.
962 *
963 * Otherwise, it'll get a reference on the block group pointed to by the
964 * cluster and remove the cluster from it.
965 */
966int btrfs_return_cluster_to_free_space(
967 struct btrfs_block_group_cache *block_group,
968 struct btrfs_free_cluster *cluster)
969{
970 int ret;
971
972 /* first, get a safe pointer to the block group */
973 spin_lock(&cluster->lock);
974 if (!block_group) {
975 block_group = cluster->block_group;
976 if (!block_group) {
977 spin_unlock(&cluster->lock);
978 return 0;
979 }
980 } else if (cluster->block_group != block_group) {
981 /* someone else has already freed it don't redo their work */
982 spin_unlock(&cluster->lock);
983 return 0;
984 }
985 atomic_inc(&block_group->count);
986 spin_unlock(&cluster->lock);
987
988 /* now return any extents the cluster had on it */
989 spin_lock(&block_group->tree_lock);
990 ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
991 spin_unlock(&block_group->tree_lock);
992
993 /* finally drop our ref */
994 btrfs_put_block_group(block_group);
995 return ret;
996}
997
Josef Bacik96303082009-07-13 21:29:25 -0400998static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
999 struct btrfs_free_cluster *cluster,
1000 u64 bytes, u64 min_start)
1001{
1002 struct btrfs_free_space *entry;
1003 int err;
1004 u64 search_start = cluster->window_start;
1005 u64 search_bytes = bytes;
1006 u64 ret = 0;
1007
1008 spin_lock(&block_group->tree_lock);
1009 spin_lock(&cluster->lock);
1010
1011 if (!cluster->points_to_bitmap)
1012 goto out;
1013
1014 if (cluster->block_group != block_group)
1015 goto out;
1016
Josef Bacik6606bb92009-07-31 11:03:58 -04001017 /*
1018 * search_start is the beginning of the bitmap, but at some point it may
1019 * be a good idea to point to the actual start of the free area in the
1020 * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
1021 * to 1 to make sure we get the bitmap entry
1022 */
1023 entry = tree_search_offset(block_group,
1024 offset_to_bitmap(block_group, search_start),
1025 1, 0);
Josef Bacik96303082009-07-13 21:29:25 -04001026 if (!entry || !entry->bitmap)
1027 goto out;
1028
1029 search_start = min_start;
1030 search_bytes = bytes;
1031
1032 err = search_bitmap(block_group, entry, &search_start,
1033 &search_bytes);
1034 if (err)
1035 goto out;
1036
1037 ret = search_start;
Josef Bacik817d52f2009-07-13 21:29:25 -04001038 bitmap_clear_bits(block_group, entry, ret, bytes);
Josef Bacik96303082009-07-13 21:29:25 -04001039out:
1040 spin_unlock(&cluster->lock);
1041 spin_unlock(&block_group->tree_lock);
1042
1043 return ret;
1044}
1045
Chris Masonfa9c0d792009-04-03 09:47:43 -04001046/*
1047 * given a cluster, try to allocate 'bytes' from it, returns 0
1048 * if it couldn't find anything suitably large, or a logical disk offset
1049 * if things worked out
1050 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	u64 ret = 0;

	/* bitmap-backed clusters are carved up by the bitmap helper */
	if (cluster->points_to_bitmap)
		return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
					       min_start);

	spin_lock(&cluster->lock);
	/* no single extent in the cluster is big enough */
	if (bytes > cluster->max_size)
		goto out;

	/* cluster was raced away to another block group */
	if (cluster->block_group != block_group)
		goto out;

	node = rb_first(&cluster->root);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_free_space, offset_index);

	while(1) {
		/* skip extents that are too small or start below min_start */
		if (entry->bytes < bytes || entry->offset < min_start) {
			struct rb_node *node;

			node = rb_next(&entry->offset_index);
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		}
		/* take @bytes off the front of this extent */
		ret = entry->offset;

		entry->offset += bytes;
		entry->bytes -= bytes;

		/* fully consumed: remove it from the cluster entirely */
		if (entry->bytes == 0) {
			rb_erase(&entry->offset_index, &cluster->root);
			kfree(entry);
		}
		break;
	}
out:
	spin_unlock(&cluster->lock);

	return ret;
}
1103
Josef Bacik96303082009-07-13 21:29:25 -04001104static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
1105 struct btrfs_free_space *entry,
1106 struct btrfs_free_cluster *cluster,
1107 u64 offset, u64 bytes, u64 min_bytes)
1108{
1109 unsigned long next_zero;
1110 unsigned long i;
1111 unsigned long search_bits;
1112 unsigned long total_bits;
1113 unsigned long found_bits;
1114 unsigned long start = 0;
1115 unsigned long total_found = 0;
1116 bool found = false;
1117
1118 i = offset_to_bit(entry->offset, block_group->sectorsize,
1119 max_t(u64, offset, entry->offset));
1120 search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
1121 total_bits = bytes_to_bits(bytes, block_group->sectorsize);
1122
1123again:
1124 found_bits = 0;
1125 for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
1126 i < BITS_PER_BITMAP;
1127 i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
1128 next_zero = find_next_zero_bit(entry->bitmap,
1129 BITS_PER_BITMAP, i);
1130 if (next_zero - i >= search_bits) {
1131 found_bits = next_zero - i;
1132 break;
1133 }
1134 i = next_zero;
1135 }
1136
1137 if (!found_bits)
1138 return -1;
1139
1140 if (!found) {
1141 start = i;
1142 found = true;
1143 }
1144
1145 total_found += found_bits;
1146
1147 if (cluster->max_size < found_bits * block_group->sectorsize)
1148 cluster->max_size = found_bits * block_group->sectorsize;
1149
1150 if (total_found < total_bits) {
1151 i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
1152 if (i - start > total_bits * 2) {
1153 total_found = 0;
1154 cluster->max_size = 0;
1155 found = false;
1156 }
1157 goto again;
1158 }
1159
1160 cluster->window_start = start * block_group->sectorsize +
1161 entry->offset;
1162 cluster->points_to_bitmap = true;
1163
1164 return 0;
1165}
1166
Chris Masonfa9c0d792009-04-03 09:47:43 -04001167/*
1168 * here we try to find a cluster of blocks in a block group. The goal
1169 * is to find at least bytes free and up to empty_size + bytes free.
1170 * We might not find them all in one contiguous area.
1171 *
1172 * returns zero and sets up cluster if things worked out, otherwise
1173 * it returns -enospc
1174 */
1175int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
Chris Mason451d7582009-06-09 20:28:34 -04001176 struct btrfs_root *root,
Chris Masonfa9c0d792009-04-03 09:47:43 -04001177 struct btrfs_block_group_cache *block_group,
1178 struct btrfs_free_cluster *cluster,
1179 u64 offset, u64 bytes, u64 empty_size)
1180{
1181 struct btrfs_free_space *entry = NULL;
1182 struct rb_node *node;
1183 struct btrfs_free_space *next;
Josef Bacik96303082009-07-13 21:29:25 -04001184 struct btrfs_free_space *last = NULL;
Chris Masonfa9c0d792009-04-03 09:47:43 -04001185 u64 min_bytes;
1186 u64 window_start;
1187 u64 window_free;
1188 u64 max_extent = 0;
Josef Bacik96303082009-07-13 21:29:25 -04001189 bool found_bitmap = false;
Chris Masonfa9c0d792009-04-03 09:47:43 -04001190 int ret;
1191
1192 /* for metadata, allow allocates with more holes */
Chris Mason451d7582009-06-09 20:28:34 -04001193 if (btrfs_test_opt(root, SSD_SPREAD)) {
1194 min_bytes = bytes + empty_size;
1195 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
Chris Masonfa9c0d792009-04-03 09:47:43 -04001196 /*
1197 * we want to do larger allocations when we are
1198 * flushing out the delayed refs, it helps prevent
1199 * making more work as we go along.
1200 */
1201 if (trans->transaction->delayed_refs.flushing)
1202 min_bytes = max(bytes, (bytes + empty_size) >> 1);
1203 else
1204 min_bytes = max(bytes, (bytes + empty_size) >> 4);
1205 } else
1206 min_bytes = max(bytes, (bytes + empty_size) >> 2);
1207
1208 spin_lock(&block_group->tree_lock);
1209 spin_lock(&cluster->lock);
1210
1211 /* someone already found a cluster, hooray */
1212 if (cluster->block_group) {
1213 ret = 0;
1214 goto out;
1215 }
1216again:
Josef Bacik96303082009-07-13 21:29:25 -04001217 entry = tree_search_offset(block_group, offset, found_bitmap, 1);
Chris Masonfa9c0d792009-04-03 09:47:43 -04001218 if (!entry) {
1219 ret = -ENOSPC;
1220 goto out;
1221 }
Josef Bacik96303082009-07-13 21:29:25 -04001222
1223 /*
1224 * If found_bitmap is true, we exhausted our search for extent entries,
1225 * and we just want to search all of the bitmaps that we can find, and
1226 * ignore any extent entries we find.
1227 */
1228 while (entry->bitmap || found_bitmap ||
1229 (!entry->bitmap && entry->bytes < min_bytes)) {
1230 struct rb_node *node = rb_next(&entry->offset_index);
1231
1232 if (entry->bitmap && entry->bytes > bytes + empty_size) {
1233 ret = btrfs_bitmap_cluster(block_group, entry, cluster,
1234 offset, bytes + empty_size,
1235 min_bytes);
1236 if (!ret)
1237 goto got_it;
1238 }
1239
1240 if (!node) {
1241 ret = -ENOSPC;
1242 goto out;
1243 }
1244 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1245 }
1246
1247 /*
1248 * We already searched all the extent entries from the passed in offset
1249 * to the end and didn't find enough space for the cluster, and we also
1250 * didn't find any bitmaps that met our criteria, just go ahead and exit
1251 */
1252 if (found_bitmap) {
1253 ret = -ENOSPC;
1254 goto out;
1255 }
1256
1257 cluster->points_to_bitmap = false;
Chris Masonfa9c0d792009-04-03 09:47:43 -04001258 window_start = entry->offset;
1259 window_free = entry->bytes;
1260 last = entry;
1261 max_extent = entry->bytes;
1262
Josef Bacik96303082009-07-13 21:29:25 -04001263 while (1) {
Chris Masonfa9c0d792009-04-03 09:47:43 -04001264 /* out window is just right, lets fill it */
1265 if (window_free >= bytes + empty_size)
1266 break;
1267
1268 node = rb_next(&last->offset_index);
1269 if (!node) {
Josef Bacik96303082009-07-13 21:29:25 -04001270 if (found_bitmap)
1271 goto again;
Chris Masonfa9c0d792009-04-03 09:47:43 -04001272 ret = -ENOSPC;
1273 goto out;
1274 }
1275 next = rb_entry(node, struct btrfs_free_space, offset_index);
1276
1277 /*
Josef Bacik96303082009-07-13 21:29:25 -04001278 * we found a bitmap, so if this search doesn't result in a
1279 * cluster, we know to go and search again for the bitmaps and
1280 * start looking for space there
1281 */
1282 if (next->bitmap) {
1283 if (!found_bitmap)
1284 offset = next->offset;
1285 found_bitmap = true;
1286 last = next;
1287 continue;
1288 }
1289
1290 /*
Chris Masonfa9c0d792009-04-03 09:47:43 -04001291 * we haven't filled the empty size and the window is
1292 * very large. reset and try again
1293 */
Chris Masonc6044802009-06-09 18:35:15 -04001294 if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
1295 next->offset - window_start > (bytes + empty_size) * 2) {
Chris Masonfa9c0d792009-04-03 09:47:43 -04001296 entry = next;
1297 window_start = entry->offset;
1298 window_free = entry->bytes;
1299 last = entry;
Josef Bacik01dea1e2009-11-10 21:23:48 -05001300 max_extent = entry->bytes;
Chris Masonfa9c0d792009-04-03 09:47:43 -04001301 } else {
1302 last = next;
1303 window_free += next->bytes;
1304 if (entry->bytes > max_extent)
1305 max_extent = entry->bytes;
1306 }
1307 }
1308
1309 cluster->window_start = entry->offset;
1310
1311 /*
1312 * now we've found our entries, pull them out of the free space
1313 * cache and put them into the cluster rbtree
1314 *
1315 * The cluster includes an rbtree, but only uses the offset index
1316 * of each free space cache entry.
1317 */
Josef Bacik96303082009-07-13 21:29:25 -04001318 while (1) {
Chris Masonfa9c0d792009-04-03 09:47:43 -04001319 node = rb_next(&entry->offset_index);
Josef Bacik96303082009-07-13 21:29:25 -04001320 if (entry->bitmap && node) {
1321 entry = rb_entry(node, struct btrfs_free_space,
1322 offset_index);
1323 continue;
1324 } else if (entry->bitmap && !node) {
1325 break;
1326 }
1327
1328 rb_erase(&entry->offset_index, &block_group->free_space_offset);
Chris Masonfa9c0d792009-04-03 09:47:43 -04001329 ret = tree_insert_offset(&cluster->root, entry->offset,
Josef Bacik96303082009-07-13 21:29:25 -04001330 &entry->offset_index, 0);
Chris Masonfa9c0d792009-04-03 09:47:43 -04001331 BUG_ON(ret);
1332
1333 if (!node || entry == last)
1334 break;
1335
1336 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1337 }
Josef Bacik96303082009-07-13 21:29:25 -04001338
Chris Masonfa9c0d792009-04-03 09:47:43 -04001339 cluster->max_size = max_extent;
Josef Bacik96303082009-07-13 21:29:25 -04001340got_it:
1341 ret = 0;
Chris Masonfa9c0d792009-04-03 09:47:43 -04001342 atomic_inc(&block_group->count);
1343 list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
1344 cluster->block_group = block_group;
1345out:
1346 spin_unlock(&cluster->lock);
1347 spin_unlock(&block_group->tree_lock);
1348
1349 return ret;
1350}
1351
1352/*
1353 * simple code to zero out a cluster
1354 */
1355void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
1356{
1357 spin_lock_init(&cluster->lock);
1358 spin_lock_init(&cluster->refill_lock);
Eric Paris6bef4d32010-02-23 19:43:04 +00001359 cluster->root = RB_ROOT;
Chris Masonfa9c0d792009-04-03 09:47:43 -04001360 cluster->max_size = 0;
Josef Bacik96303082009-07-13 21:29:25 -04001361 cluster->points_to_bitmap = false;
Chris Masonfa9c0d792009-04-03 09:47:43 -04001362 INIT_LIST_HEAD(&cluster->block_group_list);
1363 cluster->block_group = NULL;
1364}
1365