blob: 53be251585603067c515f14bef2109b4ca34b64a [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/fs/ext2/balloc.c
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
10 * Big-endian to little-endian byte-swapping/bitmaps by
11 * David S. Miller (davem@caip.rutgers.edu), 1995
12 */
13
Linus Torvalds1da177e2005-04-16 15:20:36 -070014#include "ext2.h"
15#include <linux/quotaops.h>
16#include <linux/sched.h>
17#include <linux/buffer_head.h>
Randy Dunlap16f7e0f2006-01-11 12:17:46 -080018#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070019
/*
 * balloc.c contains the block allocation and deallocation routines
 */
23
/*
 * The free blocks are managed by bitmaps.  A file system contains several
 * block groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
 * block for inodes, N blocks for the inode table and data blocks.
 *
 * The file system contains group descriptors which are located after the
 * super block.  Each descriptor contains the number of the bitmap block and
 * the free blocks count of the group.  The descriptors are loaded in memory
 * when a file system is mounted (see ext2_fill_super).
 */
34
35
/*
 * True when b lies in the closed interval [first, first + len - 1].
 * The arguments may be expanded more than once, so they must be free of
 * side effects.
 */
#define in_range(b, first, len)	\
	((first) <= (b) && (b) <= (first) + (len) - 1)
37
38struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
39 unsigned int block_group,
40 struct buffer_head ** bh)
41{
42 unsigned long group_desc;
43 unsigned long offset;
44 struct ext2_group_desc * desc;
45 struct ext2_sb_info *sbi = EXT2_SB(sb);
46
47 if (block_group >= sbi->s_groups_count) {
48 ext2_error (sb, "ext2_get_group_desc",
49 "block_group >= groups_count - "
50 "block_group = %d, groups_count = %lu",
51 block_group, sbi->s_groups_count);
52
53 return NULL;
54 }
55
56 group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb);
57 offset = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1);
58 if (!sbi->s_group_desc[group_desc]) {
59 ext2_error (sb, "ext2_get_group_desc",
60 "Group descriptor not loaded - "
61 "block_group = %d, group_desc = %lu, desc = %lu",
62 block_group, group_desc, offset);
63 return NULL;
64 }
65
66 desc = (struct ext2_group_desc *) sbi->s_group_desc[group_desc]->b_data;
67 if (bh)
68 *bh = sbi->s_group_desc[group_desc];
69 return desc + offset;
70}
71
72/*
73 * Read the bitmap for a given block_group, reading into the specified
74 * slot in the superblock's bitmap cache.
75 *
76 * Return buffer_head on success or NULL in case of failure.
77 */
78static struct buffer_head *
79read_block_bitmap(struct super_block *sb, unsigned int block_group)
80{
81 struct ext2_group_desc * desc;
82 struct buffer_head * bh = NULL;
83
84 desc = ext2_get_group_desc (sb, block_group, NULL);
85 if (!desc)
86 goto error_out;
87 bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
88 if (!bh)
89 ext2_error (sb, "read_block_bitmap",
90 "Cannot read block bitmap - "
91 "block_group = %d, block_bitmap = %u",
92 block_group, le32_to_cpu(desc->bg_block_bitmap));
93error_out:
94 return bh;
95}
96
97/*
98 * Set sb->s_dirt here because the superblock was "logically" altered. We
99 * need to recalculate its free blocks count and flush it out.
100 */
101static int reserve_blocks(struct super_block *sb, int count)
102{
103 struct ext2_sb_info *sbi = EXT2_SB(sb);
104 struct ext2_super_block *es = sbi->s_es;
105 unsigned free_blocks;
106 unsigned root_blocks;
107
108 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
109 root_blocks = le32_to_cpu(es->s_r_blocks_count);
110
111 if (free_blocks < count)
112 count = free_blocks;
113
114 if (free_blocks < root_blocks + count && !capable(CAP_SYS_RESOURCE) &&
115 sbi->s_resuid != current->fsuid &&
116 (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
117 /*
118 * We are too close to reserve and we are not privileged.
119 * Can we allocate anything at all?
120 */
121 if (free_blocks > root_blocks)
122 count = free_blocks - root_blocks;
123 else
124 return 0;
125 }
126
Peter Zijlstraaa0dff22007-10-16 23:25:42 -0700127 percpu_counter_add(&sbi->s_freeblocks_counter, -count);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128 sb->s_dirt = 1;
129 return count;
130}
131
132static void release_blocks(struct super_block *sb, int count)
133{
134 if (count) {
135 struct ext2_sb_info *sbi = EXT2_SB(sb);
136
Peter Zijlstraaa0dff22007-10-16 23:25:42 -0700137 percpu_counter_add(&sbi->s_freeblocks_counter, count);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138 sb->s_dirt = 1;
139 }
140}
141
142static int group_reserve_blocks(struct ext2_sb_info *sbi, int group_no,
143 struct ext2_group_desc *desc, struct buffer_head *bh, int count)
144{
145 unsigned free_blocks;
146
147 if (!desc->bg_free_blocks_count)
148 return 0;
149
150 spin_lock(sb_bgl_lock(sbi, group_no));
151 free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
152 if (free_blocks < count)
153 count = free_blocks;
154 desc->bg_free_blocks_count = cpu_to_le16(free_blocks - count);
155 spin_unlock(sb_bgl_lock(sbi, group_no));
156 mark_buffer_dirty(bh);
157 return count;
158}
159
160static void group_release_blocks(struct super_block *sb, int group_no,
161 struct ext2_group_desc *desc, struct buffer_head *bh, int count)
162{
163 if (count) {
164 struct ext2_sb_info *sbi = EXT2_SB(sb);
165 unsigned free_blocks;
166
167 spin_lock(sb_bgl_lock(sbi, group_no));
168 free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
169 desc->bg_free_blocks_count = cpu_to_le16(free_blocks + count);
170 spin_unlock(sb_bgl_lock(sbi, group_no));
171 sb->s_dirt = 1;
172 mark_buffer_dirty(bh);
173 }
174}
175
176/* Free given blocks, update quota and i_blocks field */
177void ext2_free_blocks (struct inode * inode, unsigned long block,
178 unsigned long count)
179{
180 struct buffer_head *bitmap_bh = NULL;
181 struct buffer_head * bh2;
182 unsigned long block_group;
183 unsigned long bit;
184 unsigned long i;
185 unsigned long overflow;
186 struct super_block * sb = inode->i_sb;
187 struct ext2_sb_info * sbi = EXT2_SB(sb);
188 struct ext2_group_desc * desc;
189 struct ext2_super_block * es = sbi->s_es;
190 unsigned freed = 0, group_freed;
191
192 if (block < le32_to_cpu(es->s_first_data_block) ||
193 block + count < block ||
194 block + count > le32_to_cpu(es->s_blocks_count)) {
195 ext2_error (sb, "ext2_free_blocks",
196 "Freeing blocks not in datazone - "
197 "block = %lu, count = %lu", block, count);
198 goto error_return;
199 }
200
201 ext2_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1);
202
203do_more:
204 overflow = 0;
205 block_group = (block - le32_to_cpu(es->s_first_data_block)) /
206 EXT2_BLOCKS_PER_GROUP(sb);
207 bit = (block - le32_to_cpu(es->s_first_data_block)) %
208 EXT2_BLOCKS_PER_GROUP(sb);
209 /*
210 * Check to see if we are freeing blocks across a group
211 * boundary.
212 */
213 if (bit + count > EXT2_BLOCKS_PER_GROUP(sb)) {
214 overflow = bit + count - EXT2_BLOCKS_PER_GROUP(sb);
215 count -= overflow;
216 }
217 brelse(bitmap_bh);
218 bitmap_bh = read_block_bitmap(sb, block_group);
219 if (!bitmap_bh)
220 goto error_return;
221
222 desc = ext2_get_group_desc (sb, block_group, &bh2);
223 if (!desc)
224 goto error_return;
225
226 if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) ||
227 in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) ||
228 in_range (block, le32_to_cpu(desc->bg_inode_table),
229 sbi->s_itb_per_group) ||
230 in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table),
231 sbi->s_itb_per_group))
232 ext2_error (sb, "ext2_free_blocks",
233 "Freeing blocks in system zones - "
234 "Block = %lu, count = %lu",
235 block, count);
236
237 for (i = 0, group_freed = 0; i < count; i++) {
238 if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
239 bit + i, bitmap_bh->b_data)) {
240 ext2_error(sb, __FUNCTION__,
241 "bit already cleared for block %lu", block + i);
242 } else {
243 group_freed++;
244 }
245 }
246
247 mark_buffer_dirty(bitmap_bh);
248 if (sb->s_flags & MS_SYNCHRONOUS)
249 sync_dirty_buffer(bitmap_bh);
250
251 group_release_blocks(sb, block_group, desc, bh2, group_freed);
252 freed += group_freed;
253
254 if (overflow) {
255 block += count;
256 count = overflow;
257 goto do_more;
258 }
259error_return:
260 brelse(bitmap_bh);
261 release_blocks(sb, freed);
262 DQUOT_FREE_BLOCK(inode, freed);
263}
264
265static int grab_block(spinlock_t *lock, char *map, unsigned size, int goal)
266{
267 int k;
268 char *p, *r;
269
270 if (!ext2_test_bit(goal, map))
271 goto got_it;
272
273repeat:
274 if (goal) {
275 /*
276 * The goal was occupied; search forward for a free
277 * block within the next XX blocks.
278 *
279 * end_goal is more or less random, but it has to be
280 * less than EXT2_BLOCKS_PER_GROUP. Aligning up to the
281 * next 64-bit boundary is simple..
282 */
283 k = (goal + 63) & ~63;
284 goal = ext2_find_next_zero_bit(map, k, goal);
285 if (goal < k)
286 goto got_it;
287 /*
288 * Search in the remainder of the current group.
289 */
290 }
291
292 p = map + (goal >> 3);
293 r = memscan(p, 0, (size - goal + 7) >> 3);
294 k = (r - map) << 3;
295 if (k < size) {
296 /*
297 * We have succeeded in finding a free byte in the block
298 * bitmap. Now search backwards to find the start of this
299 * group of free blocks - won't take more than 7 iterations.
300 */
301 for (goal = k; goal && !ext2_test_bit (goal - 1, map); goal--)
302 ;
303 goto got_it;
304 }
305
306 k = ext2_find_next_zero_bit ((u32 *)map, size, goal);
307 if (k < size) {
308 goal = k;
309 goto got_it;
310 }
311 return -1;
312got_it:
313 if (ext2_set_bit_atomic(lock, goal, (void *) map))
314 goto repeat;
315 return goal;
316}
317
318/*
319 * ext2_new_block uses a goal block to assist allocation. If the goal is
320 * free, or there is a free block within 32 blocks of the goal, that block
321 * is allocated. Otherwise a forward search is made for a free block; within
322 * each block group the search first looks for an entire free byte in the block
323 * bitmap, and then for any free bit if that fails.
324 * This function also updates quota and i_blocks field.
325 */
326int ext2_new_block(struct inode *inode, unsigned long goal,
327 u32 *prealloc_count, u32 *prealloc_block, int *err)
328{
329 struct buffer_head *bitmap_bh = NULL;
330 struct buffer_head *gdp_bh; /* bh2 */
331 struct ext2_group_desc *desc;
332 int group_no; /* i */
333 int ret_block; /* j */
334 int group_idx; /* k */
335 int target_block; /* tmp */
336 int block = 0;
337 struct super_block *sb = inode->i_sb;
338 struct ext2_sb_info *sbi = EXT2_SB(sb);
339 struct ext2_super_block *es = sbi->s_es;
340 unsigned group_size = EXT2_BLOCKS_PER_GROUP(sb);
341 unsigned prealloc_goal = es->s_prealloc_blocks;
342 unsigned group_alloc = 0, es_alloc, dq_alloc;
343 int nr_scanned_groups;
344
345 if (!prealloc_goal--)
346 prealloc_goal = EXT2_DEFAULT_PREALLOC_BLOCKS - 1;
347 if (!prealloc_count || *prealloc_count)
348 prealloc_goal = 0;
349
350 if (DQUOT_ALLOC_BLOCK(inode, 1)) {
351 *err = -EDQUOT;
352 goto out;
353 }
354
355 while (prealloc_goal && DQUOT_PREALLOC_BLOCK(inode, prealloc_goal))
356 prealloc_goal--;
357
358 dq_alloc = prealloc_goal + 1;
359 es_alloc = reserve_blocks(sb, dq_alloc);
360 if (!es_alloc) {
361 *err = -ENOSPC;
362 goto out_dquot;
363 }
364
365 ext2_debug ("goal=%lu.\n", goal);
366
367 if (goal < le32_to_cpu(es->s_first_data_block) ||
368 goal >= le32_to_cpu(es->s_blocks_count))
369 goal = le32_to_cpu(es->s_first_data_block);
370 group_no = (goal - le32_to_cpu(es->s_first_data_block)) / group_size;
371 desc = ext2_get_group_desc (sb, group_no, &gdp_bh);
372 if (!desc) {
373 /*
374 * gdp_bh may still be uninitialised. But group_release_blocks
375 * will not touch it because group_alloc is zero.
376 */
377 goto io_error;
378 }
379
380 group_alloc = group_reserve_blocks(sbi, group_no, desc,
381 gdp_bh, es_alloc);
382 if (group_alloc) {
383 ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
384 group_size);
385 brelse(bitmap_bh);
386 bitmap_bh = read_block_bitmap(sb, group_no);
387 if (!bitmap_bh)
388 goto io_error;
389
390 ext2_debug("goal is at %d:%d.\n", group_no, ret_block);
391
392 ret_block = grab_block(sb_bgl_lock(sbi, group_no),
393 bitmap_bh->b_data, group_size, ret_block);
394 if (ret_block >= 0)
395 goto got_block;
396 group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
397 group_alloc = 0;
398 }
399
400 ext2_debug ("Bit not found in block group %d.\n", group_no);
401
402 /*
403 * Now search the rest of the groups. We assume that
404 * i and desc correctly point to the last group visited.
405 */
406 nr_scanned_groups = 0;
407retry:
408 for (group_idx = 0; !group_alloc &&
409 group_idx < sbi->s_groups_count; group_idx++) {
410 group_no++;
411 if (group_no >= sbi->s_groups_count)
412 group_no = 0;
413 desc = ext2_get_group_desc(sb, group_no, &gdp_bh);
414 if (!desc)
415 goto io_error;
416 group_alloc = group_reserve_blocks(sbi, group_no, desc,
417 gdp_bh, es_alloc);
418 }
419 if (!group_alloc) {
420 *err = -ENOSPC;
421 goto out_release;
422 }
423 brelse(bitmap_bh);
424 bitmap_bh = read_block_bitmap(sb, group_no);
425 if (!bitmap_bh)
426 goto io_error;
427
428 ret_block = grab_block(sb_bgl_lock(sbi, group_no), bitmap_bh->b_data,
429 group_size, 0);
430 if (ret_block < 0) {
431 /*
432 * If a free block counter is corrupted we can loop inifintely.
433 * Detect that here.
434 */
435 nr_scanned_groups++;
436 if (nr_scanned_groups > 2 * sbi->s_groups_count) {
437 ext2_error(sb, "ext2_new_block",
438 "corrupted free blocks counters");
439 goto io_error;
440 }
441 /*
442 * Someone else grabbed the last free block in this blockgroup
443 * before us. Retry the scan.
444 */
445 group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
446 group_alloc = 0;
447 goto retry;
448 }
449
450got_block:
451 ext2_debug("using block group %d(%d)\n",
452 group_no, desc->bg_free_blocks_count);
453
454 target_block = ret_block + group_no * group_size +
455 le32_to_cpu(es->s_first_data_block);
456
457 if (target_block == le32_to_cpu(desc->bg_block_bitmap) ||
458 target_block == le32_to_cpu(desc->bg_inode_bitmap) ||
459 in_range(target_block, le32_to_cpu(desc->bg_inode_table),
460 sbi->s_itb_per_group))
461 ext2_error (sb, "ext2_new_block",
462 "Allocating block in system zone - "
463 "block = %u", target_block);
464
465 if (target_block >= le32_to_cpu(es->s_blocks_count)) {
466 ext2_error (sb, "ext2_new_block",
467 "block(%d) >= blocks count(%d) - "
468 "block_group = %d, es == %p ", ret_block,
469 le32_to_cpu(es->s_blocks_count), group_no, es);
470 goto io_error;
471 }
472 block = target_block;
473
474 /* OK, we _had_ allocated something */
475 ext2_debug("found bit %d\n", ret_block);
476
477 dq_alloc--;
478 es_alloc--;
479 group_alloc--;
480
481 /*
482 * Do block preallocation now if required.
483 */
484 write_lock(&EXT2_I(inode)->i_meta_lock);
485 if (group_alloc && !*prealloc_count) {
486 unsigned n;
487
488 for (n = 0; n < group_alloc && ++ret_block < group_size; n++) {
489 if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group_no),
490 ret_block,
491 (void*) bitmap_bh->b_data))
492 break;
493 }
494 *prealloc_block = block + 1;
495 *prealloc_count = n;
496 es_alloc -= n;
497 dq_alloc -= n;
498 group_alloc -= n;
499 }
500 write_unlock(&EXT2_I(inode)->i_meta_lock);
501
502 mark_buffer_dirty(bitmap_bh);
503 if (sb->s_flags & MS_SYNCHRONOUS)
504 sync_dirty_buffer(bitmap_bh);
505
506 ext2_debug ("allocating block %d. ", block);
507
508 *err = 0;
509out_release:
510 group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
511 release_blocks(sb, es_alloc);
512out_dquot:
513 DQUOT_FREE_BLOCK(inode, dq_alloc);
514out:
515 brelse(bitmap_bh);
516 return block;
517
518io_error:
519 *err = -EIO;
520 goto out_release;
521}
522
Valerie Henson21730ee2006-06-25 05:48:12 -0700523#ifdef EXT2FS_DEBUG
524
525static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
526
527unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars)
528{
529 unsigned int i;
530 unsigned long sum = 0;
531
532 if (!map)
533 return (0);
534 for (i = 0; i < numchars; i++)
535 sum += nibblemap[map->b_data[i] & 0xf] +
536 nibblemap[(map->b_data[i] >> 4) & 0xf];
537 return (sum);
538}
539
540#endif /* EXT2FS_DEBUG */
541
Linus Torvalds1da177e2005-04-16 15:20:36 -0700542unsigned long ext2_count_free_blocks (struct super_block * sb)
543{
544 struct ext2_group_desc * desc;
545 unsigned long desc_count = 0;
546 int i;
547#ifdef EXT2FS_DEBUG
548 unsigned long bitmap_count, x;
549 struct ext2_super_block *es;
550
Linus Torvalds1da177e2005-04-16 15:20:36 -0700551 es = EXT2_SB(sb)->s_es;
552 desc_count = 0;
553 bitmap_count = 0;
554 desc = NULL;
555 for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
556 struct buffer_head *bitmap_bh;
557 desc = ext2_get_group_desc (sb, i, NULL);
558 if (!desc)
559 continue;
560 desc_count += le16_to_cpu(desc->bg_free_blocks_count);
561 bitmap_bh = read_block_bitmap(sb, i);
562 if (!bitmap_bh)
563 continue;
564
565 x = ext2_count_free(bitmap_bh, sb->s_blocksize);
566 printk ("group %d: stored = %d, counted = %lu\n",
567 i, le16_to_cpu(desc->bg_free_blocks_count), x);
568 bitmap_count += x;
569 brelse(bitmap_bh);
570 }
571 printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu\n",
572 (long)le32_to_cpu(es->s_free_blocks_count),
573 desc_count, bitmap_count);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700574 return bitmap_count;
575#else
576 for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
577 desc = ext2_get_group_desc (sb, i, NULL);
578 if (!desc)
579 continue;
580 desc_count += le16_to_cpu(desc->bg_free_blocks_count);
581 }
582 return desc_count;
583#endif
584}
585
586static inline int
587block_in_use(unsigned long block, struct super_block *sb, unsigned char *map)
588{
589 return ext2_test_bit ((block -
590 le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block)) %
591 EXT2_BLOCKS_PER_GROUP(sb), map);
592}
593
/*
 * Return 1 if @a is a positive integer power of @b (b, b^2, b^3, ...),
 * else 0.
 *
 * @a is divided down by @b instead of multiplying @b up: the former
 * multiplication overflowed a signed int (undefined behaviour, possibly
 * never terminating) for large @a that is not a power of @b.
 *
 * A base below 2 has no useful powers; for such @b the result is simply
 * whether @a equals @b, which also keeps the loop below finite.
 */
static inline int test_root(int a, int b)
{
	if (b < 2)
		return a == b;

	for (;;) {
		if (a < b)
			return 0;
		if (a == b)
			return 1;
		if (a % b)
			return 0;
		a /= b;
	}
}
602
/*
 * Return non-zero if @group is 0, 1, or a power of 3, 5 or 7 (as decided
 * by test_root), else 0.
 */
static int ext2_group_sparse(int group)
{
	if (group <= 1)
		return 1;
	if (test_root(group, 3))
		return 1;
	if (test_root(group, 5))
		return 1;
	return test_root(group, 7);
}
610
611/**
612 * ext2_bg_has_super - number of blocks used by the superblock in group
613 * @sb: superblock for filesystem
614 * @group: group number to check
615 *
616 * Return the number of blocks used by the superblock (primary or backup)
617 * in this group. Currently this will be only 0 or 1.
618 */
619int ext2_bg_has_super(struct super_block *sb, int group)
620{
621 if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
622 !ext2_group_sparse(group))
623 return 0;
624 return 1;
625}
626
627/**
628 * ext2_bg_num_gdb - number of blocks used by the group table in group
629 * @sb: superblock for filesystem
630 * @group: group number to check
631 *
632 * Return the number of blocks used by the group descriptor table
633 * (primary or backup) in this group. In the future there may be a
634 * different number of descriptor blocks in each group.
635 */
636unsigned long ext2_bg_num_gdb(struct super_block *sb, int group)
637{
638 if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
639 !ext2_group_sparse(group))
640 return 0;
641 return EXT2_SB(sb)->s_gdb_count;
642}
643