blob: 433a213a8bd9450f7f1ab3e7926002217d6cf1f9 [file] [log] [blame]
/*
 *  linux/fs/ext2/balloc.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 */
13
14#include <linux/config.h>
15#include "ext2.h"
16#include <linux/quotaops.h>
17#include <linux/sched.h>
18#include <linux/buffer_head.h>
Randy Dunlap16f7e0f2006-01-11 12:17:46 -080019#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070020
/*
 * balloc.c contains the block allocation and deallocation routines.
 */

/*
 * The free blocks are managed by bitmaps.  A file system contains several
 * block groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
 * block for inodes, N blocks for the inode table and data blocks.
 *
 * The file system contains group descriptors which are located after the
 * super block.  Each descriptor contains the number of the bitmap block and
 * the free blocks count in the group.  The descriptors are loaded in memory
 * when a file system is mounted (see ext2_read_super).
 */
35
36
/*
 * in_range - true when @b lies within the @len-element range that starts at
 * @first, i.e. first <= b <= first + len - 1.
 *
 * @b and @first are each expanded twice, so do not pass expressions with
 * side effects.
 */
#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
38
39struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
40 unsigned int block_group,
41 struct buffer_head ** bh)
42{
43 unsigned long group_desc;
44 unsigned long offset;
45 struct ext2_group_desc * desc;
46 struct ext2_sb_info *sbi = EXT2_SB(sb);
47
48 if (block_group >= sbi->s_groups_count) {
49 ext2_error (sb, "ext2_get_group_desc",
50 "block_group >= groups_count - "
51 "block_group = %d, groups_count = %lu",
52 block_group, sbi->s_groups_count);
53
54 return NULL;
55 }
56
57 group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb);
58 offset = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1);
59 if (!sbi->s_group_desc[group_desc]) {
60 ext2_error (sb, "ext2_get_group_desc",
61 "Group descriptor not loaded - "
62 "block_group = %d, group_desc = %lu, desc = %lu",
63 block_group, group_desc, offset);
64 return NULL;
65 }
66
67 desc = (struct ext2_group_desc *) sbi->s_group_desc[group_desc]->b_data;
68 if (bh)
69 *bh = sbi->s_group_desc[group_desc];
70 return desc + offset;
71}
72
73/*
74 * Read the bitmap for a given block_group, reading into the specified
75 * slot in the superblock's bitmap cache.
76 *
77 * Return buffer_head on success or NULL in case of failure.
78 */
79static struct buffer_head *
80read_block_bitmap(struct super_block *sb, unsigned int block_group)
81{
82 struct ext2_group_desc * desc;
83 struct buffer_head * bh = NULL;
84
85 desc = ext2_get_group_desc (sb, block_group, NULL);
86 if (!desc)
87 goto error_out;
88 bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
89 if (!bh)
90 ext2_error (sb, "read_block_bitmap",
91 "Cannot read block bitmap - "
92 "block_group = %d, block_bitmap = %u",
93 block_group, le32_to_cpu(desc->bg_block_bitmap));
94error_out:
95 return bh;
96}
97
98/*
99 * Set sb->s_dirt here because the superblock was "logically" altered. We
100 * need to recalculate its free blocks count and flush it out.
101 */
102static int reserve_blocks(struct super_block *sb, int count)
103{
104 struct ext2_sb_info *sbi = EXT2_SB(sb);
105 struct ext2_super_block *es = sbi->s_es;
106 unsigned free_blocks;
107 unsigned root_blocks;
108
109 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
110 root_blocks = le32_to_cpu(es->s_r_blocks_count);
111
112 if (free_blocks < count)
113 count = free_blocks;
114
115 if (free_blocks < root_blocks + count && !capable(CAP_SYS_RESOURCE) &&
116 sbi->s_resuid != current->fsuid &&
117 (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
118 /*
119 * We are too close to reserve and we are not privileged.
120 * Can we allocate anything at all?
121 */
122 if (free_blocks > root_blocks)
123 count = free_blocks - root_blocks;
124 else
125 return 0;
126 }
127
128 percpu_counter_mod(&sbi->s_freeblocks_counter, -count);
129 sb->s_dirt = 1;
130 return count;
131}
132
133static void release_blocks(struct super_block *sb, int count)
134{
135 if (count) {
136 struct ext2_sb_info *sbi = EXT2_SB(sb);
137
138 percpu_counter_mod(&sbi->s_freeblocks_counter, count);
139 sb->s_dirt = 1;
140 }
141}
142
143static int group_reserve_blocks(struct ext2_sb_info *sbi, int group_no,
144 struct ext2_group_desc *desc, struct buffer_head *bh, int count)
145{
146 unsigned free_blocks;
147
148 if (!desc->bg_free_blocks_count)
149 return 0;
150
151 spin_lock(sb_bgl_lock(sbi, group_no));
152 free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
153 if (free_blocks < count)
154 count = free_blocks;
155 desc->bg_free_blocks_count = cpu_to_le16(free_blocks - count);
156 spin_unlock(sb_bgl_lock(sbi, group_no));
157 mark_buffer_dirty(bh);
158 return count;
159}
160
161static void group_release_blocks(struct super_block *sb, int group_no,
162 struct ext2_group_desc *desc, struct buffer_head *bh, int count)
163{
164 if (count) {
165 struct ext2_sb_info *sbi = EXT2_SB(sb);
166 unsigned free_blocks;
167
168 spin_lock(sb_bgl_lock(sbi, group_no));
169 free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
170 desc->bg_free_blocks_count = cpu_to_le16(free_blocks + count);
171 spin_unlock(sb_bgl_lock(sbi, group_no));
172 sb->s_dirt = 1;
173 mark_buffer_dirty(bh);
174 }
175}
176
177/* Free given blocks, update quota and i_blocks field */
178void ext2_free_blocks (struct inode * inode, unsigned long block,
179 unsigned long count)
180{
181 struct buffer_head *bitmap_bh = NULL;
182 struct buffer_head * bh2;
183 unsigned long block_group;
184 unsigned long bit;
185 unsigned long i;
186 unsigned long overflow;
187 struct super_block * sb = inode->i_sb;
188 struct ext2_sb_info * sbi = EXT2_SB(sb);
189 struct ext2_group_desc * desc;
190 struct ext2_super_block * es = sbi->s_es;
191 unsigned freed = 0, group_freed;
192
193 if (block < le32_to_cpu(es->s_first_data_block) ||
194 block + count < block ||
195 block + count > le32_to_cpu(es->s_blocks_count)) {
196 ext2_error (sb, "ext2_free_blocks",
197 "Freeing blocks not in datazone - "
198 "block = %lu, count = %lu", block, count);
199 goto error_return;
200 }
201
202 ext2_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1);
203
204do_more:
205 overflow = 0;
206 block_group = (block - le32_to_cpu(es->s_first_data_block)) /
207 EXT2_BLOCKS_PER_GROUP(sb);
208 bit = (block - le32_to_cpu(es->s_first_data_block)) %
209 EXT2_BLOCKS_PER_GROUP(sb);
210 /*
211 * Check to see if we are freeing blocks across a group
212 * boundary.
213 */
214 if (bit + count > EXT2_BLOCKS_PER_GROUP(sb)) {
215 overflow = bit + count - EXT2_BLOCKS_PER_GROUP(sb);
216 count -= overflow;
217 }
218 brelse(bitmap_bh);
219 bitmap_bh = read_block_bitmap(sb, block_group);
220 if (!bitmap_bh)
221 goto error_return;
222
223 desc = ext2_get_group_desc (sb, block_group, &bh2);
224 if (!desc)
225 goto error_return;
226
227 if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) ||
228 in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) ||
229 in_range (block, le32_to_cpu(desc->bg_inode_table),
230 sbi->s_itb_per_group) ||
231 in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table),
232 sbi->s_itb_per_group))
233 ext2_error (sb, "ext2_free_blocks",
234 "Freeing blocks in system zones - "
235 "Block = %lu, count = %lu",
236 block, count);
237
238 for (i = 0, group_freed = 0; i < count; i++) {
239 if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
240 bit + i, bitmap_bh->b_data)) {
241 ext2_error(sb, __FUNCTION__,
242 "bit already cleared for block %lu", block + i);
243 } else {
244 group_freed++;
245 }
246 }
247
248 mark_buffer_dirty(bitmap_bh);
249 if (sb->s_flags & MS_SYNCHRONOUS)
250 sync_dirty_buffer(bitmap_bh);
251
252 group_release_blocks(sb, block_group, desc, bh2, group_freed);
253 freed += group_freed;
254
255 if (overflow) {
256 block += count;
257 count = overflow;
258 goto do_more;
259 }
260error_return:
261 brelse(bitmap_bh);
262 release_blocks(sb, freed);
263 DQUOT_FREE_BLOCK(inode, freed);
264}
265
/*
 * grab_block - find and claim a free bit in a block bitmap
 * @lock: lock handed to ext2_set_bit_atomic() when claiming the bit
 * @map:  the bitmap data
 * @size: number of bits in the bitmap
 * @goal: preferred bit number
 *
 * Search order: the goal bit itself; then the bits from the goal up to the
 * next multiple of 64; then the first all-zero byte at or after that point
 * (backing up to the start of the free run it belongs to); finally any zero
 * bit at or after that point.
 *
 * Returns the bit number that was claimed, or -1 if no candidate was found.
 */
static int grab_block(spinlock_t *lock, char *map, unsigned size, int goal)
{
	int k;
	char *p, *r;

	/* Fast path: the goal bit itself looks free. */
	if (!ext2_test_bit(goal, map))
		goto got_it;

repeat:
	if (goal) {
		/*
		 * The goal was occupied; search forward for a free
		 * block in the bits up to the next 64-bit boundary.
		 *
		 * The end of this short search is more or less arbitrary,
		 * but it has to be less than EXT2_BLOCKS_PER_GROUP.
		 * Aligning up to the next 64-bit boundary is simple.
		 */
		k = (goal + 63) & ~63;
		goal = ext2_find_next_zero_bit(map, k, goal);
		if (goal < k)
			goto got_it;
		/*
		 * Nothing nearby; goal now sits on the boundary.
		 * Search in the remainder of the current group.
		 */
	}

	/* Byte-wise scan for a completely free byte (value 0). */
	p = map + (goal >> 3);
	r = memscan(p, 0, (size - goal + 7) >> 3);
	k = (r - map) << 3;
	if (k < size) {
		/*
		 * We have succeeded in finding a free byte in the block
		 * bitmap.  Now search backwards to find the start of this
		 * group of free blocks - won't take more than 7 iterations.
		 */
		for (goal = k; goal && !ext2_test_bit (goal - 1, map); goal--)
			;
		goto got_it;
	}

	/* No free byte: settle for any single free bit from goal onwards. */
	k = ext2_find_next_zero_bit ((u32 *)map, size, goal);
	if (k < size) {
		goal = k;
		goto got_it;
	}
	return -1;
got_it:
	/*
	 * A non-zero result is treated as "could not claim this bit"
	 * (presumably the bit was already set, e.g. by a concurrent
	 * allocator - confirm against ext2_set_bit_atomic); search again
	 * starting from the same position.
	 */
	if (ext2_set_bit_atomic(lock, goal, (void *) map))
		goto repeat;
	return goal;
}
318
/*
 * ext2_new_block uses a goal block to assist allocation.  The group that
 * contains the goal is tried first, with grab_block() looking at the goal
 * bit and then onwards from it.  If that group has no reservable blocks or
 * no free bit is found, the remaining groups are visited in order (wrapping
 * around), each searched from bit 0.
 * This function also updates quota and i_blocks field.
 *
 * @inode:          inode the block is allocated for
 * @goal:           preferred block number; replaced by s_first_data_block
 *                  when it lies outside the filesystem
 * @prealloc_count: if non-NULL and currently zero, preallocation is
 *                  attempted and the number of preallocated blocks is
 *                  stored here
 * @prealloc_block: receives the first preallocated block number when
 *                  preallocation is performed
 * @err:            receives 0 on success, or -EDQUOT, -ENOSPC or -EIO
 *
 * Returns the allocated block number, or 0 on failure.
 *
 * Accounting: dq_alloc (quota), es_alloc (filesystem-wide counter) and
 * group_alloc (per-group counter) hold what has been reserved but not yet
 * consumed at each level; whatever is left over is handed back on the way
 * out.
 */
int ext2_new_block(struct inode *inode, unsigned long goal,
			u32 *prealloc_count, u32 *prealloc_block, int *err)
{
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *gdp_bh;	/* descriptor buffer of group_no */
	struct ext2_group_desc *desc;
	int group_no;			/* group currently being tried */
	int ret_block;			/* bit number within the group */
	int group_idx;			/* loop counter for the group scan */
	int target_block;		/* absolute block number candidate */
	int block = 0;
	struct super_block *sb = inode->i_sb;
	struct ext2_sb_info *sbi = EXT2_SB(sb);
	struct ext2_super_block *es = sbi->s_es;
	unsigned group_size = EXT2_BLOCKS_PER_GROUP(sb);
	unsigned prealloc_goal = es->s_prealloc_blocks;
	unsigned group_alloc = 0, es_alloc, dq_alloc;
	int nr_scanned_groups;

	/*
	 * prealloc_goal becomes the number of blocks wanted in addition to
	 * the one being allocated: the superblock value minus one, or the
	 * default minus one when the superblock value is zero.
	 */
	if (!prealloc_goal--)
		prealloc_goal = EXT2_DEFAULT_PREALLOC_BLOCKS - 1;
	/* No preallocation if the caller can't take it or still has some. */
	if (!prealloc_count || *prealloc_count)
		prealloc_goal = 0;

	if (DQUOT_ALLOC_BLOCK(inode, 1)) {
		*err = -EDQUOT;
		goto out;
	}

	/* Shrink the preallocation request until quota accepts it. */
	while (prealloc_goal && DQUOT_PREALLOC_BLOCK(inode, prealloc_goal))
		prealloc_goal--;

	dq_alloc = prealloc_goal + 1;
	es_alloc = reserve_blocks(sb, dq_alloc);
	if (!es_alloc) {
		*err = -ENOSPC;
		goto out_dquot;
	}

	ext2_debug ("goal=%lu.\n", goal);

	if (goal < le32_to_cpu(es->s_first_data_block) ||
	    goal >= le32_to_cpu(es->s_blocks_count))
		goal = le32_to_cpu(es->s_first_data_block);
	group_no = (goal - le32_to_cpu(es->s_first_data_block)) / group_size;
	desc = ext2_get_group_desc (sb, group_no, &gdp_bh);
	if (!desc) {
		/*
		 * gdp_bh may still be uninitialised.  But group_release_blocks
		 * will not touch it because group_alloc is zero.
		 */
		goto io_error;
	}

	/* First attempt: the group that contains the goal. */
	group_alloc = group_reserve_blocks(sbi, group_no, desc,
					gdp_bh, es_alloc);
	if (group_alloc) {
		ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
					group_size);
		brelse(bitmap_bh);
		bitmap_bh = read_block_bitmap(sb, group_no);
		if (!bitmap_bh)
			goto io_error;

		ext2_debug("goal is at %d:%d.\n", group_no, ret_block);

		ret_block = grab_block(sb_bgl_lock(sbi, group_no),
				bitmap_bh->b_data, group_size, ret_block);
		if (ret_block >= 0)
			goto got_block;
		/* No bit found here: undo this group's reservation. */
		group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
		group_alloc = 0;
	}

	ext2_debug ("Bit not found in block group %d.\n", group_no);

	/*
	 * Now search the rest of the groups.  We assume that
	 * group_no and desc correctly point to the last group visited.
	 */
	nr_scanned_groups = 0;
retry:
	/* Stop at the first group that lets us reserve something. */
	for (group_idx = 0; !group_alloc &&
			group_idx < sbi->s_groups_count; group_idx++) {
		group_no++;
		if (group_no >= sbi->s_groups_count)
			group_no = 0;
		desc = ext2_get_group_desc(sb, group_no, &gdp_bh);
		if (!desc)
			goto io_error;
		group_alloc = group_reserve_blocks(sbi, group_no, desc,
						gdp_bh, es_alloc);
	}
	if (!group_alloc) {
		*err = -ENOSPC;
		goto out_release;
	}
	brelse(bitmap_bh);
	bitmap_bh = read_block_bitmap(sb, group_no);
	if (!bitmap_bh)
		goto io_error;

	ret_block = grab_block(sb_bgl_lock(sbi, group_no), bitmap_bh->b_data,
				group_size, 0);
	if (ret_block < 0) {
		/*
		 * If a free block counter is corrupted we can loop infinitely.
		 * Detect that here.
		 */
		nr_scanned_groups++;
		if (nr_scanned_groups > 2 * sbi->s_groups_count) {
			ext2_error(sb, "ext2_new_block",
				"corrupted free blocks counters");
			goto io_error;
		}
		/*
		 * Someone else grabbed the last free block in this blockgroup
		 * before us.  Retry the scan.
		 */
		group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
		group_alloc = 0;
		goto retry;
	}

got_block:
	ext2_debug("using block group %d(%d)\n",
		group_no, desc->bg_free_blocks_count);

	/* Convert the bit number within the group to a block number. */
	target_block = ret_block + group_no * group_size +
			le32_to_cpu(es->s_first_data_block);

	/* Reported only; the allocation still goes ahead in this case. */
	if (target_block == le32_to_cpu(desc->bg_block_bitmap) ||
	    target_block == le32_to_cpu(desc->bg_inode_bitmap) ||
	    in_range(target_block, le32_to_cpu(desc->bg_inode_table),
		      sbi->s_itb_per_group))
		ext2_error (sb, "ext2_new_block",
			    "Allocating block in system zone - "
			    "block = %u", target_block);

	/*
	 * NOTE(review): on this failure the bit set by grab_block() is not
	 * cleared again; only the counters are handed back below.
	 */
	if (target_block >= le32_to_cpu(es->s_blocks_count)) {
		ext2_error (sb, "ext2_new_block",
			    "block(%d) >= blocks count(%d) - "
			    "block_group = %d, es == %p ", ret_block,
			le32_to_cpu(es->s_blocks_count), group_no, es);
		goto io_error;
	}
	block = target_block;

	/* OK, we _had_ allocated something */
	ext2_debug("found bit %d\n", ret_block);

	/* One block of each reservation has now been consumed. */
	dq_alloc--;
	es_alloc--;
	group_alloc--;

	/*
	 * Do block preallocation now if required.  group_alloc can only be
	 * non-zero here when preallocation was requested, which implies a
	 * non-NULL prealloc_count.
	 */
	write_lock(&EXT2_I(inode)->i_meta_lock);
	if (group_alloc && !*prealloc_count) {
		unsigned n;

		/*
		 * Claim the bits directly following the allocated one, up to
		 * what is still reserved in this group; stop at the first
		 * bit that cannot be claimed or at the end of the group.
		 */
		for (n = 0; n < group_alloc && ++ret_block < group_size; n++) {
			if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group_no),
						ret_block,
						(void*) bitmap_bh->b_data))
				break;
		}
		*prealloc_block = block + 1;
		*prealloc_count = n;
		es_alloc -= n;
		dq_alloc -= n;
		group_alloc -= n;
	}
	write_unlock(&EXT2_I(inode)->i_meta_lock);

	mark_buffer_dirty(bitmap_bh);
	if (sb->s_flags & MS_SYNCHRONOUS)
		sync_dirty_buffer(bitmap_bh);

	ext2_debug ("allocating block %d. ", block);

	*err = 0;
out_release:
	/* Hand back whatever was reserved but not used, level by level. */
	group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
	release_blocks(sb, es_alloc);
out_dquot:
	DQUOT_FREE_BLOCK(inode, dq_alloc);
out:
	brelse(bitmap_bh);
	return block;

io_error:
	*err = -EIO;
	goto out_release;
}
523
#ifdef EXT2FS_DEBUG

/* Number of zero bits in each possible 4-bit value. */
static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};

/*
 * ext2_count_free - count the zero bits in the first @numchars bytes of a
 * bitmap buffer.  Returns 0 when @map is NULL.
 */
unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars)
{
	unsigned long free_bits = 0;
	unsigned int pos;

	if (!map)
		return 0;

	for (pos = 0; pos < numchars; pos++) {
		int byte = map->b_data[pos];

		/* Low nibble and high nibble are looked up together. */
		free_bits += nibblemap[byte & 0xf] +
			     nibblemap[(byte >> 4) & 0xf];
	}
	return free_bits;
}

#endif  /*  EXT2FS_DEBUG  */
542
543/* Superblock must be locked */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544unsigned long ext2_count_free_blocks (struct super_block * sb)
545{
546 struct ext2_group_desc * desc;
547 unsigned long desc_count = 0;
548 int i;
549#ifdef EXT2FS_DEBUG
550 unsigned long bitmap_count, x;
551 struct ext2_super_block *es;
552
Linus Torvalds1da177e2005-04-16 15:20:36 -0700553 es = EXT2_SB(sb)->s_es;
554 desc_count = 0;
555 bitmap_count = 0;
556 desc = NULL;
557 for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
558 struct buffer_head *bitmap_bh;
559 desc = ext2_get_group_desc (sb, i, NULL);
560 if (!desc)
561 continue;
562 desc_count += le16_to_cpu(desc->bg_free_blocks_count);
563 bitmap_bh = read_block_bitmap(sb, i);
564 if (!bitmap_bh)
565 continue;
566
567 x = ext2_count_free(bitmap_bh, sb->s_blocksize);
568 printk ("group %d: stored = %d, counted = %lu\n",
569 i, le16_to_cpu(desc->bg_free_blocks_count), x);
570 bitmap_count += x;
571 brelse(bitmap_bh);
572 }
573 printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu\n",
574 (long)le32_to_cpu(es->s_free_blocks_count),
575 desc_count, bitmap_count);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700576 return bitmap_count;
577#else
578 for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
579 desc = ext2_get_group_desc (sb, i, NULL);
580 if (!desc)
581 continue;
582 desc_count += le16_to_cpu(desc->bg_free_blocks_count);
583 }
584 return desc_count;
585#endif
586}
587
588static inline int
589block_in_use(unsigned long block, struct super_block *sb, unsigned char *map)
590{
591 return ext2_test_bit ((block -
592 le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block)) %
593 EXT2_BLOCKS_PER_GROUP(sb), map);
594}
595
/*
 * test_root - check whether @a is a positive integral power of @b
 * @a: value to test
 * @b: base; callers pass 3, 5 or 7 (must be >= 2)
 *
 * Returns 1 if a == b^k for some k >= 1, otherwise 0.
 *
 * The test divides @a down instead of multiplying a running power up, so
 * no intermediate value can exceed @a.  Multiplying up overflows a signed
 * int (undefined behaviour) once @a is larger than the biggest power of @b
 * that fits in an int.
 */
static inline int test_root(int a, int b)
{
	while (1) {
		if (a < b)
			return 0;
		if (a == b)
			return 1;
		if (a % b != 0)
			return 0;
		a /= b;
	}
}
604
/*
 * Returns 1 for groups 0 and 1 (and any negative @group) and for every
 * group whose number is a power of 3, 5 or 7; returns 0 otherwise.
 */
static int ext2_group_sparse(int group)
{
	if (group <= 1)
		return 1;
	if (test_root(group, 3))
		return 1;
	if (test_root(group, 5))
		return 1;
	return test_root(group, 7) != 0;
}
612
613/**
614 * ext2_bg_has_super - number of blocks used by the superblock in group
615 * @sb: superblock for filesystem
616 * @group: group number to check
617 *
618 * Return the number of blocks used by the superblock (primary or backup)
619 * in this group. Currently this will be only 0 or 1.
620 */
621int ext2_bg_has_super(struct super_block *sb, int group)
622{
623 if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
624 !ext2_group_sparse(group))
625 return 0;
626 return 1;
627}
628
629/**
630 * ext2_bg_num_gdb - number of blocks used by the group table in group
631 * @sb: superblock for filesystem
632 * @group: group number to check
633 *
634 * Return the number of blocks used by the group descriptor table
635 * (primary or backup) in this group. In the future there may be a
636 * different number of descriptor blocks in each group.
637 */
638unsigned long ext2_bg_num_gdb(struct super_block *sb, int group)
639{
640 if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
641 !ext2_group_sparse(group))
642 return 0;
643 return EXT2_SB(sb)->s_gdb_count;
644}
645