blob: 1022347a211f80099dd06f1879e26c7945c853c7 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
4/* Reiserfs block (de)allocator, bitmap-based. */
5
Linus Torvalds1da177e2005-04-16 15:20:36 -07006#include <linux/time.h>
7#include <linux/reiserfs_fs.h>
8#include <linux/errno.h>
9#include <linux/buffer_head.h>
10#include <linux/kernel.h>
11#include <linux/pagemap.h>
12#include <linux/reiserfs_fs_sb.h>
13#include <linux/reiserfs_fs_i.h>
14#include <linux/quotaops.h>
15
16#define PREALLOCATION_SIZE 9
17
18/* different reiserfs block allocator options */
19
20#define SB_ALLOC_OPTS(s) (REISERFS_SB(s)->s_alloc_options.bits)
21
22#define _ALLOC_concentrating_formatted_nodes 0
23#define _ALLOC_displacing_large_files 1
24#define _ALLOC_displacing_new_packing_localities 2
25#define _ALLOC_old_hashed_relocation 3
26#define _ALLOC_new_hashed_relocation 4
27#define _ALLOC_skip_busy 5
28#define _ALLOC_displace_based_on_dirid 6
29#define _ALLOC_hashed_formatted_nodes 7
30#define _ALLOC_old_way 8
31#define _ALLOC_hundredth_slices 9
32#define _ALLOC_dirid_groups 10
33#define _ALLOC_oid_groups 11
34#define _ALLOC_packing_groups 12
35
36#define concentrating_formatted_nodes(s) test_bit(_ALLOC_concentrating_formatted_nodes, &SB_ALLOC_OPTS(s))
37#define displacing_large_files(s) test_bit(_ALLOC_displacing_large_files, &SB_ALLOC_OPTS(s))
38#define displacing_new_packing_localities(s) test_bit(_ALLOC_displacing_new_packing_localities, &SB_ALLOC_OPTS(s))
39
40#define SET_OPTION(optname) \
41 do { \
42 reiserfs_warning(s, "reiserfs: option \"%s\" is set", #optname); \
43 set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \
44 } while(0)
45#define TEST_OPTION(optname, s) \
46 test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))
47
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070048static inline void get_bit_address(struct super_block *s,
49 b_blocknr_t block, int *bmap_nr, int *offset)
Linus Torvalds1da177e2005-04-16 15:20:36 -070050{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070051 /* It is in the bitmap block number equal to the block
52 * number divided by the number of bits in a block. */
Jeff Mahoneye1fabd32006-09-30 23:28:40 -070053 *bmap_nr = block >> (s->s_blocksize_bits + 3);
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070054 /* Within that bitmap block it is located at bit offset *offset. */
55 *offset = block & ((s->s_blocksize << 3) - 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -070056}
57
58#ifdef CONFIG_REISERFS_CHECK
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070059int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
Linus Torvalds1da177e2005-04-16 15:20:36 -070060{
Jeff Mahoneye1fabd32006-09-30 23:28:40 -070061 int bmap, offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -070062
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070063 if (block == 0 || block >= SB_BLOCK_COUNT(s)) {
64 reiserfs_warning(s,
65 "vs-4010: is_reusable: block number is out of range %lu (%u)",
66 block, SB_BLOCK_COUNT(s));
67 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -070068 }
Linus Torvalds1da177e2005-04-16 15:20:36 -070069
Jeff Mahoneye1fabd32006-09-30 23:28:40 -070070 get_bit_address(s, block, &bmap, &offset);
71
72 /* Old format filesystem? Unlikely, but the bitmaps are all up front so
73 * we need to account for it. */
74 if (unlikely(test_bit(REISERFS_OLD_FORMAT,
75 &(REISERFS_SB(s)->s_properties)))) {
76 b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1;
77 if (block >= bmap1 && block <= bmap1 + SB_BMAP_NR(s)) {
78 reiserfs_warning(s, "vs: 4019: is_reusable: "
79 "bitmap block %lu(%u) can't be freed or reused",
80 block, SB_BMAP_NR(s));
81 return 0;
82 }
83 } else {
84 if (offset == 0) {
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070085 reiserfs_warning(s, "vs: 4020: is_reusable: "
86 "bitmap block %lu(%u) can't be freed or reused",
87 block, SB_BMAP_NR(s));
88 return 0;
89 }
Jeff Mahoneye1fabd32006-09-30 23:28:40 -070090 }
Linus Torvalds1da177e2005-04-16 15:20:36 -070091
Jeff Mahoneye1fabd32006-09-30 23:28:40 -070092 if (bmap >= SB_BMAP_NR(s)) {
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070093 reiserfs_warning(s,
94 "vs-4030: is_reusable: there is no so many bitmap blocks: "
Jeff Mahoneye1fabd32006-09-30 23:28:40 -070095 "block=%lu, bitmap_nr=%d", block, bmap);
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070096 return 0;
97 }
Linus Torvalds1da177e2005-04-16 15:20:36 -070098
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070099 if ((bit_value == 0 &&
Jeff Mahoneye1fabd32006-09-30 23:28:40 -0700100 reiserfs_test_le_bit(offset, SB_AP_BITMAP(s)[bmap].bh->b_data)) ||
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700101 (bit_value == 1 &&
Jeff Mahoneye1fabd32006-09-30 23:28:40 -0700102 reiserfs_test_le_bit(offset, SB_AP_BITMAP(s)[bmap].bh->b_data) == 0)) {
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700103 reiserfs_warning(s,
104 "vs-4040: is_reusable: corresponding bit of block %lu does not "
Jeff Mahoneye1fabd32006-09-30 23:28:40 -0700105 "match required value (bmap==%d, offset==%d) test_bit==%d",
106 block, bmap, offset, reiserfs_test_le_bit(offset,
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700107 SB_AP_BITMAP
Jeff Mahoneye1fabd32006-09-30 23:28:40 -0700108 (s)[bmap].bh->
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700109 b_data));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700110
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700111 return 0;
112 }
113
114 if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) {
115 reiserfs_warning(s,
116 "vs-4050: is_reusable: this is root block (%u), "
117 "it must be busy", SB_ROOT_BLOCK(s));
118 return 0;
119 }
120
121 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122}
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700123#endif /* CONFIG_REISERFS_CHECK */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124
125/* searches in journal structures for a given block number (bmap, off). If block
126 is found in reiserfs journal it suggests next free block candidate to test. */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700127static inline int is_block_in_journal(struct super_block *s, int bmap, int
128 off, int *next)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700129{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700130 b_blocknr_t tmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700131
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700132 if (reiserfs_in_journal(s, bmap, off, 1, &tmp)) {
133 if (tmp) { /* hint supplied */
134 *next = tmp;
135 PROC_INFO_INC(s, scan_bitmap.in_journal_hint);
136 } else {
137 (*next) = off + 1; /* inc offset to avoid looping. */
138 PROC_INFO_INC(s, scan_bitmap.in_journal_nohint);
139 }
140 PROC_INFO_INC(s, scan_bitmap.retry);
141 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700143 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144}
145
146/* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap
147 * block; */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700148static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
149 int bmap_n, int *beg, int boundary, int min,
150 int max, int unfm)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700151{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700152 struct super_block *s = th->t_super;
153 struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n];
154 int end, next;
155 int org = *beg;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700156
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700157 BUG_ON(!th->t_trans_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700158
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700159 RFALSE(bmap_n >= SB_BMAP_NR(s), "Bitmap %d is out of range (0..%d)",
160 bmap_n, SB_BMAP_NR(s) - 1);
161 PROC_INFO_INC(s, scan_bitmap.bmap);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162/* this is unclear and lacks comments, explain how journal bitmaps
163 work here for the reader. Convey a sense of the design here. What
164 is a window? */
165/* - I mean `a window of zero bits' as in description of this function - Zam. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700167 if (!bi) {
168 reiserfs_warning(s, "NULL bitmap info pointer for bitmap %d",
169 bmap_n);
170 return 0;
171 }
172 if (buffer_locked(bi->bh)) {
173 PROC_INFO_INC(s, scan_bitmap.wait);
174 __wait_on_buffer(bi->bh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700175 }
176
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700177 while (1) {
178 cont:
179 if (bi->free_count < min)
180 return 0; // No free blocks in this bitmap
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700182 /* search for a first zero bit -- beggining of a window */
183 *beg = reiserfs_find_next_zero_le_bit
184 ((unsigned long *)(bi->bh->b_data), boundary, *beg);
185
186 if (*beg + min > boundary) { /* search for a zero bit fails or the rest of bitmap block
187 * cannot contain a zero window of minimum size */
188 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700190
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700191 if (unfm && is_block_in_journal(s, bmap_n, *beg, beg))
192 continue;
193 /* first zero bit found; we check next bits */
194 for (end = *beg + 1;; end++) {
195 if (end >= *beg + max || end >= boundary
196 || reiserfs_test_le_bit(end, bi->bh->b_data)) {
197 next = end;
198 break;
199 }
200 /* finding the other end of zero bit window requires looking into journal structures (in
201 * case of searching for free blocks for unformatted nodes) */
202 if (unfm && is_block_in_journal(s, bmap_n, end, &next))
203 break;
204 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700205
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700206 /* now (*beg) points to beginning of zero bits window,
207 * (end) points to one bit after the window end */
208 if (end - *beg >= min) { /* it seems we have found window of proper size */
209 int i;
210 reiserfs_prepare_for_journal(s, bi->bh, 1);
211 /* try to set all blocks used checking are they still free */
212 for (i = *beg; i < end; i++) {
213 /* It seems that we should not check in journal again. */
214 if (reiserfs_test_and_set_le_bit
215 (i, bi->bh->b_data)) {
216 /* bit was set by another process
217 * while we slept in prepare_for_journal() */
218 PROC_INFO_INC(s, scan_bitmap.stolen);
219 if (i >= *beg + min) { /* we can continue with smaller set of allocated blocks,
220 * if length of this set is more or equal to `min' */
221 end = i;
222 break;
223 }
224 /* otherwise we clear all bit were set ... */
225 while (--i >= *beg)
226 reiserfs_test_and_clear_le_bit
227 (i, bi->bh->b_data);
228 reiserfs_restore_prepared_buffer(s,
229 bi->
230 bh);
231 *beg = org;
232 /* ... and search again in current block from beginning */
233 goto cont;
234 }
235 }
236 bi->free_count -= (end - *beg);
237 journal_mark_dirty(th, s, bi->bh);
238
239 /* free block count calculation */
240 reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
241 1);
242 PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg));
243 journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s));
244
245 return end - (*beg);
246 } else {
247 *beg = next;
248 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250}
251
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700252static int bmap_hash_id(struct super_block *s, u32 id)
253{
254 char *hash_in = NULL;
255 unsigned long hash;
256 unsigned bm;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700258 if (id <= 2) {
259 bm = 1;
260 } else {
261 hash_in = (char *)(&id);
262 hash = keyed_hash(hash_in, 4);
263 bm = hash % SB_BMAP_NR(s);
264 if (!bm)
265 bm = 1;
266 }
267 /* this can only be true when SB_BMAP_NR = 1 */
268 if (bm >= SB_BMAP_NR(s))
269 bm = 0;
270 return bm;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271}
272
273/*
274 * hashes the id and then returns > 0 if the block group for the
275 * corresponding hash is full
276 */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700277static inline int block_group_used(struct super_block *s, u32 id)
278{
279 int bm;
280 bm = bmap_hash_id(s, id);
281 if (SB_AP_BITMAP(s)[bm].free_count > ((s->s_blocksize << 3) * 60 / 100)) {
282 return 0;
283 }
284 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700285}
286
287/*
288 * the packing is returned in disk byte order
289 */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700290__le32 reiserfs_choose_packing(struct inode * dir)
Al Viro3e8962b2005-05-01 08:59:18 -0700291{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700292 __le32 packing;
293 if (TEST_OPTION(packing_groups, dir->i_sb)) {
294 u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id);
295 /*
296 * some versions of reiserfsck expect packing locality 1 to be
297 * special
298 */
299 if (parent_dir == 1 || block_group_used(dir->i_sb, parent_dir))
300 packing = INODE_PKEY(dir)->k_objectid;
301 else
302 packing = INODE_PKEY(dir)->k_dir_id;
303 } else
304 packing = INODE_PKEY(dir)->k_objectid;
305 return packing;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700306}
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700307
Linus Torvalds1da177e2005-04-16 15:20:36 -0700308/* Tries to find contiguous zero bit window (given size) in given region of
309 * bitmap and place new blocks there. Returns number of allocated blocks. */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700310static int scan_bitmap(struct reiserfs_transaction_handle *th,
311 b_blocknr_t * start, b_blocknr_t finish,
312 int min, int max, int unfm, unsigned long file_block)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700314 int nr_allocated = 0;
315 struct super_block *s = th->t_super;
316 /* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr
317 * - Hans, it is not a block number - Zam. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700318
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700319 int bm, off;
320 int end_bm, end_off;
321 int off_max = s->s_blocksize << 3;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700323 BUG_ON(!th->t_trans_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700324
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700325 PROC_INFO_INC(s, scan_bitmap.call);
326 if (SB_FREE_BLOCKS(s) <= 0)
327 return 0; // No point in looking for more free blocks
Linus Torvalds1da177e2005-04-16 15:20:36 -0700328
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700329 get_bit_address(s, *start, &bm, &off);
330 get_bit_address(s, finish, &end_bm, &end_off);
331 if (bm > SB_BMAP_NR(s))
332 return 0;
333 if (end_bm > SB_BMAP_NR(s))
334 end_bm = SB_BMAP_NR(s);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700335
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700336 /* When the bitmap is more than 10% free, anyone can allocate.
337 * When it's less than 10% free, only files that already use the
338 * bitmap are allowed. Once we pass 80% full, this restriction
339 * is lifted.
340 *
341 * We do this so that files that grow later still have space close to
342 * their original allocation. This improves locality, and presumably
343 * performance as a result.
344 *
345 * This is only an allocation policy and does not make up for getting a
346 * bad hint. Decent hinting must be implemented for this to work well.
347 */
348 if (TEST_OPTION(skip_busy, s)
349 && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s) / 20) {
350 for (; bm < end_bm; bm++, off = 0) {
351 if ((off && (!unfm || (file_block != 0)))
352 || SB_AP_BITMAP(s)[bm].free_count >
353 (s->s_blocksize << 3) / 10)
354 nr_allocated =
355 scan_bitmap_block(th, bm, &off, off_max,
356 min, max, unfm);
357 if (nr_allocated)
358 goto ret;
359 }
360 /* we know from above that start is a reasonable number */
361 get_bit_address(s, *start, &bm, &off);
362 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700364 for (; bm < end_bm; bm++, off = 0) {
365 nr_allocated =
366 scan_bitmap_block(th, bm, &off, off_max, min, max, unfm);
367 if (nr_allocated)
368 goto ret;
369 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700371 nr_allocated =
372 scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm);
373
374 ret:
375 *start = bm * off_max + off;
376 return nr_allocated;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700377
378}
379
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700380static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
381 struct inode *inode, b_blocknr_t block,
382 int for_unformatted)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700383{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700384 struct super_block *s = th->t_super;
385 struct reiserfs_super_block *rs;
386 struct buffer_head *sbh;
387 struct reiserfs_bitmap_info *apbi;
388 int nr, offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700390 BUG_ON(!th->t_trans_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700391
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700392 PROC_INFO_INC(s, free_block);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700393
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700394 rs = SB_DISK_SUPER_BLOCK(s);
395 sbh = SB_BUFFER_WITH_SB(s);
396 apbi = SB_AP_BITMAP(s);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700397
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700398 get_bit_address(s, block, &nr, &offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700399
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700400 if (nr >= sb_bmap_nr(rs)) {
401 reiserfs_warning(s, "vs-4075: reiserfs_free_block: "
402 "block %lu is out of range on %s",
403 block, reiserfs_bdevname(s));
404 return;
405 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700407 reiserfs_prepare_for_journal(s, apbi[nr].bh, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700409 /* clear bit for the given block in bit map */
410 if (!reiserfs_test_and_clear_le_bit(offset, apbi[nr].bh->b_data)) {
411 reiserfs_warning(s, "vs-4080: reiserfs_free_block: "
412 "free_block (%s:%lu)[dev:blocknr]: bit already cleared",
413 reiserfs_bdevname(s), block);
414 }
415 apbi[nr].free_count++;
416 journal_mark_dirty(th, s, apbi[nr].bh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700418 reiserfs_prepare_for_journal(s, sbh, 1);
419 /* update super block */
420 set_sb_free_blocks(rs, sb_free_blocks(rs) + 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700422 journal_mark_dirty(th, s, sbh);
423 if (for_unformatted)
424 DQUOT_FREE_BLOCK_NODIRTY(inode, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425}
426
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700427void reiserfs_free_block(struct reiserfs_transaction_handle *th,
428 struct inode *inode, b_blocknr_t block,
429 int for_unformatted)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700431 struct super_block *s = th->t_super;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700433 BUG_ON(!th->t_trans_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700434
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700435 RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
436 RFALSE(is_reusable(s, block, 1) == 0,
437 "vs-4071: can not free such block");
438 /* mark it before we clear it, just in case */
439 journal_mark_freed(th, s, block);
440 _reiserfs_free_block(th, inode, block, for_unformatted);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441}
442
443/* preallocated blocks don't need to be run through journal_mark_freed */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700444static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th,
445 struct inode *inode, b_blocknr_t block)
446{
447 RFALSE(!th->t_super,
448 "vs-4060: trying to free block on nonexistent device");
449 RFALSE(is_reusable(th->t_super, block, 1) == 0,
450 "vs-4070: can not free such block");
451 BUG_ON(!th->t_trans_id);
452 _reiserfs_free_block(th, inode, block, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453}
454
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700455static void __discard_prealloc(struct reiserfs_transaction_handle *th,
456 struct reiserfs_inode_info *ei)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700457{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700458 unsigned long save = ei->i_prealloc_block;
459 int dirty = 0;
460 struct inode *inode = &ei->vfs_inode;
461 BUG_ON(!th->t_trans_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462#ifdef CONFIG_REISERFS_CHECK
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700463 if (ei->i_prealloc_count < 0)
464 reiserfs_warning(th->t_super,
465 "zam-4001:%s: inode has negative prealloc blocks count.",
466 __FUNCTION__);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467#endif
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700468 while (ei->i_prealloc_count > 0) {
469 reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block);
470 ei->i_prealloc_block++;
471 ei->i_prealloc_count--;
472 dirty = 1;
473 }
474 if (dirty)
475 reiserfs_update_sd(th, inode);
476 ei->i_prealloc_block = save;
477 list_del_init(&(ei->i_prealloc_list));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700478}
479
480/* FIXME: It should be inline function */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700481void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th,
482 struct inode *inode)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700483{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700484 struct reiserfs_inode_info *ei = REISERFS_I(inode);
485 BUG_ON(!th->t_trans_id);
486 if (ei->i_prealloc_count)
487 __discard_prealloc(th, ei);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700488}
489
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700490void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700491{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700492 struct list_head *plist = &SB_JOURNAL(th->t_super)->j_prealloc_list;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700494 BUG_ON(!th->t_trans_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700495
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700496 while (!list_empty(plist)) {
497 struct reiserfs_inode_info *ei;
498 ei = list_entry(plist->next, struct reiserfs_inode_info,
499 i_prealloc_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500#ifdef CONFIG_REISERFS_CHECK
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700501 if (!ei->i_prealloc_count) {
502 reiserfs_warning(th->t_super,
503 "zam-4001:%s: inode is in prealloc list but has no preallocated blocks.",
504 __FUNCTION__);
505 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506#endif
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700507 __discard_prealloc(th, ei);
508 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700509}
510
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700511void reiserfs_init_alloc_options(struct super_block *s)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700513 set_bit(_ALLOC_skip_busy, &SB_ALLOC_OPTS(s));
514 set_bit(_ALLOC_dirid_groups, &SB_ALLOC_OPTS(s));
515 set_bit(_ALLOC_packing_groups, &SB_ALLOC_OPTS(s));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516}
517
518/* block allocator related options are parsed here */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700519int reiserfs_parse_alloc_options(struct super_block *s, char *options)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700520{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700521 char *this_char, *value;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700523 REISERFS_SB(s)->s_alloc_options.bits = 0; /* clear default settings */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700524
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700525 while ((this_char = strsep(&options, ":")) != NULL) {
526 if ((value = strchr(this_char, '=')) != NULL)
527 *value++ = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700528
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700529 if (!strcmp(this_char, "concentrating_formatted_nodes")) {
530 int temp;
531 SET_OPTION(concentrating_formatted_nodes);
532 temp = (value
533 && *value) ? simple_strtoul(value, &value,
534 0) : 10;
535 if (temp <= 0 || temp > 100) {
536 REISERFS_SB(s)->s_alloc_options.border = 10;
537 } else {
538 REISERFS_SB(s)->s_alloc_options.border =
539 100 / temp;
540 }
541 continue;
542 }
543 if (!strcmp(this_char, "displacing_large_files")) {
544 SET_OPTION(displacing_large_files);
545 REISERFS_SB(s)->s_alloc_options.large_file_size =
546 (value
547 && *value) ? simple_strtoul(value, &value, 0) : 16;
548 continue;
549 }
550 if (!strcmp(this_char, "displacing_new_packing_localities")) {
551 SET_OPTION(displacing_new_packing_localities);
552 continue;
553 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700555 if (!strcmp(this_char, "old_hashed_relocation")) {
556 SET_OPTION(old_hashed_relocation);
557 continue;
558 }
559
560 if (!strcmp(this_char, "new_hashed_relocation")) {
561 SET_OPTION(new_hashed_relocation);
562 continue;
563 }
564
565 if (!strcmp(this_char, "dirid_groups")) {
566 SET_OPTION(dirid_groups);
567 continue;
568 }
569 if (!strcmp(this_char, "oid_groups")) {
570 SET_OPTION(oid_groups);
571 continue;
572 }
573 if (!strcmp(this_char, "packing_groups")) {
574 SET_OPTION(packing_groups);
575 continue;
576 }
577 if (!strcmp(this_char, "hashed_formatted_nodes")) {
578 SET_OPTION(hashed_formatted_nodes);
579 continue;
580 }
581
582 if (!strcmp(this_char, "skip_busy")) {
583 SET_OPTION(skip_busy);
584 continue;
585 }
586
587 if (!strcmp(this_char, "hundredth_slices")) {
588 SET_OPTION(hundredth_slices);
589 continue;
590 }
591
592 if (!strcmp(this_char, "old_way")) {
593 SET_OPTION(old_way);
594 continue;
595 }
596
597 if (!strcmp(this_char, "displace_based_on_dirid")) {
598 SET_OPTION(displace_based_on_dirid);
599 continue;
600 }
601
602 if (!strcmp(this_char, "preallocmin")) {
603 REISERFS_SB(s)->s_alloc_options.preallocmin =
604 (value
605 && *value) ? simple_strtoul(value, &value, 0) : 4;
606 continue;
607 }
608
609 if (!strcmp(this_char, "preallocsize")) {
610 REISERFS_SB(s)->s_alloc_options.preallocsize =
611 (value
612 && *value) ? simple_strtoul(value, &value,
613 0) :
614 PREALLOCATION_SIZE;
615 continue;
616 }
617
618 reiserfs_warning(s, "zam-4001: %s : unknown option - %s",
619 __FUNCTION__, this_char);
620 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700621 }
622
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700623 reiserfs_warning(s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s));
624 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700625}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700627static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
628{
629 char *hash_in;
630 if (hint->formatted_node) {
631 hash_in = (char *)&hint->key.k_dir_id;
632 } else {
633 if (!hint->inode) {
634 //hint->search_start = hint->beg;
635 hash_in = (char *)&hint->key.k_dir_id;
636 } else
637 if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
638 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
639 else
640 hash_in =
641 (char *)(&INODE_PKEY(hint->inode)->k_objectid);
642 }
643
644 hint->search_start =
645 hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646}
647
648/*
649 * Relocation based on dirid, hashing them into a given bitmap block
650 * files. Formatted nodes are unaffected, a seperate policy covers them
651 */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700652static void dirid_groups(reiserfs_blocknr_hint_t * hint)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700653{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700654 unsigned long hash;
655 __u32 dirid = 0;
656 int bm = 0;
657 struct super_block *sb = hint->th->t_super;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700658 if (hint->inode)
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700659 dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
660 else if (hint->formatted_node)
661 dirid = hint->key.k_dir_id;
662
663 if (dirid) {
664 bm = bmap_hash_id(sb, dirid);
665 hash = bm * (sb->s_blocksize << 3);
666 /* give a portion of the block group to metadata */
667 if (hint->inode)
668 hash += sb->s_blocksize / 2;
669 hint->search_start = hash;
670 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700671}
672
673/*
674 * Relocation based on oid, hashing them into a given bitmap block
675 * files. Formatted nodes are unaffected, a seperate policy covers them
676 */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700677static void oid_groups(reiserfs_blocknr_hint_t * hint)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700678{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700679 if (hint->inode) {
680 unsigned long hash;
681 __u32 oid;
682 __u32 dirid;
683 int bm;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700684
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700685 dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700687 /* keep the root dir and it's first set of subdirs close to
688 * the start of the disk
689 */
690 if (dirid <= 2)
691 hash = (hint->inode->i_sb->s_blocksize << 3);
692 else {
693 oid = le32_to_cpu(INODE_PKEY(hint->inode)->k_objectid);
694 bm = bmap_hash_id(hint->inode->i_sb, oid);
695 hash = bm * (hint->inode->i_sb->s_blocksize << 3);
696 }
697 hint->search_start = hash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700698 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700699}
700
701/* returns 1 if it finds an indirect item and gets valid hint info
702 * from it, otherwise 0
703 */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700704static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700706 struct path *path;
707 struct buffer_head *bh;
708 struct item_head *ih;
709 int pos_in_item;
710 __le32 *item;
711 int ret = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700712
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700713 if (!hint->path) /* reiserfs code can call this function w/o pointer to path
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714 * structure supplied; then we rely on supplied search_start */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700715 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700717 path = hint->path;
718 bh = get_last_bh(path);
719 RFALSE(!bh, "green-4002: Illegal path specified to get_left_neighbor");
720 ih = get_ih(path);
721 pos_in_item = path->pos_in_item;
722 item = get_item(path);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700723
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700724 hint->search_start = bh->b_blocknr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700725
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700726 if (!hint->formatted_node && is_indirect_le_ih(ih)) {
727 /* for indirect item: go to left and look for the first non-hole entry
728 in the indirect item */
729 if (pos_in_item == I_UNFM_NUM(ih))
730 pos_in_item--;
731// pos_in_item = I_UNFM_NUM (ih) - 1;
732 while (pos_in_item >= 0) {
733 int t = get_block_num(item, pos_in_item);
734 if (t) {
735 hint->search_start = t;
736 ret = 1;
737 break;
738 }
739 pos_in_item--;
740 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700741 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700742
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700743 /* does result value fit into specified region? */
744 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700745}
746
747/* should be, if formatted node, then try to put on first part of the device
748 specified as number of percent with mount option device, else try to put
749 on last of device. This is not to say it is good code to do so,
750 but the effect should be measured. */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700751static inline void set_border_in_hint(struct super_block *s,
752 reiserfs_blocknr_hint_t * hint)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700753{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700754 b_blocknr_t border =
755 SB_BLOCK_COUNT(s) / REISERFS_SB(s)->s_alloc_options.border;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700757 if (hint->formatted_node)
758 hint->end = border - 1;
759 else
760 hint->beg = border;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761}
762
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700763static inline void displace_large_file(reiserfs_blocknr_hint_t * hint)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700765 if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
766 hint->search_start =
767 hint->beg +
768 keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id),
769 4) % (hint->end - hint->beg);
770 else
771 hint->search_start =
772 hint->beg +
773 keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid),
774 4) % (hint->end - hint->beg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700775}
776
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700777static inline void hash_formatted_node(reiserfs_blocknr_hint_t * hint)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700778{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700779 char *hash_in;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700781 if (!hint->inode)
782 hash_in = (char *)&hint->key.k_dir_id;
783 else if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
784 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
785 else
786 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700787
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700788 hint->search_start =
789 hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700790}
791
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700792static inline int
793this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t *
794 hint)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700795{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700796 return hint->block ==
797 REISERFS_SB(hint->th->t_super)->s_alloc_options.large_file_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798}
799
800#ifdef DISPLACE_NEW_PACKING_LOCALITIES
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700801static inline void displace_new_packing_locality(reiserfs_blocknr_hint_t * hint)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700802{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700803 struct in_core_key *key = &hint->key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700805 hint->th->displace_new_blocks = 0;
806 hint->search_start =
807 hint->beg + keyed_hash((char *)(&key->k_objectid),
808 4) % (hint->end - hint->beg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700809}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700810#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700811
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700812static inline int old_hashed_relocation(reiserfs_blocknr_hint_t * hint)
813{
814 b_blocknr_t border;
815 u32 hash_in;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700816
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700817 if (hint->formatted_node || hint->inode == NULL) {
818 return 0;
819 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700820
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700821 hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id);
822 border =
823 hint->beg + (u32) keyed_hash(((char *)(&hash_in)),
824 4) % (hint->end - hint->beg - 1);
825 if (border > hint->search_start)
826 hint->search_start = border;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700827
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700828 return 1;
829}
830
831static inline int old_way(reiserfs_blocknr_hint_t * hint)
832{
833 b_blocknr_t border;
834
835 if (hint->formatted_node || hint->inode == NULL) {
836 return 0;
837 }
838
839 border =
840 hint->beg +
841 le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end -
842 hint->beg);
843 if (border > hint->search_start)
844 hint->search_start = border;
845
846 return 1;
847}
848
849static inline void hundredth_slices(reiserfs_blocknr_hint_t * hint)
850{
851 struct in_core_key *key = &hint->key;
852 b_blocknr_t slice_start;
853
854 slice_start =
855 (keyed_hash((char *)(&key->k_dir_id), 4) % 100) * (hint->end / 100);
856 if (slice_start > hint->search_start
857 || slice_start + (hint->end / 100) <= hint->search_start) {
858 hint->search_start = slice_start;
859 }
860}
861
862static void determine_search_start(reiserfs_blocknr_hint_t * hint,
863 int amount_needed)
864{
865 struct super_block *s = hint->th->t_super;
866 int unfm_hint;
867
868 hint->beg = 0;
869 hint->end = SB_BLOCK_COUNT(s) - 1;
870
871 /* This is former border algorithm. Now with tunable border offset */
872 if (concentrating_formatted_nodes(s))
873 set_border_in_hint(s, hint);
874
875#ifdef DISPLACE_NEW_PACKING_LOCALITIES
876 /* whenever we create a new directory, we displace it. At first we will
877 hash for location, later we might look for a moderately empty place for
878 it */
879 if (displacing_new_packing_localities(s)
880 && hint->th->displace_new_blocks) {
881 displace_new_packing_locality(hint);
882
883 /* we do not continue determine_search_start,
884 * if new packing locality is being displaced */
885 return;
886 }
887#endif
888
889 /* all persons should feel encouraged to add more special cases here and
890 * test them */
891
892 if (displacing_large_files(s) && !hint->formatted_node
893 && this_blocknr_allocation_would_make_it_a_large_file(hint)) {
894 displace_large_file(hint);
895 return;
896 }
897
898 /* if none of our special cases is relevant, use the left neighbor in the
899 tree order of the new node we are allocating for */
900 if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) {
901 hash_formatted_node(hint);
902 return;
903 }
904
905 unfm_hint = get_left_neighbor(hint);
906
907 /* Mimic old block allocator behaviour, that is if VFS allowed for preallocation,
908 new blocks are displaced based on directory ID. Also, if suggested search_start
909 is less than last preallocated block, we start searching from it, assuming that
910 HDD dataflow is faster in forward direction */
911 if (TEST_OPTION(old_way, s)) {
912 if (!hint->formatted_node) {
913 if (!reiserfs_hashed_relocation(s))
914 old_way(hint);
915 else if (!reiserfs_no_unhashed_relocation(s))
916 old_hashed_relocation(hint);
917
918 if (hint->inode
919 && hint->search_start <
920 REISERFS_I(hint->inode)->i_prealloc_block)
921 hint->search_start =
922 REISERFS_I(hint->inode)->i_prealloc_block;
923 }
924 return;
925 }
926
927 /* This is an approach proposed by Hans */
928 if (TEST_OPTION(hundredth_slices, s)
929 && !(displacing_large_files(s) && !hint->formatted_node)) {
930 hundredth_slices(hint);
931 return;
932 }
933
934 /* old_hashed_relocation only works on unformatted */
935 if (!unfm_hint && !hint->formatted_node &&
936 TEST_OPTION(old_hashed_relocation, s)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937 old_hashed_relocation(hint);
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700938 }
939 /* new_hashed_relocation works with both formatted/unformatted nodes */
940 if ((!unfm_hint || hint->formatted_node) &&
941 TEST_OPTION(new_hashed_relocation, s)) {
942 new_hashed_relocation(hint);
943 }
944 /* dirid grouping works only on unformatted nodes */
945 if (!unfm_hint && !hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
946 dirid_groups(hint);
947 }
948#ifdef DISPLACE_NEW_PACKING_LOCALITIES
949 if (hint->formatted_node && TEST_OPTION(dirid_groups, s)) {
950 dirid_groups(hint);
951 }
952#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700954 /* oid grouping works only on unformatted nodes */
955 if (!unfm_hint && !hint->formatted_node && TEST_OPTION(oid_groups, s)) {
956 oid_groups(hint);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700957 }
958 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700959}
960
961static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
962{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700963 /* make minimum size a mount option and benchmark both ways */
964 /* we preallocate blocks only for regular files, specific size */
965 /* benchmark preallocating always and see what happens */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700966
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700967 hint->prealloc_size = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700969 if (!hint->formatted_node && hint->preallocate) {
970 if (S_ISREG(hint->inode->i_mode)
971 && hint->inode->i_size >=
972 REISERFS_SB(hint->th->t_super)->s_alloc_options.
973 preallocmin * hint->inode->i_sb->s_blocksize)
974 hint->prealloc_size =
975 REISERFS_SB(hint->th->t_super)->s_alloc_options.
976 preallocsize - 1;
977 }
978 return CARRY_ON;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700979}
980
981/* XXX I know it could be merged with upper-level function;
982 but may be result function would be too complex. */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700983static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,
984 b_blocknr_t * new_blocknrs,
985 b_blocknr_t start,
986 b_blocknr_t finish, int min,
987 int amount_needed,
988 int prealloc_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700990 int rest = amount_needed;
991 int nr_allocated;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700992
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700993 while (rest > 0 && start <= finish) {
994 nr_allocated = scan_bitmap(hint->th, &start, finish, min,
995 rest + prealloc_size,
996 !hint->formatted_node, hint->block);
997
998 if (nr_allocated == 0) /* no new blocks allocated, return */
999 break;
1000
1001 /* fill free_blocknrs array first */
1002 while (rest > 0 && nr_allocated > 0) {
1003 *new_blocknrs++ = start++;
1004 rest--;
1005 nr_allocated--;
1006 }
1007
1008 /* do we have something to fill prealloc. array also ? */
1009 if (nr_allocated > 0) {
1010 /* it means prealloc_size was greater that 0 and we do preallocation */
1011 list_add(&REISERFS_I(hint->inode)->i_prealloc_list,
1012 &SB_JOURNAL(hint->th->t_super)->
1013 j_prealloc_list);
1014 REISERFS_I(hint->inode)->i_prealloc_block = start;
1015 REISERFS_I(hint->inode)->i_prealloc_count =
1016 nr_allocated;
1017 break;
1018 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019 }
1020
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001021 return (amount_needed - rest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001022}
1023
1024static inline int blocknrs_and_prealloc_arrays_from_search_start
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001025 (reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs,
1026 int amount_needed) {
1027 struct super_block *s = hint->th->t_super;
1028 b_blocknr_t start = hint->search_start;
1029 b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1;
1030 int passno = 0;
1031 int nr_allocated = 0;
1032 int bigalloc = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001033
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001034 determine_prealloc_size(hint);
1035 if (!hint->formatted_node) {
1036 int quota_ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037#ifdef REISERQUOTA_DEBUG
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001038 reiserfs_debug(s, REISERFS_DEBUG_CODE,
1039 "reiserquota: allocating %d blocks id=%u",
1040 amount_needed, hint->inode->i_uid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001041#endif
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001042 quota_ret =
1043 DQUOT_ALLOC_BLOCK_NODIRTY(hint->inode, amount_needed);
1044 if (quota_ret) /* Quota exceeded? */
1045 return QUOTA_EXCEEDED;
1046 if (hint->preallocate && hint->prealloc_size) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001047#ifdef REISERQUOTA_DEBUG
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001048 reiserfs_debug(s, REISERFS_DEBUG_CODE,
1049 "reiserquota: allocating (prealloc) %d blocks id=%u",
1050 hint->prealloc_size, hint->inode->i_uid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001051#endif
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001052 quota_ret =
1053 DQUOT_PREALLOC_BLOCK_NODIRTY(hint->inode,
1054 hint->prealloc_size);
1055 if (quota_ret)
1056 hint->preallocate = hint->prealloc_size = 0;
1057 }
1058 /* for unformatted nodes, force large allocations */
1059 bigalloc = amount_needed;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001060 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001062 do {
1063 /* in bigalloc mode, nr_allocated should stay zero until
1064 * the entire allocation is filled
1065 */
1066 if (unlikely(bigalloc && nr_allocated)) {
1067 reiserfs_warning(s, "bigalloc is %d, nr_allocated %d\n",
1068 bigalloc, nr_allocated);
1069 /* reset things to a sane value */
1070 bigalloc = amount_needed - nr_allocated;
1071 }
1072 /*
1073 * try pass 0 and pass 1 looking for a nice big
1074 * contiguous allocation. Then reset and look
1075 * for anything you can find.
1076 */
1077 if (passno == 2 && bigalloc) {
1078 passno = 0;
1079 bigalloc = 0;
1080 }
1081 switch (passno++) {
1082 case 0: /* Search from hint->search_start to end of disk */
1083 start = hint->search_start;
1084 finish = SB_BLOCK_COUNT(s) - 1;
1085 break;
1086 case 1: /* Search from hint->beg to hint->search_start */
1087 start = hint->beg;
1088 finish = hint->search_start;
1089 break;
1090 case 2: /* Last chance: Search from 0 to hint->beg */
1091 start = 0;
1092 finish = hint->beg;
1093 break;
1094 default: /* We've tried searching everywhere, not enough space */
1095 /* Free the blocks */
1096 if (!hint->formatted_node) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001097#ifdef REISERQUOTA_DEBUG
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001098 reiserfs_debug(s, REISERFS_DEBUG_CODE,
1099 "reiserquota: freeing (nospace) %d blocks id=%u",
1100 amount_needed +
1101 hint->prealloc_size -
1102 nr_allocated,
1103 hint->inode->i_uid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104#endif
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001105 DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + hint->prealloc_size - nr_allocated); /* Free not allocated blocks */
1106 }
1107 while (nr_allocated--)
1108 reiserfs_free_block(hint->th, hint->inode,
1109 new_blocknrs[nr_allocated],
1110 !hint->formatted_node);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001111
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001112 return NO_DISK_SPACE;
1113 }
1114 } while ((nr_allocated += allocate_without_wrapping_disk(hint,
1115 new_blocknrs +
1116 nr_allocated,
1117 start, finish,
1118 bigalloc ?
1119 bigalloc : 1,
1120 amount_needed -
1121 nr_allocated,
1122 hint->
1123 prealloc_size))
1124 < amount_needed);
1125 if (!hint->formatted_node &&
1126 amount_needed + hint->prealloc_size >
1127 nr_allocated + REISERFS_I(hint->inode)->i_prealloc_count) {
1128 /* Some of preallocation blocks were not allocated */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001129#ifdef REISERQUOTA_DEBUG
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001130 reiserfs_debug(s, REISERFS_DEBUG_CODE,
1131 "reiserquota: freeing (failed prealloc) %d blocks id=%u",
1132 amount_needed + hint->prealloc_size -
1133 nr_allocated -
1134 REISERFS_I(hint->inode)->i_prealloc_count,
1135 hint->inode->i_uid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001136#endif
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001137 DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed +
1138 hint->prealloc_size - nr_allocated -
1139 REISERFS_I(hint->inode)->
1140 i_prealloc_count);
1141 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001142
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001143 return CARRY_ON;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001144}
1145
1146/* grab new blocknrs from preallocated list */
1147/* return amount still needed after using them */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001148static int use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint,
1149 b_blocknr_t * new_blocknrs,
1150 int amount_needed)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001151{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001152 struct inode *inode = hint->inode;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001153
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001154 if (REISERFS_I(inode)->i_prealloc_count > 0) {
1155 while (amount_needed) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001156
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001157 *new_blocknrs++ = REISERFS_I(inode)->i_prealloc_block++;
1158 REISERFS_I(inode)->i_prealloc_count--;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001159
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001160 amount_needed--;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001161
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001162 if (REISERFS_I(inode)->i_prealloc_count <= 0) {
1163 list_del(&REISERFS_I(inode)->i_prealloc_list);
1164 break;
1165 }
1166 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001167 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001168 /* return amount still needed after using preallocated blocks */
1169 return amount_needed;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001170}
1171
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001172int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs, int amount_needed, int reserved_by_us /* Amount of blocks we have
1173 already reserved */ )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001174{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001175 int initial_amount_needed = amount_needed;
1176 int ret;
1177 struct super_block *s = hint->th->t_super;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001178
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001179 /* Check if there is enough space, taking into account reserved space */
1180 if (SB_FREE_BLOCKS(s) - REISERFS_SB(s)->reserved_blocks <
1181 amount_needed - reserved_by_us)
1182 return NO_DISK_SPACE;
1183 /* should this be if !hint->inode && hint->preallocate? */
1184 /* do you mean hint->formatted_node can be removed ? - Zam */
1185 /* hint->formatted_node cannot be removed because we try to access
1186 inode information here, and there is often no inode assotiated with
1187 metadata allocations - green */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001188
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001189 if (!hint->formatted_node && hint->preallocate) {
1190 amount_needed = use_preallocated_list_if_available
1191 (hint, new_blocknrs, amount_needed);
1192 if (amount_needed == 0) /* all blocknrs we need we got from
1193 prealloc. list */
1194 return CARRY_ON;
1195 new_blocknrs += (initial_amount_needed - amount_needed);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001196 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001197
1198 /* find search start and save it in hint structure */
1199 determine_search_start(hint, amount_needed);
1200 if (hint->search_start >= SB_BLOCK_COUNT(s))
1201 hint->search_start = SB_BLOCK_COUNT(s) - 1;
1202
1203 /* allocation itself; fill new_blocknrs and preallocation arrays */
1204 ret = blocknrs_and_prealloc_arrays_from_search_start
1205 (hint, new_blocknrs, amount_needed);
1206
1207 /* we used prealloc. list to fill (partially) new_blocknrs array. If final allocation fails we
1208 * need to return blocks back to prealloc. list or just free them. -- Zam (I chose second
1209 * variant) */
1210
1211 if (ret != CARRY_ON) {
1212 while (amount_needed++ < initial_amount_needed) {
1213 reiserfs_free_block(hint->th, hint->inode,
1214 *(--new_blocknrs), 1);
1215 }
1216 }
1217 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001218}
1219
1220/* These 2 functions are here to provide blocks reservation to the rest of kernel */
1221/* Reserve @blocks amount of blocks in fs pointed by @sb. Caller must make sure
1222 there are actually this much blocks on the FS available */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001223void reiserfs_claim_blocks_to_be_allocated(struct super_block *sb, /* super block of
1224 filesystem where
1225 blocks should be
1226 reserved */
1227 int blocks /* How much to reserve */
1228 )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001229{
1230
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001231 /* Fast case, if reservation is zero - exit immediately. */
1232 if (!blocks)
1233 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001234
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001235 spin_lock(&REISERFS_SB(sb)->bitmap_lock);
1236 REISERFS_SB(sb)->reserved_blocks += blocks;
1237 spin_unlock(&REISERFS_SB(sb)->bitmap_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001238}
1239
1240/* Unreserve @blocks amount of blocks in fs pointed by @sb */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001241void reiserfs_release_claimed_blocks(struct super_block *sb, /* super block of
1242 filesystem where
1243 blocks should be
1244 reserved */
1245 int blocks /* How much to unreserve */
1246 )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001247{
1248
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001249 /* Fast case, if unreservation is zero - exit immediately. */
1250 if (!blocks)
1251 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001252
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001253 spin_lock(&REISERFS_SB(sb)->bitmap_lock);
1254 REISERFS_SB(sb)->reserved_blocks -= blocks;
1255 spin_unlock(&REISERFS_SB(sb)->bitmap_lock);
1256 RFALSE(REISERFS_SB(sb)->reserved_blocks < 0,
1257 "amount of blocks reserved became zero?");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001258}
1259
1260/* This function estimates how much pages we will be able to write to FS
1261 used for reiserfs_file_write() purposes for now. */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001262int reiserfs_can_fit_pages(struct super_block *sb /* superblock of filesystem
1263 to estimate space */ )
Linus Torvalds1da177e2005-04-16 15:20:36 -07001264{
1265 int space;
1266
1267 spin_lock(&REISERFS_SB(sb)->bitmap_lock);
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001268 space =
1269 (SB_FREE_BLOCKS(sb) -
1270 REISERFS_SB(sb)->reserved_blocks) >> (PAGE_CACHE_SHIFT -
1271 sb->s_blocksize_bits);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001272 spin_unlock(&REISERFS_SB(sb)->bitmap_lock);
1273
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001274 return space > 0 ? space : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001275}