blob: c739626f5bf1181fd29920333b64f152e495d3a2 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Copyright (C) International Business Machines Corp., 2000-2004
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12 * the GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#include <linux/fs.h>
20#include "jfs_incore.h"
21#include "jfs_superblock.h"
22#include "jfs_dmap.h"
23#include "jfs_imap.h"
24#include "jfs_lock.h"
25#include "jfs_metapage.h"
26#include "jfs_debug.h"
27
28/*
Linus Torvalds1da177e2005-04-16 15:20:36 -070029 * SERIALIZATION of the Block Allocation Map.
30 *
31 * the working state of the block allocation map is accessed in
32 * two directions:
33 *
34 * 1) allocation and free requests that start at the dmap
35 * level and move up through the dmap control pages (i.e.
36 * the vast majority of requests).
37 *
38 * 2) allocation requests that start at dmap control page
39 * level and work down towards the dmaps.
40 *
41 * the serialization scheme used here is as follows.
42 *
43 * requests which start at the bottom are serialized against each
44 * other through buffers and each requests holds onto its buffers
45 * as it works it way up from a single dmap to the required level
46 * of dmap control page.
47 * requests that start at the top are serialized against each other
48 * and request that start from the bottom by the multiple read/single
49 * write inode lock of the bmap inode. requests starting at the top
50 * take this lock in write mode while request starting at the bottom
51 * take the lock in read mode. a single top-down request may proceed
52 * exclusively while multiple bottoms-up requests may proceed
53 * simultaneously (under the protection of busy buffers).
54 *
55 * in addition to information found in dmaps and dmap control pages,
56 * the working state of the block allocation map also includes read/
57 * write information maintained in the bmap descriptor (i.e. total
58 * free block count, allocation group level free block counts).
59 * a single exclusive lock (BMAP_LOCK) is used to guard this information
60 * in the face of multiple-bottoms up requests.
61 * (lock ordering: IREAD_LOCK, BMAP_LOCK);
62 *
63 * accesses to the persistent state of the block allocation map (limited
64 * to the persistent bitmaps in dmaps) is guarded by (busy) buffers.
65 */
66
67#define BMAP_LOCK_INIT(bmp) init_MUTEX(&bmp->db_bmaplock)
68#define BMAP_LOCK(bmp) down(&bmp->db_bmaplock)
69#define BMAP_UNLOCK(bmp) up(&bmp->db_bmaplock)
70
71/*
72 * forward references
73 */
74static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
75 int nblocks);
76static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval);
77static void dbBackSplit(dmtree_t * tp, int leafno);
Dave Kleikamp56d12542005-07-15 09:43:36 -050078static int dbJoin(dmtree_t * tp, int leafno, int newval);
Linus Torvalds1da177e2005-04-16 15:20:36 -070079static void dbAdjTree(dmtree_t * tp, int leafno, int newval);
80static int dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc,
81 int level);
82static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results);
83static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
84 int nblocks);
85static int dbAllocNear(struct bmap * bmp, struct dmap * dp, s64 blkno,
86 int nblocks,
87 int l2nb, s64 * results);
88static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
89 int nblocks);
90static int dbAllocDmapLev(struct bmap * bmp, struct dmap * dp, int nblocks,
91 int l2nb,
92 s64 * results);
93static int dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb,
94 s64 * results);
95static int dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno,
96 s64 * results);
97static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks);
98static int dbFindBits(u32 word, int l2nb);
99static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno);
100static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx);
Dave Kleikamp56d12542005-07-15 09:43:36 -0500101static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
102 int nblocks);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700103static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
104 int nblocks);
105static int dbMaxBud(u8 * cp);
106s64 dbMapFileSizeToMapSize(struct inode *ipbmap);
107static int blkstol2(s64 nb);
108
109static int cntlz(u32 value);
110static int cnttz(u32 word);
111
112static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
113 int nblocks);
114static int dbInitDmap(struct dmap * dp, s64 blkno, int nblocks);
115static int dbInitDmapTree(struct dmap * dp);
116static int dbInitTree(struct dmaptree * dtp);
117static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i);
118static int dbGetL2AGSize(s64 nblocks);
119
120/*
121 * buddy table
122 *
123 * table used for determining buddy sizes within characters of
124 * dmap bitmap words. the characters themselves serve as indexes
125 * into the table, with the table elements yielding the maximum
126 * binary buddy of free bits within the character.
127 */
128static s8 budtab[256] = {
129 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
130 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
132 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
135 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
136 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
137 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
138 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
139 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
140 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
141 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
142 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
143 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
144 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, -1
145};
146
147
148/*
149 * NAME: dbMount()
150 *
151 * FUNCTION: initializate the block allocation map.
152 *
153 * memory is allocated for the in-core bmap descriptor and
154 * the in-core descriptor is initialized from disk.
155 *
156 * PARAMETERS:
157 * ipbmap - pointer to in-core inode for the block map.
158 *
159 * RETURN VALUES:
160 * 0 - success
161 * -ENOMEM - insufficient memory
162 * -EIO - i/o error
163 */
164int dbMount(struct inode *ipbmap)
165{
166 struct bmap *bmp;
167 struct dbmap_disk *dbmp_le;
168 struct metapage *mp;
169 int i;
170
171 /*
172 * allocate/initialize the in-memory bmap descriptor
173 */
174 /* allocate memory for the in-memory bmap descriptor */
175 bmp = kmalloc(sizeof(struct bmap), GFP_KERNEL);
176 if (bmp == NULL)
177 return -ENOMEM;
178
179 /* read the on-disk bmap descriptor. */
180 mp = read_metapage(ipbmap,
181 BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage,
182 PSIZE, 0);
183 if (mp == NULL) {
184 kfree(bmp);
185 return -EIO;
186 }
187
188 /* copy the on-disk bmap descriptor to its in-memory version. */
189 dbmp_le = (struct dbmap_disk *) mp->data;
190 bmp->db_mapsize = le64_to_cpu(dbmp_le->dn_mapsize);
191 bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree);
192 bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage);
193 bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag);
194 bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel);
195 bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag);
196 bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref);
197 bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel);
198 bmp->db_agheigth = le32_to_cpu(dbmp_le->dn_agheigth);
199 bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth);
200 bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart);
201 bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size);
202 for (i = 0; i < MAXAG; i++)
203 bmp->db_agfree[i] = le64_to_cpu(dbmp_le->dn_agfree[i]);
204 bmp->db_agsize = le64_to_cpu(dbmp_le->dn_agsize);
205 bmp->db_maxfreebud = dbmp_le->dn_maxfreebud;
206
207 /* release the buffer. */
208 release_metapage(mp);
209
210 /* bind the bmap inode and the bmap descriptor to each other. */
211 bmp->db_ipbmap = ipbmap;
212 JFS_SBI(ipbmap->i_sb)->bmap = bmp;
213
214 memset(bmp->db_active, 0, sizeof(bmp->db_active));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215
216 /*
217 * allocate/initialize the bmap lock
218 */
219 BMAP_LOCK_INIT(bmp);
220
221 return (0);
222}
223
224
225/*
226 * NAME: dbUnmount()
227 *
228 * FUNCTION: terminate the block allocation map in preparation for
229 * file system unmount.
230 *
231 * the in-core bmap descriptor is written to disk and
232 * the memory for this descriptor is freed.
233 *
234 * PARAMETERS:
235 * ipbmap - pointer to in-core inode for the block map.
236 *
237 * RETURN VALUES:
238 * 0 - success
239 * -EIO - i/o error
240 */
241int dbUnmount(struct inode *ipbmap, int mounterror)
242{
243 struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244
245 if (!(mounterror || isReadOnly(ipbmap)))
246 dbSync(ipbmap);
247
248 /*
249 * Invalidate the page cache buffers
250 */
251 truncate_inode_pages(ipbmap->i_mapping, 0);
252
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253 /* free the memory for the in-memory bmap. */
254 kfree(bmp);
255
256 return (0);
257}
258
259/*
260 * dbSync()
261 */
262int dbSync(struct inode *ipbmap)
263{
264 struct dbmap_disk *dbmp_le;
265 struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
266 struct metapage *mp;
267 int i;
268
269 /*
270 * write bmap global control page
271 */
272 /* get the buffer for the on-disk bmap descriptor. */
273 mp = read_metapage(ipbmap,
274 BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage,
275 PSIZE, 0);
276 if (mp == NULL) {
277 jfs_err("dbSync: read_metapage failed!");
278 return -EIO;
279 }
280 /* copy the in-memory version of the bmap to the on-disk version */
281 dbmp_le = (struct dbmap_disk *) mp->data;
282 dbmp_le->dn_mapsize = cpu_to_le64(bmp->db_mapsize);
283 dbmp_le->dn_nfree = cpu_to_le64(bmp->db_nfree);
284 dbmp_le->dn_l2nbperpage = cpu_to_le32(bmp->db_l2nbperpage);
285 dbmp_le->dn_numag = cpu_to_le32(bmp->db_numag);
286 dbmp_le->dn_maxlevel = cpu_to_le32(bmp->db_maxlevel);
287 dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag);
288 dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref);
289 dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel);
290 dbmp_le->dn_agheigth = cpu_to_le32(bmp->db_agheigth);
291 dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth);
292 dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart);
293 dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size);
294 for (i = 0; i < MAXAG; i++)
295 dbmp_le->dn_agfree[i] = cpu_to_le64(bmp->db_agfree[i]);
296 dbmp_le->dn_agsize = cpu_to_le64(bmp->db_agsize);
297 dbmp_le->dn_maxfreebud = bmp->db_maxfreebud;
298
299 /* write the buffer */
300 write_metapage(mp);
301
302 /*
303 * write out dirty pages of bmap
304 */
305 filemap_fdatawrite(ipbmap->i_mapping);
306 filemap_fdatawait(ipbmap->i_mapping);
307
308 ipbmap->i_state |= I_DIRTY;
309 diWriteSpecial(ipbmap, 0);
310
311 return (0);
312}
313
314
315/*
316 * NAME: dbFree()
317 *
318 * FUNCTION: free the specified block range from the working block
319 * allocation map.
320 *
321 * the blocks will be free from the working map one dmap
322 * at a time.
323 *
324 * PARAMETERS:
325 * ip - pointer to in-core inode;
326 * blkno - starting block number to be freed.
327 * nblocks - number of blocks to be freed.
328 *
329 * RETURN VALUES:
330 * 0 - success
331 * -EIO - i/o error
332 */
333int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
334{
335 struct metapage *mp;
336 struct dmap *dp;
337 int nb, rc;
338 s64 lblkno, rem;
339 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
340 struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
341
342 IREAD_LOCK(ipbmap);
343
344 /* block to be freed better be within the mapsize. */
345 if (unlikely((blkno == 0) || (blkno + nblocks > bmp->db_mapsize))) {
346 IREAD_UNLOCK(ipbmap);
347 printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n",
348 (unsigned long long) blkno,
349 (unsigned long long) nblocks);
350 jfs_error(ip->i_sb,
351 "dbFree: block to be freed is outside the map");
352 return -EIO;
353 }
354
355 /*
356 * free the blocks a dmap at a time.
357 */
358 mp = NULL;
359 for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) {
360 /* release previous dmap if any */
361 if (mp) {
362 write_metapage(mp);
363 }
364
365 /* get the buffer for the current dmap. */
366 lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
367 mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
368 if (mp == NULL) {
369 IREAD_UNLOCK(ipbmap);
370 return -EIO;
371 }
372 dp = (struct dmap *) mp->data;
373
374 /* determine the number of blocks to be freed from
375 * this dmap.
376 */
377 nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1)));
378
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379 /* free the blocks. */
380 if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) {
Dave Kleikamp56d12542005-07-15 09:43:36 -0500381 jfs_error(ip->i_sb, "dbFree: error in block map\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700382 release_metapage(mp);
383 IREAD_UNLOCK(ipbmap);
384 return (rc);
385 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700386 }
387
388 /* write the last buffer. */
389 write_metapage(mp);
390
391 IREAD_UNLOCK(ipbmap);
392
393 return (0);
394}
395
396
397/*
398 * NAME: dbUpdatePMap()
399 *
400 * FUNCTION: update the allocation state (free or allocate) of the
401 * specified block range in the persistent block allocation map.
402 *
403 * the blocks will be updated in the persistent map one
404 * dmap at a time.
405 *
406 * PARAMETERS:
407 * ipbmap - pointer to in-core inode for the block map.
408 * free - TRUE if block range is to be freed from the persistent
409 * map; FALSE if it is to be allocated.
410 * blkno - starting block number of the range.
411 * nblocks - number of contiguous blocks in the range.
412 * tblk - transaction block;
413 *
414 * RETURN VALUES:
415 * 0 - success
416 * -EIO - i/o error
417 */
418int
419dbUpdatePMap(struct inode *ipbmap,
420 int free, s64 blkno, s64 nblocks, struct tblock * tblk)
421{
422 int nblks, dbitno, wbitno, rbits;
423 int word, nbits, nwords;
424 struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
425 s64 lblkno, rem, lastlblkno;
426 u32 mask;
427 struct dmap *dp;
428 struct metapage *mp;
429 struct jfs_log *log;
430 int lsn, difft, diffp;
Dave Kleikamp7fab4792005-05-02 12:25:02 -0600431 unsigned long flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432
433 /* the blocks better be within the mapsize. */
434 if (blkno + nblocks > bmp->db_mapsize) {
435 printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n",
436 (unsigned long long) blkno,
437 (unsigned long long) nblocks);
438 jfs_error(ipbmap->i_sb,
439 "dbUpdatePMap: blocks are outside the map");
440 return -EIO;
441 }
442
443 /* compute delta of transaction lsn from log syncpt */
444 lsn = tblk->lsn;
445 log = (struct jfs_log *) JFS_SBI(tblk->sb)->log;
446 logdiff(difft, lsn, log);
447
448 /*
449 * update the block state a dmap at a time.
450 */
451 mp = NULL;
452 lastlblkno = 0;
453 for (rem = nblocks; rem > 0; rem -= nblks, blkno += nblks) {
454 /* get the buffer for the current dmap. */
455 lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
456 if (lblkno != lastlblkno) {
457 if (mp) {
458 write_metapage(mp);
459 }
460
461 mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE,
462 0);
463 if (mp == NULL)
464 return -EIO;
Dave Kleikamp7fab4792005-05-02 12:25:02 -0600465 metapage_wait_for_io(mp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700466 }
467 dp = (struct dmap *) mp->data;
468
469 /* determine the bit number and word within the dmap of
470 * the starting block. also determine how many blocks
471 * are to be updated within this dmap.
472 */
473 dbitno = blkno & (BPERDMAP - 1);
474 word = dbitno >> L2DBWORD;
475 nblks = min(rem, (s64)BPERDMAP - dbitno);
476
477 /* update the bits of the dmap words. the first and last
478 * words may only have a subset of their bits updated. if
479 * this is the case, we'll work against that word (i.e.
480 * partial first and/or last) only in a single pass. a
481 * single pass will also be used to update all words that
482 * are to have all their bits updated.
483 */
484 for (rbits = nblks; rbits > 0;
485 rbits -= nbits, dbitno += nbits) {
486 /* determine the bit number within the word and
487 * the number of bits within the word.
488 */
489 wbitno = dbitno & (DBWORD - 1);
490 nbits = min(rbits, DBWORD - wbitno);
491
492 /* check if only part of the word is to be updated. */
493 if (nbits < DBWORD) {
494 /* update (free or allocate) the bits
495 * in this word.
496 */
497 mask =
498 (ONES << (DBWORD - nbits) >> wbitno);
499 if (free)
500 dp->pmap[word] &=
501 cpu_to_le32(~mask);
502 else
503 dp->pmap[word] |=
504 cpu_to_le32(mask);
505
506 word += 1;
507 } else {
508 /* one or more words are to have all
509 * their bits updated. determine how
510 * many words and how many bits.
511 */
512 nwords = rbits >> L2DBWORD;
513 nbits = nwords << L2DBWORD;
514
515 /* update (free or allocate) the bits
516 * in these words.
517 */
518 if (free)
519 memset(&dp->pmap[word], 0,
520 nwords * 4);
521 else
522 memset(&dp->pmap[word], (int) ONES,
523 nwords * 4);
524
525 word += nwords;
526 }
527 }
528
529 /*
530 * update dmap lsn
531 */
532 if (lblkno == lastlblkno)
533 continue;
534
535 lastlblkno = lblkno;
536
537 if (mp->lsn != 0) {
538 /* inherit older/smaller lsn */
539 logdiff(diffp, mp->lsn, log);
Dave Kleikamp7fab4792005-05-02 12:25:02 -0600540 LOGSYNC_LOCK(log, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700541 if (difft < diffp) {
542 mp->lsn = lsn;
543
544 /* move bp after tblock in logsync list */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700545 list_move(&mp->synclist, &tblk->synclist);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700546 }
547
548 /* inherit younger/larger clsn */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549 logdiff(difft, tblk->clsn, log);
550 logdiff(diffp, mp->clsn, log);
551 if (difft > diffp)
552 mp->clsn = tblk->clsn;
Dave Kleikamp7fab4792005-05-02 12:25:02 -0600553 LOGSYNC_UNLOCK(log, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554 } else {
555 mp->log = log;
556 mp->lsn = lsn;
557
558 /* insert bp after tblock in logsync list */
Dave Kleikamp7fab4792005-05-02 12:25:02 -0600559 LOGSYNC_LOCK(log, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700560
561 log->count++;
562 list_add(&mp->synclist, &tblk->synclist);
563
564 mp->clsn = tblk->clsn;
Dave Kleikamp7fab4792005-05-02 12:25:02 -0600565 LOGSYNC_UNLOCK(log, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566 }
567 }
568
569 /* write the last buffer. */
570 if (mp) {
571 write_metapage(mp);
572 }
573
574 return (0);
575}
576
577
578/*
579 * NAME: dbNextAG()
580 *
581 * FUNCTION: find the preferred allocation group for new allocations.
582 *
583 * Within the allocation groups, we maintain a preferred
584 * allocation group which consists of a group with at least
585 * average free space. It is the preferred group that we target
586 * new inode allocation towards. The tie-in between inode
587 * allocation and block allocation occurs as we allocate the
588 * first (data) block of an inode and specify the inode (block)
589 * as the allocation hint for this block.
590 *
591 * We try to avoid having more than one open file growing in
592 * an allocation group, as this will lead to fragmentation.
593 * This differs from the old OS/2 method of trying to keep
594 * empty ags around for large allocations.
595 *
596 * PARAMETERS:
597 * ipbmap - pointer to in-core inode for the block map.
598 *
599 * RETURN VALUES:
600 * the preferred allocation group number.
601 */
602int dbNextAG(struct inode *ipbmap)
603{
604 s64 avgfree;
605 int agpref;
606 s64 hwm = 0;
607 int i;
608 int next_best = -1;
609 struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
610
611 BMAP_LOCK(bmp);
612
613 /* determine the average number of free blocks within the ags. */
614 avgfree = (u32)bmp->db_nfree / bmp->db_numag;
615
616 /*
617 * if the current preferred ag does not have an active allocator
618 * and has at least average freespace, return it
619 */
620 agpref = bmp->db_agpref;
621 if ((atomic_read(&bmp->db_active[agpref]) == 0) &&
622 (bmp->db_agfree[agpref] >= avgfree))
623 goto unlock;
624
625 /* From the last preferred ag, find the next one with at least
626 * average free space.
627 */
628 for (i = 0 ; i < bmp->db_numag; i++, agpref++) {
629 if (agpref == bmp->db_numag)
630 agpref = 0;
631
632 if (atomic_read(&bmp->db_active[agpref]))
633 /* open file is currently growing in this ag */
634 continue;
635 if (bmp->db_agfree[agpref] >= avgfree) {
636 /* Return this one */
637 bmp->db_agpref = agpref;
638 goto unlock;
639 } else if (bmp->db_agfree[agpref] > hwm) {
640 /* Less than avg. freespace, but best so far */
641 hwm = bmp->db_agfree[agpref];
642 next_best = agpref;
643 }
644 }
645
646 /*
647 * If no inactive ag was found with average freespace, use the
648 * next best
649 */
650 if (next_best != -1)
651 bmp->db_agpref = next_best;
652 /* else leave db_agpref unchanged */
653unlock:
654 BMAP_UNLOCK(bmp);
655
656 /* return the preferred group.
657 */
658 return (bmp->db_agpref);
659}
660
661/*
662 * NAME: dbAlloc()
663 *
664 * FUNCTION: attempt to allocate a specified number of contiguous free
665 * blocks from the working allocation block map.
666 *
667 * the block allocation policy uses hints and a multi-step
668 * approach.
669 *
670 * for allocation requests smaller than the number of blocks
671 * per dmap, we first try to allocate the new blocks
672 * immediately following the hint. if these blocks are not
673 * available, we try to allocate blocks near the hint. if
674 * no blocks near the hint are available, we next try to
675 * allocate within the same dmap as contains the hint.
676 *
677 * if no blocks are available in the dmap or the allocation
678 * request is larger than the dmap size, we try to allocate
679 * within the same allocation group as contains the hint. if
680 * this does not succeed, we finally try to allocate anywhere
681 * within the aggregate.
682 *
683 * we also try to allocate anywhere within the aggregate for
684 * for allocation requests larger than the allocation group
685 * size or requests that specify no hint value.
686 *
687 * PARAMETERS:
688 * ip - pointer to in-core inode;
689 * hint - allocation hint.
690 * nblocks - number of contiguous blocks in the range.
691 * results - on successful return, set to the starting block number
692 * of the newly allocated contiguous range.
693 *
694 * RETURN VALUES:
695 * 0 - success
696 * -ENOSPC - insufficient disk resources
697 * -EIO - i/o error
698 */
699int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
700{
701 int rc, agno;
702 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
703 struct bmap *bmp;
704 struct metapage *mp;
705 s64 lblkno, blkno;
706 struct dmap *dp;
707 int l2nb;
708 s64 mapSize;
709 int writers;
710
711 /* assert that nblocks is valid */
712 assert(nblocks > 0);
713
714#ifdef _STILL_TO_PORT
715 /* DASD limit check F226941 */
716 if (OVER_LIMIT(ip, nblocks))
717 return -ENOSPC;
718#endif /* _STILL_TO_PORT */
719
720 /* get the log2 number of blocks to be allocated.
721 * if the number of blocks is not a log2 multiple,
722 * it will be rounded up to the next log2 multiple.
723 */
724 l2nb = BLKSTOL2(nblocks);
725
726 bmp = JFS_SBI(ip->i_sb)->bmap;
727
728//retry: /* serialize w.r.t.extendfs() */
729 mapSize = bmp->db_mapsize;
730
731 /* the hint should be within the map */
732 if (hint >= mapSize) {
733 jfs_error(ip->i_sb, "dbAlloc: the hint is outside the map");
734 return -EIO;
735 }
736
737 /* if the number of blocks to be allocated is greater than the
738 * allocation group size, try to allocate anywhere.
739 */
740 if (l2nb > bmp->db_agl2size) {
741 IWRITE_LOCK(ipbmap);
742
743 rc = dbAllocAny(bmp, nblocks, l2nb, results);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700744
745 goto write_unlock;
746 }
747
748 /*
749 * If no hint, let dbNextAG recommend an allocation group
750 */
751 if (hint == 0)
752 goto pref_ag;
753
754 /* we would like to allocate close to the hint. adjust the
755 * hint to the block following the hint since the allocators
756 * will start looking for free space starting at this point.
757 */
758 blkno = hint + 1;
759
760 if (blkno >= bmp->db_mapsize)
761 goto pref_ag;
762
763 agno = blkno >> bmp->db_agl2size;
764
765 /* check if blkno crosses over into a new allocation group.
766 * if so, check if we should allow allocations within this
767 * allocation group.
768 */
769 if ((blkno & (bmp->db_agsize - 1)) == 0)
770 /* check if the AG is currenly being written to.
771 * if so, call dbNextAG() to find a non-busy
772 * AG with sufficient free space.
773 */
774 if (atomic_read(&bmp->db_active[agno]))
775 goto pref_ag;
776
777 /* check if the allocation request size can be satisfied from a
778 * single dmap. if so, try to allocate from the dmap containing
779 * the hint using a tiered strategy.
780 */
781 if (nblocks <= BPERDMAP) {
782 IREAD_LOCK(ipbmap);
783
784 /* get the buffer for the dmap containing the hint.
785 */
786 rc = -EIO;
787 lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
788 mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
789 if (mp == NULL)
790 goto read_unlock;
791
792 dp = (struct dmap *) mp->data;
793
794 /* first, try to satisfy the allocation request with the
795 * blocks beginning at the hint.
796 */
797 if ((rc = dbAllocNext(bmp, dp, blkno, (int) nblocks))
798 != -ENOSPC) {
799 if (rc == 0) {
800 *results = blkno;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700801 mark_metapage_dirty(mp);
802 }
803
804 release_metapage(mp);
805 goto read_unlock;
806 }
807
808 writers = atomic_read(&bmp->db_active[agno]);
809 if ((writers > 1) ||
810 ((writers == 1) && (JFS_IP(ip)->active_ag != agno))) {
811 /*
812 * Someone else is writing in this allocation
813 * group. To avoid fragmenting, try another ag
814 */
815 release_metapage(mp);
816 IREAD_UNLOCK(ipbmap);
817 goto pref_ag;
818 }
819
820 /* next, try to satisfy the allocation request with blocks
821 * near the hint.
822 */
823 if ((rc =
824 dbAllocNear(bmp, dp, blkno, (int) nblocks, l2nb, results))
825 != -ENOSPC) {
Dave Kleikampb38a3ab2005-06-27 15:35:37 -0500826 if (rc == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700827 mark_metapage_dirty(mp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700828
829 release_metapage(mp);
830 goto read_unlock;
831 }
832
833 /* try to satisfy the allocation request with blocks within
834 * the same dmap as the hint.
835 */
836 if ((rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results))
837 != -ENOSPC) {
Dave Kleikampb38a3ab2005-06-27 15:35:37 -0500838 if (rc == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700839 mark_metapage_dirty(mp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840
841 release_metapage(mp);
842 goto read_unlock;
843 }
844
845 release_metapage(mp);
846 IREAD_UNLOCK(ipbmap);
847 }
848
849 /* try to satisfy the allocation request with blocks within
850 * the same allocation group as the hint.
851 */
852 IWRITE_LOCK(ipbmap);
Dave Kleikampb38a3ab2005-06-27 15:35:37 -0500853 if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) != -ENOSPC)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700854 goto write_unlock;
Dave Kleikampb38a3ab2005-06-27 15:35:37 -0500855
Linus Torvalds1da177e2005-04-16 15:20:36 -0700856 IWRITE_UNLOCK(ipbmap);
857
858
859 pref_ag:
860 /*
861 * Let dbNextAG recommend a preferred allocation group
862 */
863 agno = dbNextAG(ipbmap);
864 IWRITE_LOCK(ipbmap);
865
866 /* Try to allocate within this allocation group. if that fails, try to
867 * allocate anywhere in the map.
868 */
869 if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) == -ENOSPC)
870 rc = dbAllocAny(bmp, nblocks, l2nb, results);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700871
872 write_unlock:
873 IWRITE_UNLOCK(ipbmap);
874
875 return (rc);
876
877 read_unlock:
878 IREAD_UNLOCK(ipbmap);
879
880 return (rc);
881}
882
883#ifdef _NOTYET
884/*
885 * NAME: dbAllocExact()
886 *
887 * FUNCTION: try to allocate the requested extent;
888 *
889 * PARAMETERS:
890 * ip - pointer to in-core inode;
891 * blkno - extent address;
892 * nblocks - extent length;
893 *
894 * RETURN VALUES:
895 * 0 - success
896 * -ENOSPC - insufficient disk resources
897 * -EIO - i/o error
898 */
899int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
900{
901 int rc;
902 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
903 struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
904 struct dmap *dp;
905 s64 lblkno;
906 struct metapage *mp;
907
908 IREAD_LOCK(ipbmap);
909
910 /*
911 * validate extent request:
912 *
913 * note: defragfs policy:
914 * max 64 blocks will be moved.
915 * allocation request size must be satisfied from a single dmap.
916 */
917 if (nblocks <= 0 || nblocks > BPERDMAP || blkno >= bmp->db_mapsize) {
918 IREAD_UNLOCK(ipbmap);
919 return -EINVAL;
920 }
921
922 if (nblocks > ((s64) 1 << bmp->db_maxfreebud)) {
923 /* the free space is no longer available */
924 IREAD_UNLOCK(ipbmap);
925 return -ENOSPC;
926 }
927
928 /* read in the dmap covering the extent */
929 lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
930 mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
931 if (mp == NULL) {
932 IREAD_UNLOCK(ipbmap);
933 return -EIO;
934 }
935 dp = (struct dmap *) mp->data;
936
937 /* try to allocate the requested extent */
938 rc = dbAllocNext(bmp, dp, blkno, nblocks);
939
940 IREAD_UNLOCK(ipbmap);
941
Dave Kleikampb38a3ab2005-06-27 15:35:37 -0500942 if (rc == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700943 mark_metapage_dirty(mp);
Dave Kleikampb38a3ab2005-06-27 15:35:37 -0500944
Linus Torvalds1da177e2005-04-16 15:20:36 -0700945 release_metapage(mp);
946
947 return (rc);
948}
949#endif /* _NOTYET */
950
951/*
952 * NAME: dbReAlloc()
953 *
954 * FUNCTION: attempt to extend a current allocation by a specified
955 * number of blocks.
956 *
957 * this routine attempts to satisfy the allocation request
958 * by first trying to extend the existing allocation in
959 * place by allocating the additional blocks as the blocks
960 * immediately following the current allocation. if these
961 * blocks are not available, this routine will attempt to
962 * allocate a new set of contiguous blocks large enough
963 * to cover the existing allocation plus the additional
964 * number of blocks required.
965 *
966 * PARAMETERS:
967 * ip - pointer to in-core inode requiring allocation.
968 * blkno - starting block of the current allocation.
969 * nblocks - number of contiguous blocks within the current
970 * allocation.
971 * addnblocks - number of blocks to add to the allocation.
972 * results - on successful return, set to the starting block number
973 * of the existing allocation if the existing allocation
974 * was extended in place or to a newly allocated contiguous
975 * range if the existing allocation could not be extended
976 * in place.
977 *
978 * RETURN VALUES:
979 * 0 - success
980 * -ENOSPC - insufficient disk resources
981 * -EIO - i/o error
982 */
983int
984dbReAlloc(struct inode *ip,
985 s64 blkno, s64 nblocks, s64 addnblocks, s64 * results)
986{
987 int rc;
988
989 /* try to extend the allocation in place.
990 */
991 if ((rc = dbExtend(ip, blkno, nblocks, addnblocks)) == 0) {
992 *results = blkno;
993 return (0);
994 } else {
995 if (rc != -ENOSPC)
996 return (rc);
997 }
998
999 /* could not extend the allocation in place, so allocate a
1000 * new set of blocks for the entire request (i.e. try to get
1001 * a range of contiguous blocks large enough to cover the
1002 * existing allocation plus the additional blocks.)
1003 */
1004 return (dbAlloc
1005 (ip, blkno + nblocks - 1, addnblocks + nblocks, results));
1006}
1007
1008
1009/*
1010 * NAME: dbExtend()
1011 *
1012 * FUNCTION: attempt to extend a current allocation by a specified
1013 * number of blocks.
1014 *
1015 * this routine attempts to satisfy the allocation request
1016 * by first trying to extend the existing allocation in
1017 * place by allocating the additional blocks as the blocks
1018 * immediately following the current allocation.
1019 *
1020 * PARAMETERS:
1021 * ip - pointer to in-core inode requiring allocation.
1022 * blkno - starting block of the current allocation.
1023 * nblocks - number of contiguous blocks within the current
1024 * allocation.
1025 * addnblocks - number of blocks to add to the allocation.
1026 *
1027 * RETURN VALUES:
1028 * 0 - success
1029 * -ENOSPC - insufficient disk resources
1030 * -EIO - i/o error
1031 */
1032static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
1033{
1034 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
1035 s64 lblkno, lastblkno, extblkno;
1036 uint rel_block;
1037 struct metapage *mp;
1038 struct dmap *dp;
1039 int rc;
1040 struct inode *ipbmap = sbi->ipbmap;
1041 struct bmap *bmp;
1042
1043 /*
1044 * We don't want a non-aligned extent to cross a page boundary
1045 */
1046 if (((rel_block = blkno & (sbi->nbperpage - 1))) &&
1047 (rel_block + nblocks + addnblocks > sbi->nbperpage))
1048 return -ENOSPC;
1049
1050 /* get the last block of the current allocation */
1051 lastblkno = blkno + nblocks - 1;
1052
1053 /* determine the block number of the block following
1054 * the existing allocation.
1055 */
1056 extblkno = lastblkno + 1;
1057
1058 IREAD_LOCK(ipbmap);
1059
1060 /* better be within the file system */
1061 bmp = sbi->bmap;
1062 if (lastblkno < 0 || lastblkno >= bmp->db_mapsize) {
1063 IREAD_UNLOCK(ipbmap);
1064 jfs_error(ip->i_sb,
1065 "dbExtend: the block is outside the filesystem");
1066 return -EIO;
1067 }
1068
1069 /* we'll attempt to extend the current allocation in place by
1070 * allocating the additional blocks as the blocks immediately
1071 * following the current allocation. we only try to extend the
1072 * current allocation in place if the number of additional blocks
1073 * can fit into a dmap, the last block of the current allocation
1074 * is not the last block of the file system, and the start of the
1075 * inplace extension is not on an allocation group boundary.
1076 */
1077 if (addnblocks > BPERDMAP || extblkno >= bmp->db_mapsize ||
1078 (extblkno & (bmp->db_agsize - 1)) == 0) {
1079 IREAD_UNLOCK(ipbmap);
1080 return -ENOSPC;
1081 }
1082
1083 /* get the buffer for the dmap containing the first block
1084 * of the extension.
1085 */
1086 lblkno = BLKTODMAP(extblkno, bmp->db_l2nbperpage);
1087 mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
1088 if (mp == NULL) {
1089 IREAD_UNLOCK(ipbmap);
1090 return -EIO;
1091 }
1092
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093 dp = (struct dmap *) mp->data;
1094
1095 /* try to allocate the blocks immediately following the
1096 * current allocation.
1097 */
1098 rc = dbAllocNext(bmp, dp, extblkno, (int) addnblocks);
1099
1100 IREAD_UNLOCK(ipbmap);
1101
1102 /* were we successful ? */
Dave Kleikampb38a3ab2005-06-27 15:35:37 -05001103 if (rc == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104 write_metapage(mp);
Dave Kleikampb38a3ab2005-06-27 15:35:37 -05001105 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001106 /* we were not successful */
1107 release_metapage(mp);
1108
1109
1110 return (rc);
1111}
1112
1113
1114/*
1115 * NAME: dbAllocNext()
1116 *
1117 * FUNCTION: attempt to allocate the blocks of the specified block
1118 * range within a dmap.
1119 *
1120 * PARAMETERS:
1121 * bmp - pointer to bmap descriptor
1122 * dp - pointer to dmap.
1123 * blkno - starting block number of the range.
1124 * nblocks - number of contiguous free blocks of the range.
1125 *
1126 * RETURN VALUES:
1127 * 0 - success
1128 * -ENOSPC - insufficient disk resources
1129 * -EIO - i/o error
1130 *
1131 * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
1132 */
1133static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
1134 int nblocks)
1135{
1136 int dbitno, word, rembits, nb, nwords, wbitno, nw;
1137 int l2size;
1138 s8 *leaf;
1139 u32 mask;
1140
1141 if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) {
1142 jfs_error(bmp->db_ipbmap->i_sb,
1143 "dbAllocNext: Corrupt dmap page");
1144 return -EIO;
1145 }
1146
1147 /* pick up a pointer to the leaves of the dmap tree.
1148 */
1149 leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx);
1150
1151 /* determine the bit number and word within the dmap of the
1152 * starting block.
1153 */
1154 dbitno = blkno & (BPERDMAP - 1);
1155 word = dbitno >> L2DBWORD;
1156
1157 /* check if the specified block range is contained within
1158 * this dmap.
1159 */
1160 if (dbitno + nblocks > BPERDMAP)
1161 return -ENOSPC;
1162
1163 /* check if the starting leaf indicates that anything
1164 * is free.
1165 */
1166 if (leaf[word] == NOFREE)
1167 return -ENOSPC;
1168
1169 /* check the dmaps words corresponding to block range to see
1170 * if the block range is free. not all bits of the first and
1171 * last words may be contained within the block range. if this
1172 * is the case, we'll work against those words (i.e. partial first
1173 * and/or last) on an individual basis (a single pass) and examine
1174 * the actual bits to determine if they are free. a single pass
1175 * will be used for all dmap words fully contained within the
1176 * specified range. within this pass, the leaves of the dmap
1177 * tree will be examined to determine if the blocks are free. a
1178 * single leaf may describe the free space of multiple dmap
1179 * words, so we may visit only a subset of the actual leaves
1180 * corresponding to the dmap words of the block range.
1181 */
1182 for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
1183 /* determine the bit number within the word and
1184 * the number of bits within the word.
1185 */
1186 wbitno = dbitno & (DBWORD - 1);
1187 nb = min(rembits, DBWORD - wbitno);
1188
1189 /* check if only part of the word is to be examined.
1190 */
1191 if (nb < DBWORD) {
1192 /* check if the bits are free.
1193 */
1194 mask = (ONES << (DBWORD - nb) >> wbitno);
1195 if ((mask & ~le32_to_cpu(dp->wmap[word])) != mask)
1196 return -ENOSPC;
1197
1198 word += 1;
1199 } else {
1200 /* one or more dmap words are fully contained
1201 * within the block range. determine how many
1202 * words and how many bits.
1203 */
1204 nwords = rembits >> L2DBWORD;
1205 nb = nwords << L2DBWORD;
1206
1207 /* now examine the appropriate leaves to determine
1208 * if the blocks are free.
1209 */
1210 while (nwords > 0) {
1211 /* does the leaf describe any free space ?
1212 */
1213 if (leaf[word] < BUDMIN)
1214 return -ENOSPC;
1215
1216 /* determine the l2 number of bits provided
1217 * by this leaf.
1218 */
1219 l2size =
1220 min((int)leaf[word], NLSTOL2BSZ(nwords));
1221
1222 /* determine how many words were handled.
1223 */
1224 nw = BUDSIZE(l2size, BUDMIN);
1225
1226 nwords -= nw;
1227 word += nw;
1228 }
1229 }
1230 }
1231
1232 /* allocate the blocks.
1233 */
1234 return (dbAllocDmap(bmp, dp, blkno, nblocks));
1235}
1236
1237
1238/*
1239 * NAME: dbAllocNear()
1240 *
1241 * FUNCTION: attempt to allocate a number of contiguous free blocks near
1242 * a specified block (hint) within a dmap.
1243 *
1244 * starting with the dmap leaf that covers the hint, we'll
1245 * check the next four contiguous leaves for sufficient free
1246 * space. if sufficient free space is found, we'll allocate
1247 * the desired free space.
1248 *
1249 * PARAMETERS:
1250 * bmp - pointer to bmap descriptor
1251 * dp - pointer to dmap.
1252 * blkno - block number to allocate near.
1253 * nblocks - actual number of contiguous free blocks desired.
1254 * l2nb - log2 number of contiguous free blocks desired.
1255 * results - on successful return, set to the starting block number
1256 * of the newly allocated range.
1257 *
1258 * RETURN VALUES:
1259 * 0 - success
1260 * -ENOSPC - insufficient disk resources
1261 * -EIO - i/o error
1262 *
1263 * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
1264 */
1265static int
1266dbAllocNear(struct bmap * bmp,
1267 struct dmap * dp, s64 blkno, int nblocks, int l2nb, s64 * results)
1268{
1269 int word, lword, rc;
1270 s8 *leaf;
1271
1272 if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) {
1273 jfs_error(bmp->db_ipbmap->i_sb,
1274 "dbAllocNear: Corrupt dmap page");
1275 return -EIO;
1276 }
1277
1278 leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx);
1279
1280 /* determine the word within the dmap that holds the hint
1281 * (i.e. blkno). also, determine the last word in the dmap
1282 * that we'll include in our examination.
1283 */
1284 word = (blkno & (BPERDMAP - 1)) >> L2DBWORD;
1285 lword = min(word + 4, LPERDMAP);
1286
1287 /* examine the leaves for sufficient free space.
1288 */
1289 for (; word < lword; word++) {
1290 /* does the leaf describe sufficient free space ?
1291 */
1292 if (leaf[word] < l2nb)
1293 continue;
1294
1295 /* determine the block number within the file system
1296 * of the first block described by this dmap word.
1297 */
1298 blkno = le64_to_cpu(dp->start) + (word << L2DBWORD);
1299
1300 /* if not all bits of the dmap word are free, get the
1301 * starting bit number within the dmap word of the required
1302 * string of free bits and adjust the block number with the
1303 * value.
1304 */
1305 if (leaf[word] < BUDMIN)
1306 blkno +=
1307 dbFindBits(le32_to_cpu(dp->wmap[word]), l2nb);
1308
1309 /* allocate the blocks.
1310 */
1311 if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0)
1312 *results = blkno;
1313
1314 return (rc);
1315 }
1316
1317 return -ENOSPC;
1318}
1319
1320
1321/*
1322 * NAME: dbAllocAG()
1323 *
1324 * FUNCTION: attempt to allocate the specified number of contiguous
1325 * free blocks within the specified allocation group.
1326 *
1327 * unless the allocation group size is equal to the number
1328 * of blocks per dmap, the dmap control pages will be used to
1329 * find the required free space, if available. we start the
1330 * search at the highest dmap control page level which
1331 * distinctly describes the allocation group's free space
1332 * (i.e. the highest level at which the allocation group's
1333 * free space is not mixed in with that of any other group).
1334 * in addition, we start the search within this level at a
1335 * height of the dmapctl dmtree at which the nodes distinctly
1336 * describe the allocation group's free space. at this height,
1337 * the allocation group's free space may be represented by 1
1338 * or two sub-trees, depending on the allocation group size.
1339 * we search the top nodes of these subtrees left to right for
1340 * sufficient free space. if sufficient free space is found,
1341 * the subtree is searched to find the leftmost leaf that
1342 * has free space. once we have made it to the leaf, we
1343 * move the search to the next lower level dmap control page
1344 * corresponding to this leaf. we continue down the dmap control
1345 * pages until we find the dmap that contains or starts the
1346 * sufficient free space and we allocate at this dmap.
1347 *
1348 * if the allocation group size is equal to the dmap size,
1349 * we'll start at the dmap corresponding to the allocation
1350 * group and attempt the allocation at this level.
1351 *
1352 * the dmap control page search is also not performed if the
1353 * allocation group is completely free and we go to the first
1354 * dmap of the allocation group to do the allocation. this is
1355 * done because the allocation group may be part (not the first
1356 * part) of a larger binary buddy system, causing the dmap
1357 * control pages to indicate no free space (NOFREE) within
1358 * the allocation group.
1359 *
1360 * PARAMETERS:
1361 * bmp - pointer to bmap descriptor
1362 * agno - allocation group number.
1363 * nblocks - actual number of contiguous free blocks desired.
1364 * l2nb - log2 number of contiguous free blocks desired.
1365 * results - on successful return, set to the starting block number
1366 * of the newly allocated range.
1367 *
1368 * RETURN VALUES:
1369 * 0 - success
1370 * -ENOSPC - insufficient disk resources
1371 * -EIO - i/o error
1372 *
1373 * note: IWRITE_LOCK(ipmap) held on entry/exit;
1374 */
1375static int
1376dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
1377{
1378 struct metapage *mp;
1379 struct dmapctl *dcp;
1380 int rc, ti, i, k, m, n, agperlev;
1381 s64 blkno, lblkno;
1382 int budmin;
1383
1384 /* allocation request should not be for more than the
1385 * allocation group size.
1386 */
1387 if (l2nb > bmp->db_agl2size) {
1388 jfs_error(bmp->db_ipbmap->i_sb,
1389 "dbAllocAG: allocation request is larger than the "
1390 "allocation group size");
1391 return -EIO;
1392 }
1393
1394 /* determine the starting block number of the allocation
1395 * group.
1396 */
1397 blkno = (s64) agno << bmp->db_agl2size;
1398
1399 /* check if the allocation group size is the minimum allocation
1400 * group size or if the allocation group is completely free. if
1401 * the allocation group size is the minimum size of BPERDMAP (i.e.
1402 * 1 dmap), there is no need to search the dmap control page (below)
1403 * that fully describes the allocation group since the allocation
1404 * group is already fully described by a dmap. in this case, we
1405 * just call dbAllocCtl() to search the dmap tree and allocate the
1406 * required space if available.
1407 *
1408 * if the allocation group is completely free, dbAllocCtl() is
1409 * also called to allocate the required space. this is done for
1410 * two reasons. first, it makes no sense searching the dmap control
1411 * pages for free space when we know that free space exists. second,
1412 * the dmap control pages may indicate that the allocation group
1413 * has no free space if the allocation group is part (not the first
1414 * part) of a larger binary buddy system.
1415 */
1416 if (bmp->db_agsize == BPERDMAP
1417 || bmp->db_agfree[agno] == bmp->db_agsize) {
1418 rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
1419 if ((rc == -ENOSPC) &&
1420 (bmp->db_agfree[agno] == bmp->db_agsize)) {
1421 printk(KERN_ERR "blkno = %Lx, blocks = %Lx\n",
1422 (unsigned long long) blkno,
1423 (unsigned long long) nblocks);
1424 jfs_error(bmp->db_ipbmap->i_sb,
1425 "dbAllocAG: dbAllocCtl failed in free AG");
1426 }
1427 return (rc);
1428 }
1429
1430 /* the buffer for the dmap control page that fully describes the
1431 * allocation group.
1432 */
1433 lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, bmp->db_aglevel);
1434 mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
1435 if (mp == NULL)
1436 return -EIO;
1437 dcp = (struct dmapctl *) mp->data;
1438 budmin = dcp->budmin;
1439
1440 if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) {
1441 jfs_error(bmp->db_ipbmap->i_sb,
1442 "dbAllocAG: Corrupt dmapctl page");
1443 release_metapage(mp);
1444 return -EIO;
1445 }
1446
1447 /* search the subtree(s) of the dmap control page that describes
1448 * the allocation group, looking for sufficient free space. to begin,
1449 * determine how many allocation groups are represented in a dmap
1450 * control page at the control page level (i.e. L0, L1, L2) that
1451 * fully describes an allocation group. next, determine the starting
1452 * tree index of this allocation group within the control page.
1453 */
1454 agperlev =
1455 (1 << (L2LPERCTL - (bmp->db_agheigth << 1))) / bmp->db_agwidth;
1456 ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1));
1457
1458 /* dmap control page trees fan-out by 4 and a single allocation
1459 * group may be described by 1 or 2 subtrees within the ag level
1460 * dmap control page, depending upon the ag size. examine the ag's
1461 * subtrees for sufficient free space, starting with the leftmost
1462 * subtree.
1463 */
1464 for (i = 0; i < bmp->db_agwidth; i++, ti++) {
1465 /* is there sufficient free space ?
1466 */
1467 if (l2nb > dcp->stree[ti])
1468 continue;
1469
1470 /* sufficient free space found in a subtree. now search down
1471 * the subtree to find the leftmost leaf that describes this
1472 * free space.
1473 */
1474 for (k = bmp->db_agheigth; k > 0; k--) {
1475 for (n = 0, m = (ti << 2) + 1; n < 4; n++) {
1476 if (l2nb <= dcp->stree[m + n]) {
1477 ti = m + n;
1478 break;
1479 }
1480 }
1481 if (n == 4) {
1482 jfs_error(bmp->db_ipbmap->i_sb,
1483 "dbAllocAG: failed descending stree");
1484 release_metapage(mp);
1485 return -EIO;
1486 }
1487 }
1488
1489 /* determine the block number within the file system
1490 * that corresponds to this leaf.
1491 */
1492 if (bmp->db_aglevel == 2)
1493 blkno = 0;
1494 else if (bmp->db_aglevel == 1)
1495 blkno &= ~(MAXL1SIZE - 1);
1496 else /* bmp->db_aglevel == 0 */
1497 blkno &= ~(MAXL0SIZE - 1);
1498
1499 blkno +=
1500 ((s64) (ti - le32_to_cpu(dcp->leafidx))) << budmin;
1501
1502 /* release the buffer in preparation for going down
1503 * the next level of dmap control pages.
1504 */
1505 release_metapage(mp);
1506
1507 /* check if we need to continue to search down the lower
1508 * level dmap control pages. we need to if the number of
1509 * blocks required is less than maximum number of blocks
1510 * described at the next lower level.
1511 */
1512 if (l2nb < budmin) {
1513
1514 /* search the lower level dmap control pages to get
1515 * the starting block number of the the dmap that
1516 * contains or starts off the free space.
1517 */
1518 if ((rc =
1519 dbFindCtl(bmp, l2nb, bmp->db_aglevel - 1,
1520 &blkno))) {
1521 if (rc == -ENOSPC) {
1522 jfs_error(bmp->db_ipbmap->i_sb,
1523 "dbAllocAG: control page "
1524 "inconsistent");
1525 return -EIO;
1526 }
1527 return (rc);
1528 }
1529 }
1530
1531 /* allocate the blocks.
1532 */
1533 rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
1534 if (rc == -ENOSPC) {
1535 jfs_error(bmp->db_ipbmap->i_sb,
1536 "dbAllocAG: unable to allocate blocks");
1537 rc = -EIO;
1538 }
1539 return (rc);
1540 }
1541
1542 /* no space in the allocation group. release the buffer and
1543 * return -ENOSPC.
1544 */
1545 release_metapage(mp);
1546
1547 return -ENOSPC;
1548}
1549
1550
1551/*
1552 * NAME: dbAllocAny()
1553 *
1554 * FUNCTION: attempt to allocate the specified number of contiguous
1555 * free blocks anywhere in the file system.
1556 *
1557 * dbAllocAny() attempts to find the sufficient free space by
1558 * searching down the dmap control pages, starting with the
1559 * highest level (i.e. L0, L1, L2) control page. if free space
1560 * large enough to satisfy the desired free space is found, the
1561 * desired free space is allocated.
1562 *
1563 * PARAMETERS:
1564 * bmp - pointer to bmap descriptor
1565 * nblocks - actual number of contiguous free blocks desired.
1566 * l2nb - log2 number of contiguous free blocks desired.
1567 * results - on successful return, set to the starting block number
1568 * of the newly allocated range.
1569 *
1570 * RETURN VALUES:
1571 * 0 - success
1572 * -ENOSPC - insufficient disk resources
1573 * -EIO - i/o error
1574 *
1575 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
1576 */
1577static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results)
1578{
1579 int rc;
1580 s64 blkno = 0;
1581
1582 /* starting with the top level dmap control page, search
1583 * down the dmap control levels for sufficient free space.
1584 * if free space is found, dbFindCtl() returns the starting
1585 * block number of the dmap that contains or starts off the
1586 * range of free space.
1587 */
1588 if ((rc = dbFindCtl(bmp, l2nb, bmp->db_maxlevel, &blkno)))
1589 return (rc);
1590
1591 /* allocate the blocks.
1592 */
1593 rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
1594 if (rc == -ENOSPC) {
1595 jfs_error(bmp->db_ipbmap->i_sb,
1596 "dbAllocAny: unable to allocate blocks");
1597 return -EIO;
1598 }
1599 return (rc);
1600}
1601
1602
1603/*
1604 * NAME: dbFindCtl()
1605 *
1606 * FUNCTION: starting at a specified dmap control page level and block
1607 * number, search down the dmap control levels for a range of
1608 * contiguous free blocks large enough to satisfy an allocation
1609 * request for the specified number of free blocks.
1610 *
1611 * if sufficient contiguous free blocks are found, this routine
1612 * returns the starting block number within a dmap page that
1613 * contains or starts a range of contiqious free blocks that
1614 * is sufficient in size.
1615 *
1616 * PARAMETERS:
1617 * bmp - pointer to bmap descriptor
1618 * level - starting dmap control page level.
1619 * l2nb - log2 number of contiguous free blocks desired.
1620 * *blkno - on entry, starting block number for conducting the search.
1621 * on successful return, the first block within a dmap page
1622 * that contains or starts a range of contiguous free blocks.
1623 *
1624 * RETURN VALUES:
1625 * 0 - success
1626 * -ENOSPC - insufficient disk resources
1627 * -EIO - i/o error
1628 *
1629 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
1630 */
1631static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
1632{
1633 int rc, leafidx, lev;
1634 s64 b, lblkno;
1635 struct dmapctl *dcp;
1636 int budmin;
1637 struct metapage *mp;
1638
1639 /* starting at the specified dmap control page level and block
1640 * number, search down the dmap control levels for the starting
1641 * block number of a dmap page that contains or starts off
1642 * sufficient free blocks.
1643 */
1644 for (lev = level, b = *blkno; lev >= 0; lev--) {
1645 /* get the buffer of the dmap control page for the block
1646 * number and level (i.e. L0, L1, L2).
1647 */
1648 lblkno = BLKTOCTL(b, bmp->db_l2nbperpage, lev);
1649 mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
1650 if (mp == NULL)
1651 return -EIO;
1652 dcp = (struct dmapctl *) mp->data;
1653 budmin = dcp->budmin;
1654
1655 if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) {
1656 jfs_error(bmp->db_ipbmap->i_sb,
1657 "dbFindCtl: Corrupt dmapctl page");
1658 release_metapage(mp);
1659 return -EIO;
1660 }
1661
1662 /* search the tree within the dmap control page for
1663 * sufficent free space. if sufficient free space is found,
1664 * dbFindLeaf() returns the index of the leaf at which
1665 * free space was found.
1666 */
1667 rc = dbFindLeaf((dmtree_t *) dcp, l2nb, &leafidx);
1668
1669 /* release the buffer.
1670 */
1671 release_metapage(mp);
1672
1673 /* space found ?
1674 */
1675 if (rc) {
1676 if (lev != level) {
1677 jfs_error(bmp->db_ipbmap->i_sb,
1678 "dbFindCtl: dmap inconsistent");
1679 return -EIO;
1680 }
1681 return -ENOSPC;
1682 }
1683
1684 /* adjust the block number to reflect the location within
1685 * the dmap control page (i.e. the leaf) at which free
1686 * space was found.
1687 */
1688 b += (((s64) leafidx) << budmin);
1689
1690 /* we stop the search at this dmap control page level if
1691 * the number of blocks required is greater than or equal
1692 * to the maximum number of blocks described at the next
1693 * (lower) level.
1694 */
1695 if (l2nb >= budmin)
1696 break;
1697 }
1698
1699 *blkno = b;
1700 return (0);
1701}
1702
1703
1704/*
1705 * NAME: dbAllocCtl()
1706 *
1707 * FUNCTION: attempt to allocate a specified number of contiguous
1708 * blocks starting within a specific dmap.
1709 *
1710 * this routine is called by higher level routines that search
1711 * the dmap control pages above the actual dmaps for contiguous
1712 * free space. the result of successful searches by these
1713 * routines are the starting block numbers within dmaps, with
1714 * the dmaps themselves containing the desired contiguous free
1715 * space or starting a contiguous free space of desired size
1716 * that is made up of the blocks of one or more dmaps. these
1717 * calls should not fail due to insufficent resources.
1718 *
1719 * this routine is called in some cases where it is not known
1720 * whether it will fail due to insufficient resources. more
1721 * specifically, this occurs when allocating from an allocation
1722 * group whose size is equal to the number of blocks per dmap.
1723 * in this case, the dmap control pages are not examined prior
1724 * to calling this routine (to save pathlength) and the call
1725 * might fail.
1726 *
1727 * for a request size that fits within a dmap, this routine relies
1728 * upon the dmap's dmtree to find the requested contiguous free
1729 * space. for request sizes that are larger than a dmap, the
1730 * requested free space will start at the first block of the
1731 * first dmap (i.e. blkno).
1732 *
1733 * PARAMETERS:
1734 * bmp - pointer to bmap descriptor
1735 * nblocks - actual number of contiguous free blocks to allocate.
1736 * l2nb - log2 number of contiguous free blocks to allocate.
1737 * blkno - starting block number of the dmap to start the allocation
1738 * from.
1739 * results - on successful return, set to the starting block number
1740 * of the newly allocated range.
1741 *
1742 * RETURN VALUES:
1743 * 0 - success
1744 * -ENOSPC - insufficient disk resources
1745 * -EIO - i/o error
1746 *
1747 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
1748 */
1749static int
1750dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results)
1751{
1752 int rc, nb;
1753 s64 b, lblkno, n;
1754 struct metapage *mp;
1755 struct dmap *dp;
1756
1757 /* check if the allocation request is confined to a single dmap.
1758 */
1759 if (l2nb <= L2BPERDMAP) {
1760 /* get the buffer for the dmap.
1761 */
1762 lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
1763 mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
1764 if (mp == NULL)
1765 return -EIO;
1766 dp = (struct dmap *) mp->data;
1767
1768 /* try to allocate the blocks.
1769 */
1770 rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results);
1771 if (rc == 0)
1772 mark_metapage_dirty(mp);
1773
1774 release_metapage(mp);
1775
1776 return (rc);
1777 }
1778
1779 /* allocation request involving multiple dmaps. it must start on
1780 * a dmap boundary.
1781 */
1782 assert((blkno & (BPERDMAP - 1)) == 0);
1783
1784 /* allocate the blocks dmap by dmap.
1785 */
1786 for (n = nblocks, b = blkno; n > 0; n -= nb, b += nb) {
1787 /* get the buffer for the dmap.
1788 */
1789 lblkno = BLKTODMAP(b, bmp->db_l2nbperpage);
1790 mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
1791 if (mp == NULL) {
1792 rc = -EIO;
1793 goto backout;
1794 }
1795 dp = (struct dmap *) mp->data;
1796
1797 /* the dmap better be all free.
1798 */
1799 if (dp->tree.stree[ROOT] != L2BPERDMAP) {
1800 release_metapage(mp);
1801 jfs_error(bmp->db_ipbmap->i_sb,
1802 "dbAllocCtl: the dmap is not all free");
1803 rc = -EIO;
1804 goto backout;
1805 }
1806
1807 /* determine how many blocks to allocate from this dmap.
1808 */
1809 nb = min(n, (s64)BPERDMAP);
1810
1811 /* allocate the blocks from the dmap.
1812 */
1813 if ((rc = dbAllocDmap(bmp, dp, b, nb))) {
1814 release_metapage(mp);
1815 goto backout;
1816 }
1817
1818 /* write the buffer.
1819 */
1820 write_metapage(mp);
1821 }
1822
1823 /* set the results (starting block number) and return.
1824 */
1825 *results = blkno;
1826 return (0);
1827
1828 /* something failed in handling an allocation request involving
1829 * multiple dmaps. we'll try to clean up by backing out any
1830 * allocation that has already happened for this request. if
1831 * we fail in backing out the allocation, we'll mark the file
1832 * system to indicate that blocks have been leaked.
1833 */
1834 backout:
1835
1836 /* try to backout the allocations dmap by dmap.
1837 */
1838 for (n = nblocks - n, b = blkno; n > 0;
1839 n -= BPERDMAP, b += BPERDMAP) {
1840 /* get the buffer for this dmap.
1841 */
1842 lblkno = BLKTODMAP(b, bmp->db_l2nbperpage);
1843 mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
1844 if (mp == NULL) {
1845 /* could not back out. mark the file system
1846 * to indicate that we have leaked blocks.
1847 */
1848 jfs_error(bmp->db_ipbmap->i_sb,
1849 "dbAllocCtl: I/O Error: Block Leakage.");
1850 continue;
1851 }
1852 dp = (struct dmap *) mp->data;
1853
1854 /* free the blocks is this dmap.
1855 */
1856 if (dbFreeDmap(bmp, dp, b, BPERDMAP)) {
1857 /* could not back out. mark the file system
1858 * to indicate that we have leaked blocks.
1859 */
1860 release_metapage(mp);
1861 jfs_error(bmp->db_ipbmap->i_sb,
1862 "dbAllocCtl: Block Leakage.");
1863 continue;
1864 }
1865
1866 /* write the buffer.
1867 */
1868 write_metapage(mp);
1869 }
1870
1871 return (rc);
1872}
1873
1874
1875/*
1876 * NAME: dbAllocDmapLev()
1877 *
1878 * FUNCTION: attempt to allocate a specified number of contiguous blocks
1879 * from a specified dmap.
1880 *
1881 * this routine checks if the contiguous blocks are available.
1882 * if so, nblocks of blocks are allocated; otherwise, ENOSPC is
1883 * returned.
1884 *
1885 * PARAMETERS:
1886 * mp - pointer to bmap descriptor
1887 * dp - pointer to dmap to attempt to allocate blocks from.
1888 * l2nb - log2 number of contiguous block desired.
1889 * nblocks - actual number of contiguous block desired.
1890 * results - on successful return, set to the starting block number
1891 * of the newly allocated range.
1892 *
1893 * RETURN VALUES:
1894 * 0 - success
1895 * -ENOSPC - insufficient disk resources
1896 * -EIO - i/o error
1897 *
1898 * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or
1899 * IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit;
1900 */
1901static int
1902dbAllocDmapLev(struct bmap * bmp,
1903 struct dmap * dp, int nblocks, int l2nb, s64 * results)
1904{
1905 s64 blkno;
1906 int leafidx, rc;
1907
1908 /* can't be more than a dmaps worth of blocks */
1909 assert(l2nb <= L2BPERDMAP);
1910
1911 /* search the tree within the dmap page for sufficient
1912 * free space. if sufficient free space is found, dbFindLeaf()
1913 * returns the index of the leaf at which free space was found.
1914 */
1915 if (dbFindLeaf((dmtree_t *) & dp->tree, l2nb, &leafidx))
1916 return -ENOSPC;
1917
1918 /* determine the block number within the file system corresponding
1919 * to the leaf at which free space was found.
1920 */
1921 blkno = le64_to_cpu(dp->start) + (leafidx << L2DBWORD);
1922
1923 /* if not all bits of the dmap word are free, get the starting
1924 * bit number within the dmap word of the required string of free
1925 * bits and adjust the block number with this value.
1926 */
1927 if (dp->tree.stree[leafidx + LEAFIND] < BUDMIN)
1928 blkno += dbFindBits(le32_to_cpu(dp->wmap[leafidx]), l2nb);
1929
1930 /* allocate the blocks */
1931 if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0)
1932 *results = blkno;
1933
1934 return (rc);
1935}
1936
1937
1938/*
1939 * NAME: dbAllocDmap()
1940 *
1941 * FUNCTION: adjust the disk allocation map to reflect the allocation
1942 * of a specified block range within a dmap.
1943 *
1944 * this routine allocates the specified blocks from the dmap
1945 * through a call to dbAllocBits(). if the allocation of the
1946 * block range causes the maximum string of free blocks within
1947 * the dmap to change (i.e. the value of the root of the dmap's
1948 * dmtree), this routine will cause this change to be reflected
1949 * up through the appropriate levels of the dmap control pages
1950 * by a call to dbAdjCtl() for the L0 dmap control page that
1951 * covers this dmap.
1952 *
1953 * PARAMETERS:
1954 * bmp - pointer to bmap descriptor
1955 * dp - pointer to dmap to allocate the block range from.
1956 * blkno - starting block number of the block to be allocated.
1957 * nblocks - number of blocks to be allocated.
1958 *
1959 * RETURN VALUES:
1960 * 0 - success
1961 * -EIO - i/o error
1962 *
1963 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
1964 */
1965static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
1966 int nblocks)
1967{
1968 s8 oldroot;
1969 int rc;
1970
1971 /* save the current value of the root (i.e. maximum free string)
1972 * of the dmap tree.
1973 */
1974 oldroot = dp->tree.stree[ROOT];
1975
1976 /* allocate the specified (blocks) bits */
1977 dbAllocBits(bmp, dp, blkno, nblocks);
1978
1979 /* if the root has not changed, done. */
1980 if (dp->tree.stree[ROOT] == oldroot)
1981 return (0);
1982
1983 /* root changed. bubble the change up to the dmap control pages.
1984 * if the adjustment of the upper level control pages fails,
1985 * backout the bit allocation (thus making everything consistent).
1986 */
1987 if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 1, 0)))
1988 dbFreeBits(bmp, dp, blkno, nblocks);
1989
1990 return (rc);
1991}
1992
1993
1994/*
1995 * NAME: dbFreeDmap()
1996 *
1997 * FUNCTION: adjust the disk allocation map to reflect the allocation
1998 * of a specified block range within a dmap.
1999 *
2000 * this routine frees the specified blocks from the dmap through
2001 * a call to dbFreeBits(). if the deallocation of the block range
2002 * causes the maximum string of free blocks within the dmap to
2003 * change (i.e. the value of the root of the dmap's dmtree), this
2004 * routine will cause this change to be reflected up through the
2005 * appropriate levels of the dmap control pages by a call to
2006 * dbAdjCtl() for the L0 dmap control page that covers this dmap.
2007 *
2008 * PARAMETERS:
2009 * bmp - pointer to bmap descriptor
2010 * dp - pointer to dmap to free the block range from.
2011 * blkno - starting block number of the block to be freed.
2012 * nblocks - number of blocks to be freed.
2013 *
2014 * RETURN VALUES:
2015 * 0 - success
2016 * -EIO - i/o error
2017 *
2018 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2019 */
2020static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
2021 int nblocks)
2022{
2023 s8 oldroot;
Dave Kleikamp56d12542005-07-15 09:43:36 -05002024 int rc = 0, word;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002025
2026 /* save the current value of the root (i.e. maximum free string)
2027 * of the dmap tree.
2028 */
2029 oldroot = dp->tree.stree[ROOT];
2030
2031 /* free the specified (blocks) bits */
Dave Kleikamp56d12542005-07-15 09:43:36 -05002032 rc = dbFreeBits(bmp, dp, blkno, nblocks);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002033
Dave Kleikamp56d12542005-07-15 09:43:36 -05002034 /* if error or the root has not changed, done. */
2035 if (rc || (dp->tree.stree[ROOT] == oldroot))
2036 return (rc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002037
2038 /* root changed. bubble the change up to the dmap control pages.
2039 * if the adjustment of the upper level control pages fails,
2040 * backout the deallocation.
2041 */
2042 if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 0, 0))) {
2043 word = (blkno & (BPERDMAP - 1)) >> L2DBWORD;
2044
2045 /* as part of backing out the deallocation, we will have
2046 * to back split the dmap tree if the deallocation caused
2047 * the freed blocks to become part of a larger binary buddy
2048 * system.
2049 */
2050 if (dp->tree.stree[word] == NOFREE)
2051 dbBackSplit((dmtree_t *) & dp->tree, word);
2052
2053 dbAllocBits(bmp, dp, blkno, nblocks);
2054 }
2055
2056 return (rc);
2057}
2058
2059
2060/*
2061 * NAME: dbAllocBits()
2062 *
2063 * FUNCTION: allocate a specified block range from a dmap.
2064 *
2065 * this routine updates the dmap to reflect the working
2066 * state allocation of the specified block range. it directly
2067 * updates the bits of the working map and causes the adjustment
2068 * of the binary buddy system described by the dmap's dmtree
2069 * leaves to reflect the bits allocated. it also causes the
2070 * dmap's dmtree, as a whole, to reflect the allocated range.
2071 *
2072 * PARAMETERS:
2073 * bmp - pointer to bmap descriptor
2074 * dp - pointer to dmap to allocate bits from.
2075 * blkno - starting block number of the bits to be allocated.
2076 * nblocks - number of bits to be allocated.
2077 *
2078 * RETURN VALUES: none
2079 *
2080 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2081 */
2082static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2083 int nblocks)
2084{
2085 int dbitno, word, rembits, nb, nwords, wbitno, nw, agno;
2086 dmtree_t *tp = (dmtree_t *) & dp->tree;
2087 int size;
2088 s8 *leaf;
2089
2090 /* pick up a pointer to the leaves of the dmap tree */
2091 leaf = dp->tree.stree + LEAFIND;
2092
2093 /* determine the bit number and word within the dmap of the
2094 * starting block.
2095 */
2096 dbitno = blkno & (BPERDMAP - 1);
2097 word = dbitno >> L2DBWORD;
2098
2099 /* block range better be within the dmap */
2100 assert(dbitno + nblocks <= BPERDMAP);
2101
2102 /* allocate the bits of the dmap's words corresponding to the block
2103 * range. not all bits of the first and last words may be contained
2104 * within the block range. if this is the case, we'll work against
2105 * those words (i.e. partial first and/or last) on an individual basis
2106 * (a single pass), allocating the bits of interest by hand and
2107 * updating the leaf corresponding to the dmap word. a single pass
2108 * will be used for all dmap words fully contained within the
2109 * specified range. within this pass, the bits of all fully contained
2110 * dmap words will be marked as free in a single shot and the leaves
2111 * will be updated. a single leaf may describe the free space of
2112 * multiple dmap words, so we may update only a subset of the actual
2113 * leaves corresponding to the dmap words of the block range.
2114 */
2115 for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
2116 /* determine the bit number within the word and
2117 * the number of bits within the word.
2118 */
2119 wbitno = dbitno & (DBWORD - 1);
2120 nb = min(rembits, DBWORD - wbitno);
2121
2122 /* check if only part of a word is to be allocated.
2123 */
2124 if (nb < DBWORD) {
2125 /* allocate (set to 1) the appropriate bits within
2126 * this dmap word.
2127 */
2128 dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb)
2129 >> wbitno);
2130
2131 /* update the leaf for this dmap word. in addition
2132 * to setting the leaf value to the binary buddy max
2133 * of the updated dmap word, dbSplit() will split
2134 * the binary system of the leaves if need be.
2135 */
2136 dbSplit(tp, word, BUDMIN,
2137 dbMaxBud((u8 *) & dp->wmap[word]));
2138
2139 word += 1;
2140 } else {
2141 /* one or more dmap words are fully contained
2142 * within the block range. determine how many
2143 * words and allocate (set to 1) the bits of these
2144 * words.
2145 */
2146 nwords = rembits >> L2DBWORD;
2147 memset(&dp->wmap[word], (int) ONES, nwords * 4);
2148
2149 /* determine how many bits.
2150 */
2151 nb = nwords << L2DBWORD;
2152
2153 /* now update the appropriate leaves to reflect
2154 * the allocated words.
2155 */
2156 for (; nwords > 0; nwords -= nw) {
2157 if (leaf[word] < BUDMIN) {
2158 jfs_error(bmp->db_ipbmap->i_sb,
2159 "dbAllocBits: leaf page "
2160 "corrupt");
2161 break;
2162 }
2163
2164 /* determine what the leaf value should be
2165 * updated to as the minimum of the l2 number
2166 * of bits being allocated and the l2 number
2167 * of bits currently described by this leaf.
2168 */
2169 size = min((int)leaf[word], NLSTOL2BSZ(nwords));
2170
2171 /* update the leaf to reflect the allocation.
2172 * in addition to setting the leaf value to
2173 * NOFREE, dbSplit() will split the binary
2174 * system of the leaves to reflect the current
2175 * allocation (size).
2176 */
2177 dbSplit(tp, word, size, NOFREE);
2178
2179 /* get the number of dmap words handled */
2180 nw = BUDSIZE(size, BUDMIN);
2181 word += nw;
2182 }
2183 }
2184 }
2185
2186 /* update the free count for this dmap */
2187 dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) - nblocks);
2188
2189 BMAP_LOCK(bmp);
2190
2191 /* if this allocation group is completely free,
2192 * update the maximum allocation group number if this allocation
2193 * group is the new max.
2194 */
2195 agno = blkno >> bmp->db_agl2size;
2196 if (agno > bmp->db_maxag)
2197 bmp->db_maxag = agno;
2198
2199 /* update the free count for the allocation group and map */
2200 bmp->db_agfree[agno] -= nblocks;
2201 bmp->db_nfree -= nblocks;
2202
2203 BMAP_UNLOCK(bmp);
2204}
2205
2206
2207/*
2208 * NAME: dbFreeBits()
2209 *
2210 * FUNCTION: free a specified block range from a dmap.
2211 *
2212 * this routine updates the dmap to reflect the working
2213 * state allocation of the specified block range. it directly
2214 * updates the bits of the working map and causes the adjustment
2215 * of the binary buddy system described by the dmap's dmtree
2216 * leaves to reflect the bits freed. it also causes the dmap's
2217 * dmtree, as a whole, to reflect the deallocated range.
2218 *
2219 * PARAMETERS:
2220 * bmp - pointer to bmap descriptor
2221 * dp - pointer to dmap to free bits from.
2222 * blkno - starting block number of the bits to be freed.
2223 * nblocks - number of bits to be freed.
2224 *
Dave Kleikamp56d12542005-07-15 09:43:36 -05002225 * RETURN VALUES: 0 for success
Linus Torvalds1da177e2005-04-16 15:20:36 -07002226 *
2227 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2228 */
Dave Kleikamp56d12542005-07-15 09:43:36 -05002229static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002230 int nblocks)
2231{
2232 int dbitno, word, rembits, nb, nwords, wbitno, nw, agno;
2233 dmtree_t *tp = (dmtree_t *) & dp->tree;
Dave Kleikamp56d12542005-07-15 09:43:36 -05002234 int rc = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002235 int size;
2236
2237 /* determine the bit number and word within the dmap of the
2238 * starting block.
2239 */
2240 dbitno = blkno & (BPERDMAP - 1);
2241 word = dbitno >> L2DBWORD;
2242
2243 /* block range better be within the dmap.
2244 */
2245 assert(dbitno + nblocks <= BPERDMAP);
2246
2247 /* free the bits of the dmaps words corresponding to the block range.
2248 * not all bits of the first and last words may be contained within
2249 * the block range. if this is the case, we'll work against those
2250 * words (i.e. partial first and/or last) on an individual basis
2251 * (a single pass), freeing the bits of interest by hand and updating
2252 * the leaf corresponding to the dmap word. a single pass will be used
2253 * for all dmap words fully contained within the specified range.
2254 * within this pass, the bits of all fully contained dmap words will
2255 * be marked as free in a single shot and the leaves will be updated. a
2256 * single leaf may describe the free space of multiple dmap words,
2257 * so we may update only a subset of the actual leaves corresponding
2258 * to the dmap words of the block range.
2259 *
2260 * dbJoin() is used to update leaf values and will join the binary
2261 * buddy system of the leaves if the new leaf values indicate this
2262 * should be done.
2263 */
2264 for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
2265 /* determine the bit number within the word and
2266 * the number of bits within the word.
2267 */
2268 wbitno = dbitno & (DBWORD - 1);
2269 nb = min(rembits, DBWORD - wbitno);
2270
2271 /* check if only part of a word is to be freed.
2272 */
2273 if (nb < DBWORD) {
2274 /* free (zero) the appropriate bits within this
2275 * dmap word.
2276 */
2277 dp->wmap[word] &=
2278 cpu_to_le32(~(ONES << (DBWORD - nb)
2279 >> wbitno));
2280
2281 /* update the leaf for this dmap word.
2282 */
Dave Kleikamp56d12542005-07-15 09:43:36 -05002283 rc = dbJoin(tp, word,
2284 dbMaxBud((u8 *) & dp->wmap[word]));
2285 if (rc)
2286 return rc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002287
2288 word += 1;
2289 } else {
2290 /* one or more dmap words are fully contained
2291 * within the block range. determine how many
2292 * words and free (zero) the bits of these words.
2293 */
2294 nwords = rembits >> L2DBWORD;
2295 memset(&dp->wmap[word], 0, nwords * 4);
2296
2297 /* determine how many bits.
2298 */
2299 nb = nwords << L2DBWORD;
2300
2301 /* now update the appropriate leaves to reflect
2302 * the freed words.
2303 */
2304 for (; nwords > 0; nwords -= nw) {
2305 /* determine what the leaf value should be
2306 * updated to as the minimum of the l2 number
2307 * of bits being freed and the l2 (max) number
2308 * of bits that can be described by this leaf.
2309 */
2310 size =
2311 min(LITOL2BSZ
2312 (word, L2LPERDMAP, BUDMIN),
2313 NLSTOL2BSZ(nwords));
2314
2315 /* update the leaf.
2316 */
Dave Kleikamp56d12542005-07-15 09:43:36 -05002317 rc = dbJoin(tp, word, size);
2318 if (rc)
2319 return rc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002320
2321 /* get the number of dmap words handled.
2322 */
2323 nw = BUDSIZE(size, BUDMIN);
2324 word += nw;
2325 }
2326 }
2327 }
2328
2329 /* update the free count for this dmap.
2330 */
2331 dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) + nblocks);
2332
2333 BMAP_LOCK(bmp);
2334
2335 /* update the free count for the allocation group and
2336 * map.
2337 */
2338 agno = blkno >> bmp->db_agl2size;
2339 bmp->db_nfree += nblocks;
2340 bmp->db_agfree[agno] += nblocks;
2341
2342 /* check if this allocation group is not completely free and
2343 * if it is currently the maximum (rightmost) allocation group.
2344 * if so, establish the new maximum allocation group number by
2345 * searching left for the first allocation group with allocation.
2346 */
2347 if ((bmp->db_agfree[agno] == bmp->db_agsize && agno == bmp->db_maxag) ||
2348 (agno == bmp->db_numag - 1 &&
2349 bmp->db_agfree[agno] == (bmp-> db_mapsize & (BPERDMAP - 1)))) {
2350 while (bmp->db_maxag > 0) {
2351 bmp->db_maxag -= 1;
2352 if (bmp->db_agfree[bmp->db_maxag] !=
2353 bmp->db_agsize)
2354 break;
2355 }
2356
2357 /* re-establish the allocation group preference if the
2358 * current preference is right of the maximum allocation
2359 * group.
2360 */
2361 if (bmp->db_agpref > bmp->db_maxag)
2362 bmp->db_agpref = bmp->db_maxag;
2363 }
2364
2365 BMAP_UNLOCK(bmp);
Dave Kleikamp56d12542005-07-15 09:43:36 -05002366
2367 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002368}
2369
2370
2371/*
2372 * NAME: dbAdjCtl()
2373 *
2374 * FUNCTION: adjust a dmap control page at a specified level to reflect
2375 * the change in a lower level dmap or dmap control page's
2376 * maximum string of free blocks (i.e. a change in the root
2377 * of the lower level object's dmtree) due to the allocation
2378 * or deallocation of a range of blocks with a single dmap.
2379 *
2380 * on entry, this routine is provided with the new value of
2381 * the lower level dmap or dmap control page root and the
2382 * starting block number of the block range whose allocation
2383 * or deallocation resulted in the root change. this range
2384 * is respresented by a single leaf of the current dmapctl
2385 * and the leaf will be updated with this value, possibly
2386 * causing a binary buddy system within the leaves to be
2387 * split or joined. the update may also cause the dmapctl's
2388 * dmtree to be updated.
2389 *
2390 * if the adjustment of the dmap control page, itself, causes its
2391 * root to change, this change will be bubbled up to the next dmap
2392 * control level by a recursive call to this routine, specifying
2393 * the new root value and the next dmap control page level to
2394 * be adjusted.
2395 * PARAMETERS:
2396 * bmp - pointer to bmap descriptor
2397 * blkno - the first block of a block range within a dmap. it is
2398 * the allocation or deallocation of this block range that
2399 * requires the dmap control page to be adjusted.
2400 * newval - the new value of the lower level dmap or dmap control
2401 * page root.
2402 * alloc - TRUE if adjustment is due to an allocation.
2403 * level - current level of dmap control page (i.e. L0, L1, L2) to
2404 * be adjusted.
2405 *
2406 * RETURN VALUES:
2407 * 0 - success
2408 * -EIO - i/o error
2409 *
2410 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2411 */
2412static int
2413dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level)
2414{
2415 struct metapage *mp;
2416 s8 oldroot;
2417 int oldval;
2418 s64 lblkno;
2419 struct dmapctl *dcp;
2420 int rc, leafno, ti;
2421
2422 /* get the buffer for the dmap control page for the specified
2423 * block number and control page level.
2424 */
2425 lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, level);
2426 mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
2427 if (mp == NULL)
2428 return -EIO;
2429 dcp = (struct dmapctl *) mp->data;
2430
2431 if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) {
2432 jfs_error(bmp->db_ipbmap->i_sb,
2433 "dbAdjCtl: Corrupt dmapctl page");
2434 release_metapage(mp);
2435 return -EIO;
2436 }
2437
2438 /* determine the leaf number corresponding to the block and
2439 * the index within the dmap control tree.
2440 */
2441 leafno = BLKTOCTLLEAF(blkno, dcp->budmin);
2442 ti = leafno + le32_to_cpu(dcp->leafidx);
2443
2444 /* save the current leaf value and the current root level (i.e.
2445 * maximum l2 free string described by this dmapctl).
2446 */
2447 oldval = dcp->stree[ti];
2448 oldroot = dcp->stree[ROOT];
2449
2450 /* check if this is a control page update for an allocation.
2451 * if so, update the leaf to reflect the new leaf value using
2452 * dbSplit(); otherwise (deallocation), use dbJoin() to udpate
2453 * the leaf with the new value. in addition to updating the
2454 * leaf, dbSplit() will also split the binary buddy system of
2455 * the leaves, if required, and bubble new values within the
2456 * dmapctl tree, if required. similarly, dbJoin() will join
2457 * the binary buddy system of leaves and bubble new values up
2458 * the dmapctl tree as required by the new leaf value.
2459 */
2460 if (alloc) {
2461 /* check if we are in the middle of a binary buddy
2462 * system. this happens when we are performing the
2463 * first allocation out of an allocation group that
2464 * is part (not the first part) of a larger binary
2465 * buddy system. if we are in the middle, back split
2466 * the system prior to calling dbSplit() which assumes
2467 * that it is at the front of a binary buddy system.
2468 */
2469 if (oldval == NOFREE) {
2470 dbBackSplit((dmtree_t *) dcp, leafno);
2471 oldval = dcp->stree[ti];
2472 }
2473 dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval);
2474 } else {
Dave Kleikamp56d12542005-07-15 09:43:36 -05002475 rc = dbJoin((dmtree_t *) dcp, leafno, newval);
2476 if (rc)
2477 return rc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002478 }
2479
2480 /* check if the root of the current dmap control page changed due
2481 * to the update and if the current dmap control page is not at
2482 * the current top level (i.e. L0, L1, L2) of the map. if so (i.e.
2483 * root changed and this is not the top level), call this routine
2484 * again (recursion) for the next higher level of the mapping to
2485 * reflect the change in root for the current dmap control page.
2486 */
2487 if (dcp->stree[ROOT] != oldroot) {
2488 /* are we below the top level of the map. if so,
2489 * bubble the root up to the next higher level.
2490 */
2491 if (level < bmp->db_maxlevel) {
2492 /* bubble up the new root of this dmap control page to
2493 * the next level.
2494 */
2495 if ((rc =
2496 dbAdjCtl(bmp, blkno, dcp->stree[ROOT], alloc,
2497 level + 1))) {
2498 /* something went wrong in bubbling up the new
2499 * root value, so backout the changes to the
2500 * current dmap control page.
2501 */
2502 if (alloc) {
2503 dbJoin((dmtree_t *) dcp, leafno,
2504 oldval);
2505 } else {
2506 /* the dbJoin() above might have
2507 * caused a larger binary buddy system
2508 * to form and we may now be in the
2509 * middle of it. if this is the case,
2510 * back split the buddies.
2511 */
2512 if (dcp->stree[ti] == NOFREE)
2513 dbBackSplit((dmtree_t *)
2514 dcp, leafno);
2515 dbSplit((dmtree_t *) dcp, leafno,
2516 dcp->budmin, oldval);
2517 }
2518
2519 /* release the buffer and return the error.
2520 */
2521 release_metapage(mp);
2522 return (rc);
2523 }
2524 } else {
2525 /* we're at the top level of the map. update
2526 * the bmap control page to reflect the size
2527 * of the maximum free buddy system.
2528 */
2529 assert(level == bmp->db_maxlevel);
2530 if (bmp->db_maxfreebud != oldroot) {
2531 jfs_error(bmp->db_ipbmap->i_sb,
2532 "dbAdjCtl: the maximum free buddy is "
2533 "not the old root");
2534 }
2535 bmp->db_maxfreebud = dcp->stree[ROOT];
2536 }
2537 }
2538
2539 /* write the buffer.
2540 */
2541 write_metapage(mp);
2542
2543 return (0);
2544}
2545
2546
2547/*
2548 * NAME: dbSplit()
2549 *
2550 * FUNCTION: update the leaf of a dmtree with a new value, splitting
2551 * the leaf from the binary buddy system of the dmtree's
2552 * leaves, as required.
2553 *
2554 * PARAMETERS:
2555 * tp - pointer to the tree containing the leaf.
2556 * leafno - the number of the leaf to be updated.
2557 * splitsz - the size the binary buddy system starting at the leaf
2558 * must be split to, specified as the log2 number of blocks.
2559 * newval - the new value for the leaf.
2560 *
2561 * RETURN VALUES: none
2562 *
2563 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2564 */
2565static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
2566{
2567 int budsz;
2568 int cursz;
2569 s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx);
2570
2571 /* check if the leaf needs to be split.
2572 */
2573 if (leaf[leafno] > tp->dmt_budmin) {
2574 /* the split occurs by cutting the buddy system in half
2575 * at the specified leaf until we reach the specified
2576 * size. pick up the starting split size (current size
2577 * - 1 in l2) and the corresponding buddy size.
2578 */
2579 cursz = leaf[leafno] - 1;
2580 budsz = BUDSIZE(cursz, tp->dmt_budmin);
2581
2582 /* split until we reach the specified size.
2583 */
2584 while (cursz >= splitsz) {
2585 /* update the buddy's leaf with its new value.
2586 */
2587 dbAdjTree(tp, leafno ^ budsz, cursz);
2588
2589 /* on to the next size and buddy.
2590 */
2591 cursz -= 1;
2592 budsz >>= 1;
2593 }
2594 }
2595
2596 /* adjust the dmap tree to reflect the specified leaf's new
2597 * value.
2598 */
2599 dbAdjTree(tp, leafno, newval);
2600}
2601
2602
2603/*
2604 * NAME: dbBackSplit()
2605 *
2606 * FUNCTION: back split the binary buddy system of dmtree leaves
2607 * that hold a specified leaf until the specified leaf
2608 * starts its own binary buddy system.
2609 *
2610 * the allocators typically perform allocations at the start
2611 * of binary buddy systems and dbSplit() is used to accomplish
2612 * any required splits. in some cases, however, allocation
2613 * may occur in the middle of a binary system and requires a
2614 * back split, with the split proceeding out from the middle of
2615 * the system (less efficient) rather than the start of the
2616 * system (more efficient). the cases in which a back split
2617 * is required are rare and are limited to the first allocation
2618 * within an allocation group which is a part (not first part)
2619 * of a larger binary buddy system and a few exception cases
2620 * in which a previous join operation must be backed out.
2621 *
2622 * PARAMETERS:
2623 * tp - pointer to the tree containing the leaf.
2624 * leafno - the number of the leaf to be updated.
2625 *
2626 * RETURN VALUES: none
2627 *
2628 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2629 */
2630static void dbBackSplit(dmtree_t * tp, int leafno)
2631{
2632 int budsz, bud, w, bsz, size;
2633 int cursz;
2634 s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx);
2635
2636 /* leaf should be part (not first part) of a binary
2637 * buddy system.
2638 */
2639 assert(leaf[leafno] == NOFREE);
2640
2641 /* the back split is accomplished by iteratively finding the leaf
2642 * that starts the buddy system that contains the specified leaf and
2643 * splitting that system in two. this iteration continues until
2644 * the specified leaf becomes the start of a buddy system.
2645 *
2646 * determine maximum possible l2 size for the specified leaf.
2647 */
2648 size =
2649 LITOL2BSZ(leafno, le32_to_cpu(tp->dmt_l2nleafs),
2650 tp->dmt_budmin);
2651
2652 /* determine the number of leaves covered by this size. this
2653 * is the buddy size that we will start with as we search for
2654 * the buddy system that contains the specified leaf.
2655 */
2656 budsz = BUDSIZE(size, tp->dmt_budmin);
2657
2658 /* back split.
2659 */
2660 while (leaf[leafno] == NOFREE) {
2661 /* find the leftmost buddy leaf.
2662 */
2663 for (w = leafno, bsz = budsz;; bsz <<= 1,
2664 w = (w < bud) ? w : bud) {
2665 assert(bsz < le32_to_cpu(tp->dmt_nleafs));
2666
2667 /* determine the buddy.
2668 */
2669 bud = w ^ bsz;
2670
2671 /* check if this buddy is the start of the system.
2672 */
2673 if (leaf[bud] != NOFREE) {
2674 /* split the leaf at the start of the
2675 * system in two.
2676 */
2677 cursz = leaf[bud] - 1;
2678 dbSplit(tp, bud, cursz, cursz);
2679 break;
2680 }
2681 }
2682 }
2683
2684 assert(leaf[leafno] == size);
2685}
2686
2687
2688/*
2689 * NAME: dbJoin()
2690 *
2691 * FUNCTION: update the leaf of a dmtree with a new value, joining
2692 * the leaf with other leaves of the dmtree into a multi-leaf
2693 * binary buddy system, as required.
2694 *
2695 * PARAMETERS:
2696 * tp - pointer to the tree containing the leaf.
2697 * leafno - the number of the leaf to be updated.
2698 * newval - the new value for the leaf.
2699 *
2700 * RETURN VALUES: none
2701 */
Dave Kleikamp56d12542005-07-15 09:43:36 -05002702static int dbJoin(dmtree_t * tp, int leafno, int newval)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002703{
2704 int budsz, buddy;
2705 s8 *leaf;
2706
2707 /* can the new leaf value require a join with other leaves ?
2708 */
2709 if (newval >= tp->dmt_budmin) {
2710 /* pickup a pointer to the leaves of the tree.
2711 */
2712 leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx);
2713
2714 /* try to join the specified leaf into a large binary
2715 * buddy system. the join proceeds by attempting to join
2716 * the specified leafno with its buddy (leaf) at new value.
2717 * if the join occurs, we attempt to join the left leaf
2718 * of the joined buddies with its buddy at new value + 1.
2719 * we continue to join until we find a buddy that cannot be
2720 * joined (does not have a value equal to the size of the
2721 * last join) or until all leaves have been joined into a
2722 * single system.
2723 *
2724 * get the buddy size (number of words covered) of
2725 * the new value.
2726 */
2727 budsz = BUDSIZE(newval, tp->dmt_budmin);
2728
2729 /* try to join.
2730 */
2731 while (budsz < le32_to_cpu(tp->dmt_nleafs)) {
2732 /* get the buddy leaf.
2733 */
2734 buddy = leafno ^ budsz;
2735
2736 /* if the leaf's new value is greater than its
2737 * buddy's value, we join no more.
2738 */
2739 if (newval > leaf[buddy])
2740 break;
2741
Dave Kleikamp56d12542005-07-15 09:43:36 -05002742 /* It shouldn't be less */
2743 if (newval < leaf[buddy])
2744 return -EIO;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002745
2746 /* check which (leafno or buddy) is the left buddy.
2747 * the left buddy gets to claim the blocks resulting
2748 * from the join while the right gets to claim none.
2749 * the left buddy is also eligable to participate in
2750 * a join at the next higher level while the right
2751 * is not.
2752 *
2753 */
2754 if (leafno < buddy) {
2755 /* leafno is the left buddy.
2756 */
2757 dbAdjTree(tp, buddy, NOFREE);
2758 } else {
2759 /* buddy is the left buddy and becomes
2760 * leafno.
2761 */
2762 dbAdjTree(tp, leafno, NOFREE);
2763 leafno = buddy;
2764 }
2765
2766 /* on to try the next join.
2767 */
2768 newval += 1;
2769 budsz <<= 1;
2770 }
2771 }
2772
2773 /* update the leaf value.
2774 */
2775 dbAdjTree(tp, leafno, newval);
Dave Kleikamp56d12542005-07-15 09:43:36 -05002776
2777 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002778}
2779
2780
2781/*
2782 * NAME: dbAdjTree()
2783 *
2784 * FUNCTION: update a leaf of a dmtree with a new value, adjusting
2785 * the dmtree, as required, to reflect the new leaf value.
2786 * the combination of any buddies must already be done before
2787 * this is called.
2788 *
2789 * PARAMETERS:
2790 * tp - pointer to the tree to be adjusted.
2791 * leafno - the number of the leaf to be updated.
2792 * newval - the new value for the leaf.
2793 *
2794 * RETURN VALUES: none
2795 */
2796static void dbAdjTree(dmtree_t * tp, int leafno, int newval)
2797{
2798 int lp, pp, k;
2799 int max;
2800
2801 /* pick up the index of the leaf for this leafno.
2802 */
2803 lp = leafno + le32_to_cpu(tp->dmt_leafidx);
2804
2805 /* is the current value the same as the old value ? if so,
2806 * there is nothing to do.
2807 */
2808 if (tp->dmt_stree[lp] == newval)
2809 return;
2810
2811 /* set the new value.
2812 */
2813 tp->dmt_stree[lp] = newval;
2814
2815 /* bubble the new value up the tree as required.
2816 */
2817 for (k = 0; k < le32_to_cpu(tp->dmt_height); k++) {
2818 /* get the index of the first leaf of the 4 leaf
2819 * group containing the specified leaf (leafno).
2820 */
2821 lp = ((lp - 1) & ~0x03) + 1;
2822
2823 /* get the index of the parent of this 4 leaf group.
2824 */
2825 pp = (lp - 1) >> 2;
2826
2827 /* determine the maximum of the 4 leaves.
2828 */
2829 max = TREEMAX(&tp->dmt_stree[lp]);
2830
2831 /* if the maximum of the 4 is the same as the
2832 * parent's value, we're done.
2833 */
2834 if (tp->dmt_stree[pp] == max)
2835 break;
2836
2837 /* parent gets new value.
2838 */
2839 tp->dmt_stree[pp] = max;
2840
2841 /* parent becomes leaf for next go-round.
2842 */
2843 lp = pp;
2844 }
2845}
2846
2847
2848/*
2849 * NAME: dbFindLeaf()
2850 *
2851 * FUNCTION: search a dmtree_t for sufficient free blocks, returning
2852 * the index of a leaf describing the free blocks if
2853 * sufficient free blocks are found.
2854 *
2855 * the search starts at the top of the dmtree_t tree and
2856 * proceeds down the tree to the leftmost leaf with sufficient
2857 * free space.
2858 *
2859 * PARAMETERS:
2860 * tp - pointer to the tree to be searched.
2861 * l2nb - log2 number of free blocks to search for.
2862 * leafidx - return pointer to be set to the index of the leaf
2863 * describing at least l2nb free blocks if sufficient
2864 * free blocks are found.
2865 *
2866 * RETURN VALUES:
2867 * 0 - success
2868 * -ENOSPC - insufficient free blocks.
2869 */
2870static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx)
2871{
2872 int ti, n = 0, k, x = 0;
2873
2874 /* first check the root of the tree to see if there is
2875 * sufficient free space.
2876 */
2877 if (l2nb > tp->dmt_stree[ROOT])
2878 return -ENOSPC;
2879
2880 /* sufficient free space available. now search down the tree
2881 * starting at the next level for the leftmost leaf that
2882 * describes sufficient free space.
2883 */
2884 for (k = le32_to_cpu(tp->dmt_height), ti = 1;
2885 k > 0; k--, ti = ((ti + n) << 2) + 1) {
2886 /* search the four nodes at this level, starting from
2887 * the left.
2888 */
2889 for (x = ti, n = 0; n < 4; n++) {
2890 /* sufficient free space found. move to the next
2891 * level (or quit if this is the last level).
2892 */
2893 if (l2nb <= tp->dmt_stree[x + n])
2894 break;
2895 }
2896
2897 /* better have found something since the higher
2898 * levels of the tree said it was here.
2899 */
2900 assert(n < 4);
2901 }
2902
2903 /* set the return to the leftmost leaf describing sufficient
2904 * free space.
2905 */
2906 *leafidx = x + n - le32_to_cpu(tp->dmt_leafidx);
2907
2908 return (0);
2909}
2910
2911
2912/*
2913 * NAME: dbFindBits()
2914 *
2915 * FUNCTION: find a specified number of binary buddy free bits within a
2916 * dmap bitmap word value.
2917 *
2918 * this routine searches the bitmap value for (1 << l2nb) free
2919 * bits at (1 << l2nb) alignments within the value.
2920 *
2921 * PARAMETERS:
2922 * word - dmap bitmap word value.
2923 * l2nb - number of free bits specified as a log2 number.
2924 *
2925 * RETURN VALUES:
2926 * starting bit number of free bits.
2927 */
2928static int dbFindBits(u32 word, int l2nb)
2929{
2930 int bitno, nb;
2931 u32 mask;
2932
2933 /* get the number of bits.
2934 */
2935 nb = 1 << l2nb;
2936 assert(nb <= DBWORD);
2937
2938 /* complement the word so we can use a mask (i.e. 0s represent
2939 * free bits) and compute the mask.
2940 */
2941 word = ~word;
2942 mask = ONES << (DBWORD - nb);
2943
2944 /* scan the word for nb free bits at nb alignments.
2945 */
2946 for (bitno = 0; mask != 0; bitno += nb, mask >>= nb) {
2947 if ((mask & word) == mask)
2948 break;
2949 }
2950
2951 ASSERT(bitno < 32);
2952
2953 /* return the bit number.
2954 */
2955 return (bitno);
2956}
2957
2958
2959/*
2960 * NAME: dbMaxBud(u8 *cp)
2961 *
2962 * FUNCTION: determine the largest binary buddy string of free
2963 * bits within 32-bits of the map.
2964 *
2965 * PARAMETERS:
2966 * cp - pointer to the 32-bit value.
2967 *
2968 * RETURN VALUES:
2969 * largest binary buddy of free bits within a dmap word.
2970 */
2971static int dbMaxBud(u8 * cp)
2972{
2973 signed char tmp1, tmp2;
2974
2975 /* check if the wmap word is all free. if so, the
2976 * free buddy size is BUDMIN.
2977 */
2978 if (*((uint *) cp) == 0)
2979 return (BUDMIN);
2980
2981 /* check if the wmap word is half free. if so, the
2982 * free buddy size is BUDMIN-1.
2983 */
2984 if (*((u16 *) cp) == 0 || *((u16 *) cp + 1) == 0)
2985 return (BUDMIN - 1);
2986
2987 /* not all free or half free. determine the free buddy
2988 * size thru table lookup using quarters of the wmap word.
2989 */
2990 tmp1 = max(budtab[cp[2]], budtab[cp[3]]);
2991 tmp2 = max(budtab[cp[0]], budtab[cp[1]]);
2992 return (max(tmp1, tmp2));
2993}
2994
2995
2996/*
2997 * NAME: cnttz(uint word)
2998 *
2999 * FUNCTION: determine the number of trailing zeros within a 32-bit
3000 * value.
3001 *
3002 * PARAMETERS:
3003 * value - 32-bit value to be examined.
3004 *
3005 * RETURN VALUES:
3006 * count of trailing zeros
3007 */
3008static int cnttz(u32 word)
3009{
3010 int n;
3011
3012 for (n = 0; n < 32; n++, word >>= 1) {
3013 if (word & 0x01)
3014 break;
3015 }
3016
3017 return (n);
3018}
3019
3020
3021/*
3022 * NAME: cntlz(u32 value)
3023 *
3024 * FUNCTION: determine the number of leading zeros within a 32-bit
3025 * value.
3026 *
3027 * PARAMETERS:
3028 * value - 32-bit value to be examined.
3029 *
3030 * RETURN VALUES:
3031 * count of leading zeros
3032 */
3033static int cntlz(u32 value)
3034{
3035 int n;
3036
3037 for (n = 0; n < 32; n++, value <<= 1) {
3038 if (value & HIGHORDER)
3039 break;
3040 }
3041 return (n);
3042}
3043
3044
3045/*
3046 * NAME: blkstol2(s64 nb)
3047 *
3048 * FUNCTION: convert a block count to its log2 value. if the block
3049 * count is not a l2 multiple, it is rounded up to the next
3050 * larger l2 multiple.
3051 *
3052 * PARAMETERS:
3053 * nb - number of blocks
3054 *
3055 * RETURN VALUES:
3056 * log2 number of blocks
3057 */
3058int blkstol2(s64 nb)
3059{
3060 int l2nb;
3061 s64 mask; /* meant to be signed */
3062
3063 mask = (s64) 1 << (64 - 1);
3064
3065 /* count the leading bits.
3066 */
3067 for (l2nb = 0; l2nb < 64; l2nb++, mask >>= 1) {
3068 /* leading bit found.
3069 */
3070 if (nb & mask) {
3071 /* determine the l2 value.
3072 */
3073 l2nb = (64 - 1) - l2nb;
3074
3075 /* check if we need to round up.
3076 */
3077 if (~mask & nb)
3078 l2nb++;
3079
3080 return (l2nb);
3081 }
3082 }
3083 assert(0);
3084 return 0; /* fix compiler warning */
3085}
3086
3087
3088/*
3089 * NAME: dbAllocBottomUp()
3090 *
3091 * FUNCTION: alloc the specified block range from the working block
3092 * allocation map.
3093 *
3094 * the blocks will be alloc from the working map one dmap
3095 * at a time.
3096 *
3097 * PARAMETERS:
3098 * ip - pointer to in-core inode;
3099 * blkno - starting block number to be freed.
3100 * nblocks - number of blocks to be freed.
3101 *
3102 * RETURN VALUES:
3103 * 0 - success
3104 * -EIO - i/o error
3105 */
3106int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks)
3107{
3108 struct metapage *mp;
3109 struct dmap *dp;
3110 int nb, rc;
3111 s64 lblkno, rem;
3112 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
3113 struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
3114
3115 IREAD_LOCK(ipbmap);
3116
3117 /* block to be allocated better be within the mapsize. */
3118 ASSERT(nblocks <= bmp->db_mapsize - blkno);
3119
3120 /*
3121 * allocate the blocks a dmap at a time.
3122 */
3123 mp = NULL;
3124 for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) {
3125 /* release previous dmap if any */
3126 if (mp) {
3127 write_metapage(mp);
3128 }
3129
3130 /* get the buffer for the current dmap. */
3131 lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
3132 mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
3133 if (mp == NULL) {
3134 IREAD_UNLOCK(ipbmap);
3135 return -EIO;
3136 }
3137 dp = (struct dmap *) mp->data;
3138
3139 /* determine the number of blocks to be allocated from
3140 * this dmap.
3141 */
3142 nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1)));
3143
Linus Torvalds1da177e2005-04-16 15:20:36 -07003144 /* allocate the blocks. */
3145 if ((rc = dbAllocDmapBU(bmp, dp, blkno, nb))) {
3146 release_metapage(mp);
3147 IREAD_UNLOCK(ipbmap);
3148 return (rc);
3149 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003150 }
3151
3152 /* write the last buffer. */
3153 write_metapage(mp);
3154
3155 IREAD_UNLOCK(ipbmap);
3156
3157 return (0);
3158}
3159
3160
3161static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
3162 int nblocks)
3163{
3164 int rc;
3165 int dbitno, word, rembits, nb, nwords, wbitno, agno;
3166 s8 oldroot, *leaf;
3167 struct dmaptree *tp = (struct dmaptree *) & dp->tree;
3168
3169 /* save the current value of the root (i.e. maximum free string)
3170 * of the dmap tree.
3171 */
3172 oldroot = tp->stree[ROOT];
3173
3174 /* pick up a pointer to the leaves of the dmap tree */
3175 leaf = tp->stree + LEAFIND;
3176
3177 /* determine the bit number and word within the dmap of the
3178 * starting block.
3179 */
3180 dbitno = blkno & (BPERDMAP - 1);
3181 word = dbitno >> L2DBWORD;
3182
3183 /* block range better be within the dmap */
3184 assert(dbitno + nblocks <= BPERDMAP);
3185
3186 /* allocate the bits of the dmap's words corresponding to the block
3187 * range. not all bits of the first and last words may be contained
3188 * within the block range. if this is the case, we'll work against
3189 * those words (i.e. partial first and/or last) on an individual basis
3190 * (a single pass), allocating the bits of interest by hand and
3191 * updating the leaf corresponding to the dmap word. a single pass
3192 * will be used for all dmap words fully contained within the
3193 * specified range. within this pass, the bits of all fully contained
3194 * dmap words will be marked as free in a single shot and the leaves
3195 * will be updated. a single leaf may describe the free space of
3196 * multiple dmap words, so we may update only a subset of the actual
3197 * leaves corresponding to the dmap words of the block range.
3198 */
3199 for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
3200 /* determine the bit number within the word and
3201 * the number of bits within the word.
3202 */
3203 wbitno = dbitno & (DBWORD - 1);
3204 nb = min(rembits, DBWORD - wbitno);
3205
3206 /* check if only part of a word is to be allocated.
3207 */
3208 if (nb < DBWORD) {
3209 /* allocate (set to 1) the appropriate bits within
3210 * this dmap word.
3211 */
3212 dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb)
3213 >> wbitno);
3214
3215 word++;
3216 } else {
3217 /* one or more dmap words are fully contained
3218 * within the block range. determine how many
3219 * words and allocate (set to 1) the bits of these
3220 * words.
3221 */
3222 nwords = rembits >> L2DBWORD;
3223 memset(&dp->wmap[word], (int) ONES, nwords * 4);
3224
3225 /* determine how many bits */
3226 nb = nwords << L2DBWORD;
3227 word += nwords;
3228 }
3229 }
3230
3231 /* update the free count for this dmap */
3232 dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) - nblocks);
3233
3234 /* reconstruct summary tree */
3235 dbInitDmapTree(dp);
3236
3237 BMAP_LOCK(bmp);
3238
3239 /* if this allocation group is completely free,
3240 * update the highest active allocation group number
3241 * if this allocation group is the new max.
3242 */
3243 agno = blkno >> bmp->db_agl2size;
3244 if (agno > bmp->db_maxag)
3245 bmp->db_maxag = agno;
3246
3247 /* update the free count for the allocation group and map */
3248 bmp->db_agfree[agno] -= nblocks;
3249 bmp->db_nfree -= nblocks;
3250
3251 BMAP_UNLOCK(bmp);
3252
3253 /* if the root has not changed, done. */
3254 if (tp->stree[ROOT] == oldroot)
3255 return (0);
3256
3257 /* root changed. bubble the change up to the dmap control pages.
3258 * if the adjustment of the upper level control pages fails,
3259 * backout the bit allocation (thus making everything consistent).
3260 */
3261 if ((rc = dbAdjCtl(bmp, blkno, tp->stree[ROOT], 1, 0)))
3262 dbFreeBits(bmp, dp, blkno, nblocks);
3263
3264 return (rc);
3265}
3266
3267
3268/*
3269 * NAME: dbExtendFS()
3270 *
3271 * FUNCTION: extend bmap from blkno for nblocks;
3272 * dbExtendFS() updates bmap ready for dbAllocBottomUp();
3273 *
3274 * L2
3275 * |
3276 * L1---------------------------------L1
3277 * | |
3278 * L0---------L0---------L0 L0---------L0---------L0
3279 * | | | | | |
3280 * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm;
3281 * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm
3282 *
3283 * <---old---><----------------------------extend----------------------->
3284 */
3285int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks)
3286{
3287 struct jfs_sb_info *sbi = JFS_SBI(ipbmap->i_sb);
3288 int nbperpage = sbi->nbperpage;
3289 int i, i0 = TRUE, j, j0 = TRUE, k, n;
3290 s64 newsize;
3291 s64 p;
3292 struct metapage *mp, *l2mp, *l1mp = NULL, *l0mp = NULL;
3293 struct dmapctl *l2dcp, *l1dcp, *l0dcp;
3294 struct dmap *dp;
3295 s8 *l0leaf, *l1leaf, *l2leaf;
3296 struct bmap *bmp = sbi->bmap;
3297 int agno, l2agsize, oldl2agsize;
3298 s64 ag_rem;
3299
3300 newsize = blkno + nblocks;
3301
3302 jfs_info("dbExtendFS: blkno:%Ld nblocks:%Ld newsize:%Ld",
3303 (long long) blkno, (long long) nblocks, (long long) newsize);
3304
3305 /*
3306 * initialize bmap control page.
3307 *
3308 * all the data in bmap control page should exclude
3309 * the mkfs hidden dmap page.
3310 */
3311
3312 /* update mapsize */
3313 bmp->db_mapsize = newsize;
3314 bmp->db_maxlevel = BMAPSZTOLEV(bmp->db_mapsize);
3315
3316 /* compute new AG size */
3317 l2agsize = dbGetL2AGSize(newsize);
3318 oldl2agsize = bmp->db_agl2size;
3319
3320 bmp->db_agl2size = l2agsize;
3321 bmp->db_agsize = 1 << l2agsize;
3322
3323 /* compute new number of AG */
3324 agno = bmp->db_numag;
3325 bmp->db_numag = newsize >> l2agsize;
3326 bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 1 : 0;
3327
3328 /*
3329 * reconfigure db_agfree[]
3330 * from old AG configuration to new AG configuration;
3331 *
3332 * coalesce contiguous k (newAGSize/oldAGSize) AGs;
3333 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn;
3334 * note: new AG size = old AG size * (2**x).
3335 */
3336 if (l2agsize == oldl2agsize)
3337 goto extend;
3338 k = 1 << (l2agsize - oldl2agsize);
3339 ag_rem = bmp->db_agfree[0]; /* save agfree[0] */
3340 for (i = 0, n = 0; i < agno; n++) {
3341 bmp->db_agfree[n] = 0; /* init collection point */
3342
3343 /* coalesce cotiguous k AGs; */
3344 for (j = 0; j < k && i < agno; j++, i++) {
3345 /* merge AGi to AGn */
3346 bmp->db_agfree[n] += bmp->db_agfree[i];
3347 }
3348 }
3349 bmp->db_agfree[0] += ag_rem; /* restore agfree[0] */
3350
3351 for (; n < MAXAG; n++)
3352 bmp->db_agfree[n] = 0;
3353
3354 /*
3355 * update highest active ag number
3356 */
3357
3358 bmp->db_maxag = bmp->db_maxag / k;
3359
3360 /*
3361 * extend bmap
3362 *
3363 * update bit maps and corresponding level control pages;
3364 * global control page db_nfree, db_agfree[agno], db_maxfreebud;
3365 */
3366 extend:
3367 /* get L2 page */
3368 p = BMAPBLKNO + nbperpage; /* L2 page */
3369 l2mp = read_metapage(ipbmap, p, PSIZE, 0);
3370 if (!l2mp) {
3371 jfs_error(ipbmap->i_sb, "dbExtendFS: L2 page could not be read");
3372 return -EIO;
3373 }
3374 l2dcp = (struct dmapctl *) l2mp->data;
3375
3376 /* compute start L1 */
3377 k = blkno >> L2MAXL1SIZE;
3378 l2leaf = l2dcp->stree + CTLLEAFIND + k;
3379 p = BLKTOL1(blkno, sbi->l2nbperpage); /* L1 page */
3380
3381 /*
3382 * extend each L1 in L2
3383 */
3384 for (; k < LPERCTL; k++, p += nbperpage) {
3385 /* get L1 page */
3386 if (j0) {
3387 /* read in L1 page: (blkno & (MAXL1SIZE - 1)) */
3388 l1mp = read_metapage(ipbmap, p, PSIZE, 0);
3389 if (l1mp == NULL)
3390 goto errout;
3391 l1dcp = (struct dmapctl *) l1mp->data;
3392
3393 /* compute start L0 */
3394 j = (blkno & (MAXL1SIZE - 1)) >> L2MAXL0SIZE;
3395 l1leaf = l1dcp->stree + CTLLEAFIND + j;
3396 p = BLKTOL0(blkno, sbi->l2nbperpage);
3397 j0 = FALSE;
3398 } else {
3399 /* assign/init L1 page */
3400 l1mp = get_metapage(ipbmap, p, PSIZE, 0);
3401 if (l1mp == NULL)
3402 goto errout;
3403
3404 l1dcp = (struct dmapctl *) l1mp->data;
3405
3406 /* compute start L0 */
3407 j = 0;
3408 l1leaf = l1dcp->stree + CTLLEAFIND;
3409 p += nbperpage; /* 1st L0 of L1.k */
3410 }
3411
3412 /*
3413 * extend each L0 in L1
3414 */
3415 for (; j < LPERCTL; j++) {
3416 /* get L0 page */
3417 if (i0) {
3418 /* read in L0 page: (blkno & (MAXL0SIZE - 1)) */
3419
3420 l0mp = read_metapage(ipbmap, p, PSIZE, 0);
3421 if (l0mp == NULL)
3422 goto errout;
3423 l0dcp = (struct dmapctl *) l0mp->data;
3424
3425 /* compute start dmap */
3426 i = (blkno & (MAXL0SIZE - 1)) >>
3427 L2BPERDMAP;
3428 l0leaf = l0dcp->stree + CTLLEAFIND + i;
3429 p = BLKTODMAP(blkno,
3430 sbi->l2nbperpage);
3431 i0 = FALSE;
3432 } else {
3433 /* assign/init L0 page */
3434 l0mp = get_metapage(ipbmap, p, PSIZE, 0);
3435 if (l0mp == NULL)
3436 goto errout;
3437
3438 l0dcp = (struct dmapctl *) l0mp->data;
3439
3440 /* compute start dmap */
3441 i = 0;
3442 l0leaf = l0dcp->stree + CTLLEAFIND;
3443 p += nbperpage; /* 1st dmap of L0.j */
3444 }
3445
3446 /*
3447 * extend each dmap in L0
3448 */
3449 for (; i < LPERCTL; i++) {
3450 /*
3451 * reconstruct the dmap page, and
3452 * initialize corresponding parent L0 leaf
3453 */
3454 if ((n = blkno & (BPERDMAP - 1))) {
3455 /* read in dmap page: */
3456 mp = read_metapage(ipbmap, p,
3457 PSIZE, 0);
3458 if (mp == NULL)
3459 goto errout;
3460 n = min(nblocks, (s64)BPERDMAP - n);
3461 } else {
3462 /* assign/init dmap page */
3463 mp = read_metapage(ipbmap, p,
3464 PSIZE, 0);
3465 if (mp == NULL)
3466 goto errout;
3467
3468 n = min(nblocks, (s64)BPERDMAP);
3469 }
3470
3471 dp = (struct dmap *) mp->data;
3472 *l0leaf = dbInitDmap(dp, blkno, n);
3473
3474 bmp->db_nfree += n;
3475 agno = le64_to_cpu(dp->start) >> l2agsize;
3476 bmp->db_agfree[agno] += n;
3477
3478 write_metapage(mp);
3479
3480 l0leaf++;
3481 p += nbperpage;
3482
3483 blkno += n;
3484 nblocks -= n;
3485 if (nblocks == 0)
3486 break;
3487 } /* for each dmap in a L0 */
3488
3489 /*
3490 * build current L0 page from its leaves, and
3491 * initialize corresponding parent L1 leaf
3492 */
3493 *l1leaf = dbInitDmapCtl(l0dcp, 0, ++i);
3494 write_metapage(l0mp);
3495 l0mp = NULL;
3496
3497 if (nblocks)
3498 l1leaf++; /* continue for next L0 */
3499 else {
3500 /* more than 1 L0 ? */
3501 if (j > 0)
3502 break; /* build L1 page */
3503 else {
3504 /* summarize in global bmap page */
3505 bmp->db_maxfreebud = *l1leaf;
3506 release_metapage(l1mp);
3507 release_metapage(l2mp);
3508 goto finalize;
3509 }
3510 }
3511 } /* for each L0 in a L1 */
3512
3513 /*
3514 * build current L1 page from its leaves, and
3515 * initialize corresponding parent L2 leaf
3516 */
3517 *l2leaf = dbInitDmapCtl(l1dcp, 1, ++j);
3518 write_metapage(l1mp);
3519 l1mp = NULL;
3520
3521 if (nblocks)
3522 l2leaf++; /* continue for next L1 */
3523 else {
3524 /* more than 1 L1 ? */
3525 if (k > 0)
3526 break; /* build L2 page */
3527 else {
3528 /* summarize in global bmap page */
3529 bmp->db_maxfreebud = *l2leaf;
3530 release_metapage(l2mp);
3531 goto finalize;
3532 }
3533 }
3534 } /* for each L1 in a L2 */
3535
3536 jfs_error(ipbmap->i_sb,
3537 "dbExtendFS: function has not returned as expected");
3538errout:
3539 if (l0mp)
3540 release_metapage(l0mp);
3541 if (l1mp)
3542 release_metapage(l1mp);
3543 release_metapage(l2mp);
3544 return -EIO;
3545
3546 /*
3547 * finalize bmap control page
3548 */
3549finalize:
3550
3551 return 0;
3552}
3553
3554
3555/*
3556 * dbFinalizeBmap()
3557 */
3558void dbFinalizeBmap(struct inode *ipbmap)
3559{
3560 struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
3561 int actags, inactags, l2nl;
3562 s64 ag_rem, actfree, inactfree, avgfree;
3563 int i, n;
3564
3565 /*
3566 * finalize bmap control page
3567 */
3568//finalize:
3569 /*
3570 * compute db_agpref: preferred ag to allocate from
3571 * (the leftmost ag with average free space in it);
3572 */
3573//agpref:
3574 /* get the number of active ags and inacitve ags */
3575 actags = bmp->db_maxag + 1;
3576 inactags = bmp->db_numag - actags;
3577 ag_rem = bmp->db_mapsize & (bmp->db_agsize - 1); /* ??? */
3578
3579 /* determine how many blocks are in the inactive allocation
3580 * groups. in doing this, we must account for the fact that
3581 * the rightmost group might be a partial group (i.e. file
3582 * system size is not a multiple of the group size).
3583 */
3584 inactfree = (inactags && ag_rem) ?
3585 ((inactags - 1) << bmp->db_agl2size) + ag_rem
3586 : inactags << bmp->db_agl2size;
3587
3588 /* determine how many free blocks are in the active
3589 * allocation groups plus the average number of free blocks
3590 * within the active ags.
3591 */
3592 actfree = bmp->db_nfree - inactfree;
3593 avgfree = (u32) actfree / (u32) actags;
3594
3595 /* if the preferred allocation group has not average free space.
3596 * re-establish the preferred group as the leftmost
3597 * group with average free space.
3598 */
3599 if (bmp->db_agfree[bmp->db_agpref] < avgfree) {
3600 for (bmp->db_agpref = 0; bmp->db_agpref < actags;
3601 bmp->db_agpref++) {
3602 if (bmp->db_agfree[bmp->db_agpref] >= avgfree)
3603 break;
3604 }
3605 if (bmp->db_agpref >= bmp->db_numag) {
3606 jfs_error(ipbmap->i_sb,
3607 "cannot find ag with average freespace");
3608 }
3609 }
3610
3611 /*
3612 * compute db_aglevel, db_agheigth, db_width, db_agstart:
3613 * an ag is covered in aglevel dmapctl summary tree,
3614 * at agheight level height (from leaf) with agwidth number of nodes
3615 * each, which starts at agstart index node of the smmary tree node
3616 * array;
3617 */
3618 bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize);
3619 l2nl =
3620 bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL);
3621 bmp->db_agheigth = l2nl >> 1;
3622 bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheigth << 1));
3623 for (i = 5 - bmp->db_agheigth, bmp->db_agstart = 0, n = 1; i > 0;
3624 i--) {
3625 bmp->db_agstart += n;
3626 n <<= 2;
3627 }
3628
3629}
3630
3631
3632/*
3633 * NAME: dbInitDmap()/ujfs_idmap_page()
3634 *
3635 * FUNCTION: initialize working/persistent bitmap of the dmap page
3636 * for the specified number of blocks:
3637 *
3638 * at entry, the bitmaps had been initialized as free (ZEROS);
3639 * The number of blocks will only account for the actually
3640 * existing blocks. Blocks which don't actually exist in
3641 * the aggregate will be marked as allocated (ONES);
3642 *
3643 * PARAMETERS:
3644 * dp - pointer to page of map
3645 * nblocks - number of blocks this page
3646 *
3647 * RETURNS: NONE
3648 */
3649static int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks)
3650{
3651 int blkno, w, b, r, nw, nb, i;
3652
3653 /* starting block number within the dmap */
3654 blkno = Blkno & (BPERDMAP - 1);
3655
3656 if (blkno == 0) {
3657 dp->nblocks = dp->nfree = cpu_to_le32(nblocks);
3658 dp->start = cpu_to_le64(Blkno);
3659
3660 if (nblocks == BPERDMAP) {
3661 memset(&dp->wmap[0], 0, LPERDMAP * 4);
3662 memset(&dp->pmap[0], 0, LPERDMAP * 4);
3663 goto initTree;
3664 }
3665 } else {
3666 dp->nblocks =
3667 cpu_to_le32(le32_to_cpu(dp->nblocks) + nblocks);
3668 dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) + nblocks);
3669 }
3670
3671 /* word number containing start block number */
3672 w = blkno >> L2DBWORD;
3673
3674 /*
3675 * free the bits corresponding to the block range (ZEROS):
3676 * note: not all bits of the first and last words may be contained
3677 * within the block range.
3678 */
3679 for (r = nblocks; r > 0; r -= nb, blkno += nb) {
3680 /* number of bits preceding range to be freed in the word */
3681 b = blkno & (DBWORD - 1);
3682 /* number of bits to free in the word */
3683 nb = min(r, DBWORD - b);
3684
3685 /* is partial word to be freed ? */
3686 if (nb < DBWORD) {
3687 /* free (set to 0) from the bitmap word */
3688 dp->wmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb)
3689 >> b));
3690 dp->pmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb)
3691 >> b));
3692
3693 /* skip the word freed */
3694 w++;
3695 } else {
3696 /* free (set to 0) contiguous bitmap words */
3697 nw = r >> L2DBWORD;
3698 memset(&dp->wmap[w], 0, nw * 4);
3699 memset(&dp->pmap[w], 0, nw * 4);
3700
3701 /* skip the words freed */
3702 nb = nw << L2DBWORD;
3703 w += nw;
3704 }
3705 }
3706
3707 /*
3708 * mark bits following the range to be freed (non-existing
3709 * blocks) as allocated (ONES)
3710 */
3711
3712 if (blkno == BPERDMAP)
3713 goto initTree;
3714
3715 /* the first word beyond the end of existing blocks */
3716 w = blkno >> L2DBWORD;
3717
3718 /* does nblocks fall on a 32-bit boundary ? */
3719 b = blkno & (DBWORD - 1);
3720 if (b) {
3721 /* mark a partial word allocated */
3722 dp->wmap[w] = dp->pmap[w] = cpu_to_le32(ONES >> b);
3723 w++;
3724 }
3725
3726 /* set the rest of the words in the page to allocated (ONES) */
3727 for (i = w; i < LPERDMAP; i++)
3728 dp->pmap[i] = dp->wmap[i] = cpu_to_le32(ONES);
3729
3730 /*
3731 * init tree
3732 */
3733 initTree:
3734 return (dbInitDmapTree(dp));
3735}
3736
3737
3738/*
3739 * NAME: dbInitDmapTree()/ujfs_complete_dmap()
3740 *
3741 * FUNCTION: initialize summary tree of the specified dmap:
3742 *
3743 * at entry, bitmap of the dmap has been initialized;
3744 *
3745 * PARAMETERS:
3746 * dp - dmap to complete
3747 * blkno - starting block number for this dmap
3748 * treemax - will be filled in with max free for this dmap
3749 *
3750 * RETURNS: max free string at the root of the tree
3751 */
3752static int dbInitDmapTree(struct dmap * dp)
3753{
3754 struct dmaptree *tp;
3755 s8 *cp;
3756 int i;
3757
3758 /* init fixed info of tree */
3759 tp = &dp->tree;
3760 tp->nleafs = cpu_to_le32(LPERDMAP);
3761 tp->l2nleafs = cpu_to_le32(L2LPERDMAP);
3762 tp->leafidx = cpu_to_le32(LEAFIND);
3763 tp->height = cpu_to_le32(4);
3764 tp->budmin = BUDMIN;
3765
3766 /* init each leaf from corresponding wmap word:
3767 * note: leaf is set to NOFREE(-1) if all blocks of corresponding
3768 * bitmap word are allocated.
3769 */
3770 cp = tp->stree + le32_to_cpu(tp->leafidx);
3771 for (i = 0; i < LPERDMAP; i++)
3772 *cp++ = dbMaxBud((u8 *) & dp->wmap[i]);
3773
3774 /* build the dmap's binary buddy summary tree */
3775 return (dbInitTree(tp));
3776}
3777
3778
3779/*
3780 * NAME: dbInitTree()/ujfs_adjtree()
3781 *
3782 * FUNCTION: initialize binary buddy summary tree of a dmap or dmapctl.
3783 *
3784 * at entry, the leaves of the tree has been initialized
3785 * from corresponding bitmap word or root of summary tree
3786 * of the child control page;
3787 * configure binary buddy system at the leaf level, then
3788 * bubble up the values of the leaf nodes up the tree.
3789 *
3790 * PARAMETERS:
3791 * cp - Pointer to the root of the tree
3792 * l2leaves- Number of leaf nodes as a power of 2
3793 * l2min - Number of blocks that can be covered by a leaf
3794 * as a power of 2
3795 *
3796 * RETURNS: max free string at the root of the tree
3797 */
3798static int dbInitTree(struct dmaptree * dtp)
3799{
3800 int l2max, l2free, bsize, nextb, i;
3801 int child, parent, nparent;
3802 s8 *tp, *cp, *cp1;
3803
3804 tp = dtp->stree;
3805
3806 /* Determine the maximum free string possible for the leaves */
3807 l2max = le32_to_cpu(dtp->l2nleafs) + dtp->budmin;
3808
3809 /*
3810 * configure the leaf levevl into binary buddy system
3811 *
3812 * Try to combine buddies starting with a buddy size of 1
3813 * (i.e. two leaves). At a buddy size of 1 two buddy leaves
3814 * can be combined if both buddies have a maximum free of l2min;
3815 * the combination will result in the left-most buddy leaf having
3816 * a maximum free of l2min+1.
3817 * After processing all buddies for a given size, process buddies
3818 * at the next higher buddy size (i.e. current size * 2) and
3819 * the next maximum free (current free + 1).
3820 * This continues until the maximum possible buddy combination
3821 * yields maximum free.
3822 */
3823 for (l2free = dtp->budmin, bsize = 1; l2free < l2max;
3824 l2free++, bsize = nextb) {
3825 /* get next buddy size == current buddy pair size */
3826 nextb = bsize << 1;
3827
3828 /* scan each adjacent buddy pair at current buddy size */
3829 for (i = 0, cp = tp + le32_to_cpu(dtp->leafidx);
3830 i < le32_to_cpu(dtp->nleafs);
3831 i += nextb, cp += nextb) {
3832 /* coalesce if both adjacent buddies are max free */
3833 if (*cp == l2free && *(cp + bsize) == l2free) {
3834 *cp = l2free + 1; /* left take right */
3835 *(cp + bsize) = -1; /* right give left */
3836 }
3837 }
3838 }
3839
3840 /*
3841 * bubble summary information of leaves up the tree.
3842 *
3843 * Starting at the leaf node level, the four nodes described by
3844 * the higher level parent node are compared for a maximum free and
3845 * this maximum becomes the value of the parent node.
3846 * when all lower level nodes are processed in this fashion then
3847 * move up to the next level (parent becomes a lower level node) and
3848 * continue the process for that level.
3849 */
3850 for (child = le32_to_cpu(dtp->leafidx),
3851 nparent = le32_to_cpu(dtp->nleafs) >> 2;
3852 nparent > 0; nparent >>= 2, child = parent) {
3853 /* get index of 1st node of parent level */
3854 parent = (child - 1) >> 2;
3855
3856 /* set the value of the parent node as the maximum
3857 * of the four nodes of the current level.
3858 */
3859 for (i = 0, cp = tp + child, cp1 = tp + parent;
3860 i < nparent; i++, cp += 4, cp1++)
3861 *cp1 = TREEMAX(cp);
3862 }
3863
3864 return (*tp);
3865}
3866
3867
3868/*
3869 * dbInitDmapCtl()
3870 *
3871 * function: initialize dmapctl page
3872 */
3873static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i)
3874{ /* start leaf index not covered by range */
3875 s8 *cp;
3876
3877 dcp->nleafs = cpu_to_le32(LPERCTL);
3878 dcp->l2nleafs = cpu_to_le32(L2LPERCTL);
3879 dcp->leafidx = cpu_to_le32(CTLLEAFIND);
3880 dcp->height = cpu_to_le32(5);
3881 dcp->budmin = L2BPERDMAP + L2LPERCTL * level;
3882
3883 /*
3884 * initialize the leaves of current level that were not covered
3885 * by the specified input block range (i.e. the leaves have no
3886 * low level dmapctl or dmap).
3887 */
3888 cp = &dcp->stree[CTLLEAFIND + i];
3889 for (; i < LPERCTL; i++)
3890 *cp++ = NOFREE;
3891
3892 /* build the dmap's binary buddy summary tree */
3893 return (dbInitTree((struct dmaptree *) dcp));
3894}
3895
3896
3897/*
3898 * NAME: dbGetL2AGSize()/ujfs_getagl2size()
3899 *
3900 * FUNCTION: Determine log2(allocation group size) from aggregate size
3901 *
3902 * PARAMETERS:
3903 * nblocks - Number of blocks in aggregate
3904 *
3905 * RETURNS: log2(allocation group size) in aggregate blocks
3906 */
3907static int dbGetL2AGSize(s64 nblocks)
3908{
3909 s64 sz;
3910 s64 m;
3911 int l2sz;
3912
3913 if (nblocks < BPERDMAP * MAXAG)
3914 return (L2BPERDMAP);
3915
3916 /* round up aggregate size to power of 2 */
3917 m = ((u64) 1 << (64 - 1));
3918 for (l2sz = 64; l2sz >= 0; l2sz--, m >>= 1) {
3919 if (m & nblocks)
3920 break;
3921 }
3922
3923 sz = (s64) 1 << l2sz;
3924 if (sz < nblocks)
3925 l2sz += 1;
3926
3927 /* agsize = roundupSize/max_number_of_ag */
3928 return (l2sz - L2MAXAG);
3929}
3930
3931
3932/*
3933 * NAME: dbMapFileSizeToMapSize()
3934 *
3935 * FUNCTION: compute number of blocks the block allocation map file
3936 * can cover from the map file size;
3937 *
3938 * RETURNS: Number of blocks which can be covered by this block map file;
3939 */
3940
3941/*
3942 * maximum number of map pages at each level including control pages
3943 */
3944#define MAXL0PAGES (1 + LPERCTL)
3945#define MAXL1PAGES (1 + LPERCTL * MAXL0PAGES)
3946#define MAXL2PAGES (1 + LPERCTL * MAXL1PAGES)
3947
3948/*
3949 * convert number of map pages to the zero origin top dmapctl level
3950 */
3951#define BMAPPGTOLEV(npages) \
3952 (((npages) <= 3 + MAXL0PAGES) ? 0 \
3953 : ((npages) <= 2 + MAXL1PAGES) ? 1 : 2)
3954
3955s64 dbMapFileSizeToMapSize(struct inode * ipbmap)
3956{
3957 struct super_block *sb = ipbmap->i_sb;
3958 s64 nblocks;
3959 s64 npages, ndmaps;
3960 int level, i;
3961 int complete, factor;
3962
3963 nblocks = ipbmap->i_size >> JFS_SBI(sb)->l2bsize;
3964 npages = nblocks >> JFS_SBI(sb)->l2nbperpage;
3965 level = BMAPPGTOLEV(npages);
3966
3967 /* At each level, accumulate the number of dmap pages covered by
3968 * the number of full child levels below it;
3969 * repeat for the last incomplete child level.
3970 */
3971 ndmaps = 0;
3972 npages--; /* skip the first global control page */
3973 /* skip higher level control pages above top level covered by map */
3974 npages -= (2 - level);
3975 npages--; /* skip top level's control page */
3976 for (i = level; i >= 0; i--) {
3977 factor =
3978 (i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1);
3979 complete = (u32) npages / factor;
3980 ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL
3981 : ((i == 1) ? LPERCTL : 1));
3982
3983 /* pages in last/incomplete child */
3984 npages = (u32) npages % factor;
3985 /* skip incomplete child's level control page */
3986 npages--;
3987 }
3988
3989 /* convert the number of dmaps into the number of blocks
3990 * which can be covered by the dmaps;
3991 */
3992 nblocks = ndmaps << L2BPERDMAP;
3993
3994 return (nblocks);
3995}