blob: 6dac48e29d2827f6cd64c87f8b41fe9a2ea2f1bf [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Copyright (C) International Business Machines Corp., 2000-2004
Tino Reichardtb40c2e62012-09-17 11:58:19 -05003 * Portions Copyright (C) Tino Reichardt, 2012
Linus Torvalds1da177e2005-04-16 15:20:36 -07004 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
Dave Kleikamp63f83c92006-10-02 09:55:27 -05007 * the Free Software Foundation; either version 2 of the License, or
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * (at your option) any later version.
Dave Kleikamp63f83c92006-10-02 09:55:27 -05009 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070010 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
Dave Kleikamp63f83c92006-10-02 09:55:27 -050016 * along with this program; if not, write to the Free Software
Linus Torvalds1da177e2005-04-16 15:20:36 -070017 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/fs.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090021#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include "jfs_incore.h"
23#include "jfs_superblock.h"
24#include "jfs_dmap.h"
25#include "jfs_imap.h"
26#include "jfs_lock.h"
27#include "jfs_metapage.h"
28#include "jfs_debug.h"
Tino Reichardtb40c2e62012-09-17 11:58:19 -050029#include "jfs_discard.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070030
31/*
Linus Torvalds1da177e2005-04-16 15:20:36 -070032 * SERIALIZATION of the Block Allocation Map.
33 *
34 * the working state of the block allocation map is accessed in
35 * two directions:
Dave Kleikamp63f83c92006-10-02 09:55:27 -050036 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070037 * 1) allocation and free requests that start at the dmap
38 * level and move up through the dmap control pages (i.e.
39 * the vast majority of requests).
Linus Torvalds1da177e2005-04-16 15:20:36 -070040 *
Dave Kleikamp63f83c92006-10-02 09:55:27 -050041 * 2) allocation requests that start at dmap control page
42 * level and work down towards the dmaps.
43 *
44 * the serialization scheme used here is as follows.
45 *
46 * requests which start at the bottom are serialized against each
47 * other through buffers and each requests holds onto its buffers
48 * as it works it way up from a single dmap to the required level
Linus Torvalds1da177e2005-04-16 15:20:36 -070049 * of dmap control page.
50 * requests that start at the top are serialized against each other
51 * and request that start from the bottom by the multiple read/single
52 * write inode lock of the bmap inode. requests starting at the top
53 * take this lock in write mode while request starting at the bottom
54 * take the lock in read mode. a single top-down request may proceed
Dave Kleikamp63f83c92006-10-02 09:55:27 -050055 * exclusively while multiple bottoms-up requests may proceed
56 * simultaneously (under the protection of busy buffers).
57 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070058 * in addition to information found in dmaps and dmap control pages,
59 * the working state of the block allocation map also includes read/
60 * write information maintained in the bmap descriptor (i.e. total
61 * free block count, allocation group level free block counts).
62 * a single exclusive lock (BMAP_LOCK) is used to guard this information
63 * in the face of multiple-bottoms up requests.
64 * (lock ordering: IREAD_LOCK, BMAP_LOCK);
Dave Kleikamp63f83c92006-10-02 09:55:27 -050065 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070066 * accesses to the persistent state of the block allocation map (limited
67 * to the persistent bitmaps in dmaps) is guarded by (busy) buffers.
68 */
69
Ingo Molnar1de87442006-01-24 15:22:50 -060070#define BMAP_LOCK_INIT(bmp) mutex_init(&bmp->db_bmaplock)
71#define BMAP_LOCK(bmp) mutex_lock(&bmp->db_bmaplock)
72#define BMAP_UNLOCK(bmp) mutex_unlock(&bmp->db_bmaplock)
Linus Torvalds1da177e2005-04-16 15:20:36 -070073
74/*
75 * forward references
76 */
77static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
78 int nblocks);
79static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval);
Dave Kleikampb6a47fd2005-10-11 09:06:59 -050080static int dbBackSplit(dmtree_t * tp, int leafno);
Dave Kleikamp56d12542005-07-15 09:43:36 -050081static int dbJoin(dmtree_t * tp, int leafno, int newval);
Linus Torvalds1da177e2005-04-16 15:20:36 -070082static void dbAdjTree(dmtree_t * tp, int leafno, int newval);
83static int dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc,
84 int level);
85static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results);
86static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
87 int nblocks);
88static int dbAllocNear(struct bmap * bmp, struct dmap * dp, s64 blkno,
89 int nblocks,
90 int l2nb, s64 * results);
91static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
92 int nblocks);
93static int dbAllocDmapLev(struct bmap * bmp, struct dmap * dp, int nblocks,
94 int l2nb,
95 s64 * results);
96static int dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb,
97 s64 * results);
98static int dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno,
99 s64 * results);
100static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks);
101static int dbFindBits(u32 word, int l2nb);
102static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno);
103static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx);
Dave Kleikamp56d12542005-07-15 09:43:36 -0500104static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
105 int nblocks);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700106static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
107 int nblocks);
108static int dbMaxBud(u8 * cp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109static int blkstol2(s64 nb);
110
111static int cntlz(u32 value);
112static int cnttz(u32 word);
113
114static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
115 int nblocks);
116static int dbInitDmap(struct dmap * dp, s64 blkno, int nblocks);
117static int dbInitDmapTree(struct dmap * dp);
118static int dbInitTree(struct dmaptree * dtp);
119static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i);
120static int dbGetL2AGSize(s64 nblocks);
121
122/*
123 * buddy table
124 *
Dave Kleikamp63f83c92006-10-02 09:55:27 -0500125 * table used for determining buddy sizes within characters of
Linus Torvalds1da177e2005-04-16 15:20:36 -0700126 * dmap bitmap words. the characters themselves serve as indexes
127 * into the table, with the table elements yielding the maximum
128 * binary buddy of free bits within the character.
129 */
Arjan van de Ven4d5dbd02005-11-29 08:28:58 -0600130static const s8 budtab[256] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700131 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
132 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
133 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
134 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
135 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
136 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
137 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
138 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
139 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
140 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
141 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
142 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
143 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
144 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
145 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
146 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, -1
147};
148
Linus Torvalds1da177e2005-04-16 15:20:36 -0700149/*
Dave Kleikamp63f83c92006-10-02 09:55:27 -0500150 * NAME: dbMount()
Linus Torvalds1da177e2005-04-16 15:20:36 -0700151 *
152 * FUNCTION: initializate the block allocation map.
153 *
154 * memory is allocated for the in-core bmap descriptor and
155 * the in-core descriptor is initialized from disk.
156 *
157 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500158 * ipbmap - pointer to in-core inode for the block map.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700159 *
160 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500161 * 0 - success
162 * -ENOMEM - insufficient memory
163 * -EIO - i/o error
Pavel Skripkin644fa3b2022-03-19 22:30:00 +0300164 * -EINVAL - wrong bmap data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700165 */
166int dbMount(struct inode *ipbmap)
167{
168 struct bmap *bmp;
169 struct dbmap_disk *dbmp_le;
170 struct metapage *mp;
171 int i;
172
173 /*
174 * allocate/initialize the in-memory bmap descriptor
175 */
176 /* allocate memory for the in-memory bmap descriptor */
177 bmp = kmalloc(sizeof(struct bmap), GFP_KERNEL);
178 if (bmp == NULL)
179 return -ENOMEM;
180
181 /* read the on-disk bmap descriptor. */
182 mp = read_metapage(ipbmap,
183 BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage,
184 PSIZE, 0);
185 if (mp == NULL) {
186 kfree(bmp);
187 return -EIO;
188 }
189
190 /* copy the on-disk bmap descriptor to its in-memory version. */
191 dbmp_le = (struct dbmap_disk *) mp->data;
192 bmp->db_mapsize = le64_to_cpu(dbmp_le->dn_mapsize);
193 bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree);
194 bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage);
195 bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag);
Pavel Skripkin644fa3b2022-03-19 22:30:00 +0300196 if (!bmp->db_numag) {
197 release_metapage(mp);
198 kfree(bmp);
199 return -EINVAL;
200 }
201
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202 bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel);
203 bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag);
204 bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref);
205 bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel);
Daniel Mackd7eecb42010-01-28 16:13:01 +0800206 bmp->db_agheight = le32_to_cpu(dbmp_le->dn_agheight);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700207 bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth);
208 bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart);
209 bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size);
210 for (i = 0; i < MAXAG; i++)
211 bmp->db_agfree[i] = le64_to_cpu(dbmp_le->dn_agfree[i]);
212 bmp->db_agsize = le64_to_cpu(dbmp_le->dn_agsize);
213 bmp->db_maxfreebud = dbmp_le->dn_maxfreebud;
214
215 /* release the buffer. */
216 release_metapage(mp);
217
218 /* bind the bmap inode and the bmap descriptor to each other. */
219 bmp->db_ipbmap = ipbmap;
220 JFS_SBI(ipbmap->i_sb)->bmap = bmp;
221
222 memset(bmp->db_active, 0, sizeof(bmp->db_active));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223
224 /*
225 * allocate/initialize the bmap lock
226 */
227 BMAP_LOCK_INIT(bmp);
228
229 return (0);
230}
231
232
233/*
Dave Kleikamp63f83c92006-10-02 09:55:27 -0500234 * NAME: dbUnmount()
Linus Torvalds1da177e2005-04-16 15:20:36 -0700235 *
236 * FUNCTION: terminate the block allocation map in preparation for
237 * file system unmount.
238 *
Dave Kleikamp63f83c92006-10-02 09:55:27 -0500239 * the in-core bmap descriptor is written to disk and
Linus Torvalds1da177e2005-04-16 15:20:36 -0700240 * the memory for this descriptor is freed.
241 *
242 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500243 * ipbmap - pointer to in-core inode for the block map.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244 *
245 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500246 * 0 - success
247 * -EIO - i/o error
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248 */
249int dbUnmount(struct inode *ipbmap, int mounterror)
250{
251 struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252
253 if (!(mounterror || isReadOnly(ipbmap)))
254 dbSync(ipbmap);
255
256 /*
257 * Invalidate the page cache buffers
258 */
259 truncate_inode_pages(ipbmap->i_mapping, 0);
260
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261 /* free the memory for the in-memory bmap. */
262 kfree(bmp);
263
264 return (0);
265}
266
267/*
268 * dbSync()
269 */
270int dbSync(struct inode *ipbmap)
271{
272 struct dbmap_disk *dbmp_le;
273 struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
274 struct metapage *mp;
275 int i;
276
277 /*
278 * write bmap global control page
279 */
280 /* get the buffer for the on-disk bmap descriptor. */
281 mp = read_metapage(ipbmap,
282 BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage,
283 PSIZE, 0);
284 if (mp == NULL) {
285 jfs_err("dbSync: read_metapage failed!");
286 return -EIO;
287 }
288 /* copy the in-memory version of the bmap to the on-disk version */
289 dbmp_le = (struct dbmap_disk *) mp->data;
290 dbmp_le->dn_mapsize = cpu_to_le64(bmp->db_mapsize);
291 dbmp_le->dn_nfree = cpu_to_le64(bmp->db_nfree);
292 dbmp_le->dn_l2nbperpage = cpu_to_le32(bmp->db_l2nbperpage);
293 dbmp_le->dn_numag = cpu_to_le32(bmp->db_numag);
294 dbmp_le->dn_maxlevel = cpu_to_le32(bmp->db_maxlevel);
295 dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag);
296 dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref);
297 dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel);
Daniel Mackd7eecb42010-01-28 16:13:01 +0800298 dbmp_le->dn_agheight = cpu_to_le32(bmp->db_agheight);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700299 dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth);
300 dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart);
301 dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size);
302 for (i = 0; i < MAXAG; i++)
303 dbmp_le->dn_agfree[i] = cpu_to_le64(bmp->db_agfree[i]);
304 dbmp_le->dn_agsize = cpu_to_le64(bmp->db_agsize);
305 dbmp_le->dn_maxfreebud = bmp->db_maxfreebud;
306
307 /* write the buffer */
308 write_metapage(mp);
309
310 /*
311 * write out dirty pages of bmap
312 */
OGAWA Hirofumi28fd1292006-01-08 01:02:14 -0800313 filemap_write_and_wait(ipbmap->i_mapping);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315 diWriteSpecial(ipbmap, 0);
316
317 return (0);
318}
319
Linus Torvalds1da177e2005-04-16 15:20:36 -0700320/*
Dave Kleikamp63f83c92006-10-02 09:55:27 -0500321 * NAME: dbFree()
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322 *
323 * FUNCTION: free the specified block range from the working block
324 * allocation map.
325 *
326 * the blocks will be free from the working map one dmap
327 * at a time.
328 *
329 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500330 * ip - pointer to in-core inode;
331 * blkno - starting block number to be freed.
332 * nblocks - number of blocks to be freed.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333 *
334 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500335 * 0 - success
336 * -EIO - i/o error
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337 */
338int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
339{
340 struct metapage *mp;
341 struct dmap *dp;
342 int nb, rc;
343 s64 lblkno, rem;
344 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
345 struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
Tino Reichardtb40c2e62012-09-17 11:58:19 -0500346 struct super_block *sb = ipbmap->i_sb;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700347
Dave Kleikamp82d5b9a2007-01-09 14:14:48 -0600348 IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349
350 /* block to be freed better be within the mapsize. */
351 if (unlikely((blkno == 0) || (blkno + nblocks > bmp->db_mapsize))) {
352 IREAD_UNLOCK(ipbmap);
353 printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n",
354 (unsigned long long) blkno,
355 (unsigned long long) nblocks);
Joe Percheseb8630d2013-06-04 16:39:15 -0700356 jfs_error(ip->i_sb, "block to be freed is outside the map\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700357 return -EIO;
358 }
359
Tino Reichardtb40c2e62012-09-17 11:58:19 -0500360 /**
361 * TRIM the blocks, when mounted with discard option
362 */
363 if (JFS_SBI(sb)->flag & JFS_DISCARD)
364 if (JFS_SBI(sb)->minblks_trim <= nblocks)
365 jfs_issue_discard(ipbmap, blkno, nblocks);
366
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367 /*
368 * free the blocks a dmap at a time.
369 */
370 mp = NULL;
371 for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) {
372 /* release previous dmap if any */
373 if (mp) {
374 write_metapage(mp);
375 }
376
377 /* get the buffer for the current dmap. */
378 lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
379 mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
380 if (mp == NULL) {
381 IREAD_UNLOCK(ipbmap);
382 return -EIO;
383 }
384 dp = (struct dmap *) mp->data;
385
386 /* determine the number of blocks to be freed from
387 * this dmap.
388 */
389 nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1)));
390
Linus Torvalds1da177e2005-04-16 15:20:36 -0700391 /* free the blocks. */
392 if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) {
Joe Percheseb8630d2013-06-04 16:39:15 -0700393 jfs_error(ip->i_sb, "error in block map\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700394 release_metapage(mp);
395 IREAD_UNLOCK(ipbmap);
396 return (rc);
397 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700398 }
399
400 /* write the last buffer. */
401 write_metapage(mp);
402
403 IREAD_UNLOCK(ipbmap);
404
405 return (0);
406}
407
408
409/*
410 * NAME: dbUpdatePMap()
411 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500412 * FUNCTION: update the allocation state (free or allocate) of the
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413 * specified block range in the persistent block allocation map.
Dave Kleikamp63f83c92006-10-02 09:55:27 -0500414 *
Linus Torvalds1da177e2005-04-16 15:20:36 -0700415 * the blocks will be updated in the persistent map one
416 * dmap at a time.
417 *
418 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500419 * ipbmap - pointer to in-core inode for the block map.
420 * free - 'true' if block range is to be freed from the persistent
421 * map; 'false' if it is to be allocated.
422 * blkno - starting block number of the range.
423 * nblocks - number of contiguous blocks in the range.
424 * tblk - transaction block;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425 *
426 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500427 * 0 - success
428 * -EIO - i/o error
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429 */
430int
431dbUpdatePMap(struct inode *ipbmap,
432 int free, s64 blkno, s64 nblocks, struct tblock * tblk)
433{
434 int nblks, dbitno, wbitno, rbits;
435 int word, nbits, nwords;
436 struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
437 s64 lblkno, rem, lastlblkno;
438 u32 mask;
439 struct dmap *dp;
440 struct metapage *mp;
441 struct jfs_log *log;
442 int lsn, difft, diffp;
Dave Kleikamp7fab4792005-05-02 12:25:02 -0600443 unsigned long flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444
445 /* the blocks better be within the mapsize. */
446 if (blkno + nblocks > bmp->db_mapsize) {
447 printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n",
448 (unsigned long long) blkno,
449 (unsigned long long) nblocks);
Joe Percheseb8630d2013-06-04 16:39:15 -0700450 jfs_error(ipbmap->i_sb, "blocks are outside the map\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700451 return -EIO;
452 }
453
454 /* compute delta of transaction lsn from log syncpt */
455 lsn = tblk->lsn;
456 log = (struct jfs_log *) JFS_SBI(tblk->sb)->log;
457 logdiff(difft, lsn, log);
458
459 /*
460 * update the block state a dmap at a time.
461 */
462 mp = NULL;
463 lastlblkno = 0;
464 for (rem = nblocks; rem > 0; rem -= nblks, blkno += nblks) {
465 /* get the buffer for the current dmap. */
466 lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
467 if (lblkno != lastlblkno) {
468 if (mp) {
469 write_metapage(mp);
470 }
471
472 mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE,
473 0);
474 if (mp == NULL)
475 return -EIO;
Dave Kleikamp7fab4792005-05-02 12:25:02 -0600476 metapage_wait_for_io(mp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700477 }
478 dp = (struct dmap *) mp->data;
479
480 /* determine the bit number and word within the dmap of
481 * the starting block. also determine how many blocks
482 * are to be updated within this dmap.
483 */
484 dbitno = blkno & (BPERDMAP - 1);
485 word = dbitno >> L2DBWORD;
486 nblks = min(rem, (s64)BPERDMAP - dbitno);
487
488 /* update the bits of the dmap words. the first and last
489 * words may only have a subset of their bits updated. if
490 * this is the case, we'll work against that word (i.e.
Dave Kleikamp63f83c92006-10-02 09:55:27 -0500491 * partial first and/or last) only in a single pass. a
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492 * single pass will also be used to update all words that
493 * are to have all their bits updated.
494 */
495 for (rbits = nblks; rbits > 0;
496 rbits -= nbits, dbitno += nbits) {
497 /* determine the bit number within the word and
498 * the number of bits within the word.
499 */
500 wbitno = dbitno & (DBWORD - 1);
501 nbits = min(rbits, DBWORD - wbitno);
502
503 /* check if only part of the word is to be updated. */
504 if (nbits < DBWORD) {
505 /* update (free or allocate) the bits
506 * in this word.
507 */
508 mask =
509 (ONES << (DBWORD - nbits) >> wbitno);
510 if (free)
511 dp->pmap[word] &=
512 cpu_to_le32(~mask);
513 else
514 dp->pmap[word] |=
515 cpu_to_le32(mask);
516
517 word += 1;
518 } else {
519 /* one or more words are to have all
520 * their bits updated. determine how
521 * many words and how many bits.
522 */
523 nwords = rbits >> L2DBWORD;
524 nbits = nwords << L2DBWORD;
525
526 /* update (free or allocate) the bits
527 * in these words.
528 */
529 if (free)
530 memset(&dp->pmap[word], 0,
531 nwords * 4);
532 else
533 memset(&dp->pmap[word], (int) ONES,
534 nwords * 4);
535
536 word += nwords;
537 }
538 }
539
540 /*
541 * update dmap lsn
542 */
543 if (lblkno == lastlblkno)
544 continue;
545
546 lastlblkno = lblkno;
547
Dave Kleikampbe0bf7d2006-03-08 10:59:15 -0600548 LOGSYNC_LOCK(log, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549 if (mp->lsn != 0) {
550 /* inherit older/smaller lsn */
551 logdiff(diffp, mp->lsn, log);
552 if (difft < diffp) {
553 mp->lsn = lsn;
554
555 /* move bp after tblock in logsync list */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700556 list_move(&mp->synclist, &tblk->synclist);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700557 }
558
559 /* inherit younger/larger clsn */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700560 logdiff(difft, tblk->clsn, log);
561 logdiff(diffp, mp->clsn, log);
562 if (difft > diffp)
563 mp->clsn = tblk->clsn;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700564 } else {
565 mp->log = log;
566 mp->lsn = lsn;
567
568 /* insert bp after tblock in logsync list */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700569 log->count++;
570 list_add(&mp->synclist, &tblk->synclist);
571
572 mp->clsn = tblk->clsn;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700573 }
Dave Kleikampbe0bf7d2006-03-08 10:59:15 -0600574 LOGSYNC_UNLOCK(log, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700575 }
576
577 /* write the last buffer. */
578 if (mp) {
579 write_metapage(mp);
580 }
581
582 return (0);
583}
584
585
586/*
587 * NAME: dbNextAG()
588 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500589 * FUNCTION: find the preferred allocation group for new allocations.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700590 *
591 * Within the allocation groups, we maintain a preferred
592 * allocation group which consists of a group with at least
593 * average free space. It is the preferred group that we target
594 * new inode allocation towards. The tie-in between inode
595 * allocation and block allocation occurs as we allocate the
596 * first (data) block of an inode and specify the inode (block)
597 * as the allocation hint for this block.
598 *
599 * We try to avoid having more than one open file growing in
600 * an allocation group, as this will lead to fragmentation.
601 * This differs from the old OS/2 method of trying to keep
602 * empty ags around for large allocations.
603 *
604 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500605 * ipbmap - pointer to in-core inode for the block map.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700606 *
607 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500608 * the preferred allocation group number.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609 */
610int dbNextAG(struct inode *ipbmap)
611{
612 s64 avgfree;
613 int agpref;
614 s64 hwm = 0;
615 int i;
616 int next_best = -1;
617 struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
618
619 BMAP_LOCK(bmp);
620
621 /* determine the average number of free blocks within the ags. */
622 avgfree = (u32)bmp->db_nfree / bmp->db_numag;
623
624 /*
625 * if the current preferred ag does not have an active allocator
626 * and has at least average freespace, return it
627 */
628 agpref = bmp->db_agpref;
629 if ((atomic_read(&bmp->db_active[agpref]) == 0) &&
630 (bmp->db_agfree[agpref] >= avgfree))
631 goto unlock;
632
633 /* From the last preferred ag, find the next one with at least
634 * average free space.
635 */
636 for (i = 0 ; i < bmp->db_numag; i++, agpref++) {
637 if (agpref == bmp->db_numag)
638 agpref = 0;
639
640 if (atomic_read(&bmp->db_active[agpref]))
641 /* open file is currently growing in this ag */
642 continue;
643 if (bmp->db_agfree[agpref] >= avgfree) {
644 /* Return this one */
645 bmp->db_agpref = agpref;
646 goto unlock;
647 } else if (bmp->db_agfree[agpref] > hwm) {
648 /* Less than avg. freespace, but best so far */
649 hwm = bmp->db_agfree[agpref];
650 next_best = agpref;
651 }
652 }
653
654 /*
655 * If no inactive ag was found with average freespace, use the
656 * next best
657 */
658 if (next_best != -1)
659 bmp->db_agpref = next_best;
660 /* else leave db_agpref unchanged */
661unlock:
662 BMAP_UNLOCK(bmp);
663
664 /* return the preferred group.
665 */
666 return (bmp->db_agpref);
667}
668
669/*
670 * NAME: dbAlloc()
671 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500672 * FUNCTION: attempt to allocate a specified number of contiguous free
Linus Torvalds1da177e2005-04-16 15:20:36 -0700673 * blocks from the working allocation block map.
674 *
675 * the block allocation policy uses hints and a multi-step
676 * approach.
677 *
Dave Kleikamp63f83c92006-10-02 09:55:27 -0500678 * for allocation requests smaller than the number of blocks
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679 * per dmap, we first try to allocate the new blocks
680 * immediately following the hint. if these blocks are not
681 * available, we try to allocate blocks near the hint. if
Dave Kleikamp63f83c92006-10-02 09:55:27 -0500682 * no blocks near the hint are available, we next try to
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683 * allocate within the same dmap as contains the hint.
684 *
685 * if no blocks are available in the dmap or the allocation
686 * request is larger than the dmap size, we try to allocate
687 * within the same allocation group as contains the hint. if
688 * this does not succeed, we finally try to allocate anywhere
689 * within the aggregate.
690 *
691 * we also try to allocate anywhere within the aggregate for
692 * for allocation requests larger than the allocation group
693 * size or requests that specify no hint value.
694 *
695 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500696 * ip - pointer to in-core inode;
697 * hint - allocation hint.
698 * nblocks - number of contiguous blocks in the range.
699 * results - on successful return, set to the starting block number
Linus Torvalds1da177e2005-04-16 15:20:36 -0700700 * of the newly allocated contiguous range.
701 *
702 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500703 * 0 - success
704 * -ENOSPC - insufficient disk resources
705 * -EIO - i/o error
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706 */
707int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
708{
709 int rc, agno;
710 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
711 struct bmap *bmp;
712 struct metapage *mp;
713 s64 lblkno, blkno;
714 struct dmap *dp;
715 int l2nb;
716 s64 mapSize;
717 int writers;
718
719 /* assert that nblocks is valid */
720 assert(nblocks > 0);
721
Linus Torvalds1da177e2005-04-16 15:20:36 -0700722 /* get the log2 number of blocks to be allocated.
Dave Kleikamp63f83c92006-10-02 09:55:27 -0500723 * if the number of blocks is not a log2 multiple,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700724 * it will be rounded up to the next log2 multiple.
725 */
726 l2nb = BLKSTOL2(nblocks);
727
728 bmp = JFS_SBI(ip->i_sb)->bmap;
729
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730 mapSize = bmp->db_mapsize;
731
732 /* the hint should be within the map */
733 if (hint >= mapSize) {
Joe Percheseb8630d2013-06-04 16:39:15 -0700734 jfs_error(ip->i_sb, "the hint is outside the map\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735 return -EIO;
736 }
737
738 /* if the number of blocks to be allocated is greater than the
739 * allocation group size, try to allocate anywhere.
740 */
741 if (l2nb > bmp->db_agl2size) {
Dave Kleikamp82d5b9a2007-01-09 14:14:48 -0600742 IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700743
744 rc = dbAllocAny(bmp, nblocks, l2nb, results);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700745
746 goto write_unlock;
747 }
748
749 /*
750 * If no hint, let dbNextAG recommend an allocation group
751 */
752 if (hint == 0)
753 goto pref_ag;
754
755 /* we would like to allocate close to the hint. adjust the
756 * hint to the block following the hint since the allocators
757 * will start looking for free space starting at this point.
758 */
759 blkno = hint + 1;
760
761 if (blkno >= bmp->db_mapsize)
762 goto pref_ag;
763
764 agno = blkno >> bmp->db_agl2size;
765
766 /* check if blkno crosses over into a new allocation group.
767 * if so, check if we should allow allocations within this
768 * allocation group.
769 */
770 if ((blkno & (bmp->db_agsize - 1)) == 0)
André Goddard Rosaaf901ca2009-11-14 13:09:05 -0200771 /* check if the AG is currently being written to.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772 * if so, call dbNextAG() to find a non-busy
773 * AG with sufficient free space.
774 */
775 if (atomic_read(&bmp->db_active[agno]))
776 goto pref_ag;
777
778 /* check if the allocation request size can be satisfied from a
779 * single dmap. if so, try to allocate from the dmap containing
780 * the hint using a tiered strategy.
781 */
782 if (nblocks <= BPERDMAP) {
Dave Kleikamp82d5b9a2007-01-09 14:14:48 -0600783 IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700784
785 /* get the buffer for the dmap containing the hint.
786 */
787 rc = -EIO;
788 lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
789 mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
790 if (mp == NULL)
791 goto read_unlock;
792
793 dp = (struct dmap *) mp->data;
794
795 /* first, try to satisfy the allocation request with the
796 * blocks beginning at the hint.
797 */
798 if ((rc = dbAllocNext(bmp, dp, blkno, (int) nblocks))
799 != -ENOSPC) {
800 if (rc == 0) {
801 *results = blkno;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700802 mark_metapage_dirty(mp);
803 }
804
805 release_metapage(mp);
806 goto read_unlock;
807 }
808
809 writers = atomic_read(&bmp->db_active[agno]);
810 if ((writers > 1) ||
811 ((writers == 1) && (JFS_IP(ip)->active_ag != agno))) {
812 /*
813 * Someone else is writing in this allocation
814 * group. To avoid fragmenting, try another ag
815 */
816 release_metapage(mp);
817 IREAD_UNLOCK(ipbmap);
818 goto pref_ag;
819 }
820
821 /* next, try to satisfy the allocation request with blocks
822 * near the hint.
823 */
824 if ((rc =
825 dbAllocNear(bmp, dp, blkno, (int) nblocks, l2nb, results))
826 != -ENOSPC) {
Dave Kleikampb38a3ab2005-06-27 15:35:37 -0500827 if (rc == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700828 mark_metapage_dirty(mp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700829
830 release_metapage(mp);
831 goto read_unlock;
832 }
833
834 /* try to satisfy the allocation request with blocks within
835 * the same dmap as the hint.
836 */
837 if ((rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results))
838 != -ENOSPC) {
Dave Kleikampb38a3ab2005-06-27 15:35:37 -0500839 if (rc == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840 mark_metapage_dirty(mp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700841
842 release_metapage(mp);
843 goto read_unlock;
844 }
845
846 release_metapage(mp);
847 IREAD_UNLOCK(ipbmap);
848 }
849
850 /* try to satisfy the allocation request with blocks within
851 * the same allocation group as the hint.
852 */
Dave Kleikamp82d5b9a2007-01-09 14:14:48 -0600853 IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);
Dave Kleikampb38a3ab2005-06-27 15:35:37 -0500854 if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) != -ENOSPC)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700855 goto write_unlock;
Dave Kleikampb38a3ab2005-06-27 15:35:37 -0500856
Linus Torvalds1da177e2005-04-16 15:20:36 -0700857 IWRITE_UNLOCK(ipbmap);
858
859
860 pref_ag:
861 /*
862 * Let dbNextAG recommend a preferred allocation group
863 */
864 agno = dbNextAG(ipbmap);
Dave Kleikamp82d5b9a2007-01-09 14:14:48 -0600865 IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700866
867 /* Try to allocate within this allocation group. if that fails, try to
868 * allocate anywhere in the map.
869 */
870 if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) == -ENOSPC)
871 rc = dbAllocAny(bmp, nblocks, l2nb, results);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700872
873 write_unlock:
874 IWRITE_UNLOCK(ipbmap);
875
876 return (rc);
877
878 read_unlock:
879 IREAD_UNLOCK(ipbmap);
880
881 return (rc);
882}
883
884#ifdef _NOTYET
885/*
886 * NAME: dbAllocExact()
887 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500888 * FUNCTION: try to allocate the requested extent;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700889 *
890 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500891 * ip - pointer to in-core inode;
892 * blkno - extent address;
893 * nblocks - extent length;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700894 *
895 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500896 * 0 - success
897 * -ENOSPC - insufficient disk resources
898 * -EIO - i/o error
Linus Torvalds1da177e2005-04-16 15:20:36 -0700899 */
900int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
901{
902 int rc;
903 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
904 struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
905 struct dmap *dp;
906 s64 lblkno;
907 struct metapage *mp;
908
Dave Kleikamp82d5b9a2007-01-09 14:14:48 -0600909 IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700910
911 /*
912 * validate extent request:
913 *
914 * note: defragfs policy:
Dave Kleikamp63f83c92006-10-02 09:55:27 -0500915 * max 64 blocks will be moved.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700916 * allocation request size must be satisfied from a single dmap.
917 */
918 if (nblocks <= 0 || nblocks > BPERDMAP || blkno >= bmp->db_mapsize) {
919 IREAD_UNLOCK(ipbmap);
920 return -EINVAL;
921 }
922
923 if (nblocks > ((s64) 1 << bmp->db_maxfreebud)) {
924 /* the free space is no longer available */
925 IREAD_UNLOCK(ipbmap);
926 return -ENOSPC;
927 }
928
929 /* read in the dmap covering the extent */
930 lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
931 mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
932 if (mp == NULL) {
933 IREAD_UNLOCK(ipbmap);
934 return -EIO;
935 }
936 dp = (struct dmap *) mp->data;
937
938 /* try to allocate the requested extent */
939 rc = dbAllocNext(bmp, dp, blkno, nblocks);
940
941 IREAD_UNLOCK(ipbmap);
942
Dave Kleikampb38a3ab2005-06-27 15:35:37 -0500943 if (rc == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944 mark_metapage_dirty(mp);
Dave Kleikampb38a3ab2005-06-27 15:35:37 -0500945
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946 release_metapage(mp);
947
948 return (rc);
949}
950#endif /* _NOTYET */
951
952/*
953 * NAME: dbReAlloc()
954 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500955 * FUNCTION: attempt to extend a current allocation by a specified
Linus Torvalds1da177e2005-04-16 15:20:36 -0700956 * number of blocks.
957 *
958 * this routine attempts to satisfy the allocation request
959 * by first trying to extend the existing allocation in
960 * place by allocating the additional blocks as the blocks
961 * immediately following the current allocation. if these
962 * blocks are not available, this routine will attempt to
963 * allocate a new set of contiguous blocks large enough
964 * to cover the existing allocation plus the additional
965 * number of blocks required.
966 *
967 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500968 * ip - pointer to in-core inode requiring allocation.
969 * blkno - starting block of the current allocation.
970 * nblocks - number of contiguous blocks within the current
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971 * allocation.
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500972 * addnblocks - number of blocks to add to the allocation.
973 * results - on successful return, set to the starting block number
Linus Torvalds1da177e2005-04-16 15:20:36 -0700974 * of the existing allocation if the existing allocation
975 * was extended in place or to a newly allocated contiguous
976 * range if the existing allocation could not be extended
977 * in place.
978 *
979 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -0500980 * 0 - success
981 * -ENOSPC - insufficient disk resources
982 * -EIO - i/o error
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983 */
984int
985dbReAlloc(struct inode *ip,
986 s64 blkno, s64 nblocks, s64 addnblocks, s64 * results)
987{
988 int rc;
989
990 /* try to extend the allocation in place.
991 */
992 if ((rc = dbExtend(ip, blkno, nblocks, addnblocks)) == 0) {
993 *results = blkno;
994 return (0);
995 } else {
996 if (rc != -ENOSPC)
997 return (rc);
998 }
999
1000 /* could not extend the allocation in place, so allocate a
1001 * new set of blocks for the entire request (i.e. try to get
1002 * a range of contiguous blocks large enough to cover the
1003 * existing allocation plus the additional blocks.)
1004 */
1005 return (dbAlloc
1006 (ip, blkno + nblocks - 1, addnblocks + nblocks, results));
1007}
1008
1009
1010/*
1011 * NAME: dbExtend()
1012 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001013 * FUNCTION: attempt to extend a current allocation by a specified
Linus Torvalds1da177e2005-04-16 15:20:36 -07001014 * number of blocks.
1015 *
1016 * this routine attempts to satisfy the allocation request
1017 * by first trying to extend the existing allocation in
1018 * place by allocating the additional blocks as the blocks
1019 * immediately following the current allocation.
1020 *
1021 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001022 * ip - pointer to in-core inode requiring allocation.
1023 * blkno - starting block of the current allocation.
1024 * nblocks - number of contiguous blocks within the current
Linus Torvalds1da177e2005-04-16 15:20:36 -07001025 * allocation.
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001026 * addnblocks - number of blocks to add to the allocation.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001027 *
1028 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001029 * 0 - success
1030 * -ENOSPC - insufficient disk resources
1031 * -EIO - i/o error
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032 */
1033static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
1034{
1035 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
1036 s64 lblkno, lastblkno, extblkno;
1037 uint rel_block;
1038 struct metapage *mp;
1039 struct dmap *dp;
1040 int rc;
1041 struct inode *ipbmap = sbi->ipbmap;
1042 struct bmap *bmp;
1043
1044 /*
1045 * We don't want a non-aligned extent to cross a page boundary
1046 */
1047 if (((rel_block = blkno & (sbi->nbperpage - 1))) &&
1048 (rel_block + nblocks + addnblocks > sbi->nbperpage))
1049 return -ENOSPC;
1050
1051 /* get the last block of the current allocation */
1052 lastblkno = blkno + nblocks - 1;
1053
1054 /* determine the block number of the block following
1055 * the existing allocation.
1056 */
1057 extblkno = lastblkno + 1;
1058
Dave Kleikamp82d5b9a2007-01-09 14:14:48 -06001059 IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001060
1061 /* better be within the file system */
1062 bmp = sbi->bmap;
1063 if (lastblkno < 0 || lastblkno >= bmp->db_mapsize) {
1064 IREAD_UNLOCK(ipbmap);
Joe Percheseb8630d2013-06-04 16:39:15 -07001065 jfs_error(ip->i_sb, "the block is outside the filesystem\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001066 return -EIO;
1067 }
1068
1069 /* we'll attempt to extend the current allocation in place by
1070 * allocating the additional blocks as the blocks immediately
1071 * following the current allocation. we only try to extend the
1072 * current allocation in place if the number of additional blocks
1073 * can fit into a dmap, the last block of the current allocation
1074 * is not the last block of the file system, and the start of the
1075 * inplace extension is not on an allocation group boundary.
1076 */
1077 if (addnblocks > BPERDMAP || extblkno >= bmp->db_mapsize ||
1078 (extblkno & (bmp->db_agsize - 1)) == 0) {
1079 IREAD_UNLOCK(ipbmap);
1080 return -ENOSPC;
1081 }
1082
1083 /* get the buffer for the dmap containing the first block
1084 * of the extension.
1085 */
1086 lblkno = BLKTODMAP(extblkno, bmp->db_l2nbperpage);
1087 mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
1088 if (mp == NULL) {
1089 IREAD_UNLOCK(ipbmap);
1090 return -EIO;
1091 }
1092
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093 dp = (struct dmap *) mp->data;
1094
1095 /* try to allocate the blocks immediately following the
1096 * current allocation.
1097 */
1098 rc = dbAllocNext(bmp, dp, extblkno, (int) addnblocks);
1099
1100 IREAD_UNLOCK(ipbmap);
1101
1102 /* were we successful ? */
Dave Kleikampb38a3ab2005-06-27 15:35:37 -05001103 if (rc == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104 write_metapage(mp);
Dave Kleikampb38a3ab2005-06-27 15:35:37 -05001105 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001106 /* we were not successful */
1107 release_metapage(mp);
1108
Linus Torvalds1da177e2005-04-16 15:20:36 -07001109 return (rc);
1110}
1111
1112
1113/*
1114 * NAME: dbAllocNext()
1115 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001116 * FUNCTION: attempt to allocate the blocks of the specified block
Linus Torvalds1da177e2005-04-16 15:20:36 -07001117 * range within a dmap.
1118 *
1119 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001120 * bmp - pointer to bmap descriptor
1121 * dp - pointer to dmap.
1122 * blkno - starting block number of the range.
1123 * nblocks - number of contiguous free blocks of the range.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001124 *
1125 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001126 * 0 - success
1127 * -ENOSPC - insufficient disk resources
1128 * -EIO - i/o error
Linus Torvalds1da177e2005-04-16 15:20:36 -07001129 *
1130 * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
1131 */
1132static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
1133 int nblocks)
1134{
1135 int dbitno, word, rembits, nb, nwords, wbitno, nw;
1136 int l2size;
1137 s8 *leaf;
1138 u32 mask;
1139
1140 if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) {
Joe Percheseb8630d2013-06-04 16:39:15 -07001141 jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmap page\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001142 return -EIO;
1143 }
1144
1145 /* pick up a pointer to the leaves of the dmap tree.
1146 */
1147 leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx);
1148
1149 /* determine the bit number and word within the dmap of the
1150 * starting block.
1151 */
1152 dbitno = blkno & (BPERDMAP - 1);
1153 word = dbitno >> L2DBWORD;
1154
1155 /* check if the specified block range is contained within
1156 * this dmap.
1157 */
1158 if (dbitno + nblocks > BPERDMAP)
1159 return -ENOSPC;
1160
1161 /* check if the starting leaf indicates that anything
1162 * is free.
1163 */
1164 if (leaf[word] == NOFREE)
1165 return -ENOSPC;
1166
1167 /* check the dmaps words corresponding to block range to see
1168 * if the block range is free. not all bits of the first and
1169 * last words may be contained within the block range. if this
1170 * is the case, we'll work against those words (i.e. partial first
1171 * and/or last) on an individual basis (a single pass) and examine
1172 * the actual bits to determine if they are free. a single pass
1173 * will be used for all dmap words fully contained within the
1174 * specified range. within this pass, the leaves of the dmap
1175 * tree will be examined to determine if the blocks are free. a
1176 * single leaf may describe the free space of multiple dmap
1177 * words, so we may visit only a subset of the actual leaves
1178 * corresponding to the dmap words of the block range.
1179 */
1180 for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
1181 /* determine the bit number within the word and
1182 * the number of bits within the word.
1183 */
1184 wbitno = dbitno & (DBWORD - 1);
1185 nb = min(rembits, DBWORD - wbitno);
1186
1187 /* check if only part of the word is to be examined.
1188 */
1189 if (nb < DBWORD) {
1190 /* check if the bits are free.
1191 */
1192 mask = (ONES << (DBWORD - nb) >> wbitno);
1193 if ((mask & ~le32_to_cpu(dp->wmap[word])) != mask)
1194 return -ENOSPC;
1195
1196 word += 1;
1197 } else {
1198 /* one or more dmap words are fully contained
1199 * within the block range. determine how many
1200 * words and how many bits.
1201 */
1202 nwords = rembits >> L2DBWORD;
1203 nb = nwords << L2DBWORD;
1204
1205 /* now examine the appropriate leaves to determine
1206 * if the blocks are free.
1207 */
1208 while (nwords > 0) {
1209 /* does the leaf describe any free space ?
1210 */
1211 if (leaf[word] < BUDMIN)
1212 return -ENOSPC;
1213
1214 /* determine the l2 number of bits provided
1215 * by this leaf.
1216 */
1217 l2size =
Fabian Frederick4f65b6d2014-05-19 21:36:58 +02001218 min_t(int, leaf[word], NLSTOL2BSZ(nwords));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001219
1220 /* determine how many words were handled.
1221 */
1222 nw = BUDSIZE(l2size, BUDMIN);
1223
1224 nwords -= nw;
1225 word += nw;
1226 }
1227 }
1228 }
1229
1230 /* allocate the blocks.
1231 */
1232 return (dbAllocDmap(bmp, dp, blkno, nblocks));
1233}
1234
1235
1236/*
1237 * NAME: dbAllocNear()
1238 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001239 * FUNCTION: attempt to allocate a number of contiguous free blocks near
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240 * a specified block (hint) within a dmap.
1241 *
1242 * starting with the dmap leaf that covers the hint, we'll
1243 * check the next four contiguous leaves for sufficient free
1244 * space. if sufficient free space is found, we'll allocate
1245 * the desired free space.
1246 *
1247 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001248 * bmp - pointer to bmap descriptor
1249 * dp - pointer to dmap.
1250 * blkno - block number to allocate near.
1251 * nblocks - actual number of contiguous free blocks desired.
1252 * l2nb - log2 number of contiguous free blocks desired.
1253 * results - on successful return, set to the starting block number
Linus Torvalds1da177e2005-04-16 15:20:36 -07001254 * of the newly allocated range.
1255 *
1256 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001257 * 0 - success
1258 * -ENOSPC - insufficient disk resources
1259 * -EIO - i/o error
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260 *
1261 * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
1262 */
1263static int
1264dbAllocNear(struct bmap * bmp,
1265 struct dmap * dp, s64 blkno, int nblocks, int l2nb, s64 * results)
1266{
1267 int word, lword, rc;
1268 s8 *leaf;
1269
1270 if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) {
Joe Percheseb8630d2013-06-04 16:39:15 -07001271 jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmap page\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001272 return -EIO;
1273 }
1274
1275 leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx);
1276
1277 /* determine the word within the dmap that holds the hint
1278 * (i.e. blkno). also, determine the last word in the dmap
1279 * that we'll include in our examination.
1280 */
1281 word = (blkno & (BPERDMAP - 1)) >> L2DBWORD;
1282 lword = min(word + 4, LPERDMAP);
1283
1284 /* examine the leaves for sufficient free space.
1285 */
1286 for (; word < lword; word++) {
1287 /* does the leaf describe sufficient free space ?
1288 */
1289 if (leaf[word] < l2nb)
1290 continue;
1291
1292 /* determine the block number within the file system
1293 * of the first block described by this dmap word.
1294 */
1295 blkno = le64_to_cpu(dp->start) + (word << L2DBWORD);
1296
1297 /* if not all bits of the dmap word are free, get the
1298 * starting bit number within the dmap word of the required
1299 * string of free bits and adjust the block number with the
1300 * value.
1301 */
1302 if (leaf[word] < BUDMIN)
1303 blkno +=
1304 dbFindBits(le32_to_cpu(dp->wmap[word]), l2nb);
1305
1306 /* allocate the blocks.
1307 */
1308 if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0)
1309 *results = blkno;
1310
1311 return (rc);
1312 }
1313
1314 return -ENOSPC;
1315}
1316
1317
1318/*
1319 * NAME: dbAllocAG()
1320 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001321 * FUNCTION: attempt to allocate the specified number of contiguous
Linus Torvalds1da177e2005-04-16 15:20:36 -07001322 * free blocks within the specified allocation group.
1323 *
1324 * unless the allocation group size is equal to the number
1325 * of blocks per dmap, the dmap control pages will be used to
1326 * find the required free space, if available. we start the
1327 * search at the highest dmap control page level which
1328 * distinctly describes the allocation group's free space
1329 * (i.e. the highest level at which the allocation group's
1330 * free space is not mixed in with that of any other group).
1331 * in addition, we start the search within this level at a
1332 * height of the dmapctl dmtree at which the nodes distinctly
1333 * describe the allocation group's free space. at this height,
1334 * the allocation group's free space may be represented by 1
1335 * or two sub-trees, depending on the allocation group size.
1336 * we search the top nodes of these subtrees left to right for
1337 * sufficient free space. if sufficient free space is found,
Dave Kleikamp63f83c92006-10-02 09:55:27 -05001338 * the subtree is searched to find the leftmost leaf that
Linus Torvalds1da177e2005-04-16 15:20:36 -07001339 * has free space. once we have made it to the leaf, we
1340 * move the search to the next lower level dmap control page
1341 * corresponding to this leaf. we continue down the dmap control
1342 * pages until we find the dmap that contains or starts the
1343 * sufficient free space and we allocate at this dmap.
1344 *
1345 * if the allocation group size is equal to the dmap size,
1346 * we'll start at the dmap corresponding to the allocation
1347 * group and attempt the allocation at this level.
1348 *
1349 * the dmap control page search is also not performed if the
1350 * allocation group is completely free and we go to the first
1351 * dmap of the allocation group to do the allocation. this is
1352 * done because the allocation group may be part (not the first
1353 * part) of a larger binary buddy system, causing the dmap
1354 * control pages to indicate no free space (NOFREE) within
1355 * the allocation group.
1356 *
1357 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001358 * bmp - pointer to bmap descriptor
Linus Torvalds1da177e2005-04-16 15:20:36 -07001359 * agno - allocation group number.
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001360 * nblocks - actual number of contiguous free blocks desired.
1361 * l2nb - log2 number of contiguous free blocks desired.
1362 * results - on successful return, set to the starting block number
Linus Torvalds1da177e2005-04-16 15:20:36 -07001363 * of the newly allocated range.
1364 *
1365 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001366 * 0 - success
1367 * -ENOSPC - insufficient disk resources
1368 * -EIO - i/o error
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369 *
1370 * note: IWRITE_LOCK(ipmap) held on entry/exit;
1371 */
1372static int
1373dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
1374{
1375 struct metapage *mp;
1376 struct dmapctl *dcp;
1377 int rc, ti, i, k, m, n, agperlev;
1378 s64 blkno, lblkno;
1379 int budmin;
1380
1381 /* allocation request should not be for more than the
1382 * allocation group size.
1383 */
1384 if (l2nb > bmp->db_agl2size) {
1385 jfs_error(bmp->db_ipbmap->i_sb,
Joe Percheseb8630d2013-06-04 16:39:15 -07001386 "allocation request is larger than the allocation group size\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001387 return -EIO;
1388 }
1389
1390 /* determine the starting block number of the allocation
1391 * group.
1392 */
1393 blkno = (s64) agno << bmp->db_agl2size;
1394
1395 /* check if the allocation group size is the minimum allocation
1396 * group size or if the allocation group is completely free. if
1397 * the allocation group size is the minimum size of BPERDMAP (i.e.
1398 * 1 dmap), there is no need to search the dmap control page (below)
1399 * that fully describes the allocation group since the allocation
1400 * group is already fully described by a dmap. in this case, we
1401 * just call dbAllocCtl() to search the dmap tree and allocate the
Dave Kleikamp63f83c92006-10-02 09:55:27 -05001402 * required space if available.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001403 *
1404 * if the allocation group is completely free, dbAllocCtl() is
1405 * also called to allocate the required space. this is done for
1406 * two reasons. first, it makes no sense searching the dmap control
1407 * pages for free space when we know that free space exists. second,
1408 * the dmap control pages may indicate that the allocation group
1409 * has no free space if the allocation group is part (not the first
1410 * part) of a larger binary buddy system.
1411 */
1412 if (bmp->db_agsize == BPERDMAP
1413 || bmp->db_agfree[agno] == bmp->db_agsize) {
1414 rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
1415 if ((rc == -ENOSPC) &&
1416 (bmp->db_agfree[agno] == bmp->db_agsize)) {
1417 printk(KERN_ERR "blkno = %Lx, blocks = %Lx\n",
1418 (unsigned long long) blkno,
1419 (unsigned long long) nblocks);
1420 jfs_error(bmp->db_ipbmap->i_sb,
Joe Percheseb8630d2013-06-04 16:39:15 -07001421 "dbAllocCtl failed in free AG\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001422 }
1423 return (rc);
1424 }
1425
1426 /* the buffer for the dmap control page that fully describes the
1427 * allocation group.
1428 */
1429 lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, bmp->db_aglevel);
1430 mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
1431 if (mp == NULL)
1432 return -EIO;
1433 dcp = (struct dmapctl *) mp->data;
1434 budmin = dcp->budmin;
1435
1436 if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) {
Joe Percheseb8630d2013-06-04 16:39:15 -07001437 jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmapctl page\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001438 release_metapage(mp);
1439 return -EIO;
1440 }
1441
1442 /* search the subtree(s) of the dmap control page that describes
1443 * the allocation group, looking for sufficient free space. to begin,
1444 * determine how many allocation groups are represented in a dmap
1445 * control page at the control page level (i.e. L0, L1, L2) that
1446 * fully describes an allocation group. next, determine the starting
1447 * tree index of this allocation group within the control page.
1448 */
1449 agperlev =
Daniel Mackd7eecb42010-01-28 16:13:01 +08001450 (1 << (L2LPERCTL - (bmp->db_agheight << 1))) / bmp->db_agwidth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001451 ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1));
1452
Dave Kleikamp63f83c92006-10-02 09:55:27 -05001453 /* dmap control page trees fan-out by 4 and a single allocation
Linus Torvalds1da177e2005-04-16 15:20:36 -07001454 * group may be described by 1 or 2 subtrees within the ag level
1455 * dmap control page, depending upon the ag size. examine the ag's
1456 * subtrees for sufficient free space, starting with the leftmost
1457 * subtree.
1458 */
1459 for (i = 0; i < bmp->db_agwidth; i++, ti++) {
1460 /* is there sufficient free space ?
1461 */
1462 if (l2nb > dcp->stree[ti])
1463 continue;
1464
1465 /* sufficient free space found in a subtree. now search down
1466 * the subtree to find the leftmost leaf that describes this
1467 * free space.
1468 */
Daniel Mackd7eecb42010-01-28 16:13:01 +08001469 for (k = bmp->db_agheight; k > 0; k--) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001470 for (n = 0, m = (ti << 2) + 1; n < 4; n++) {
1471 if (l2nb <= dcp->stree[m + n]) {
1472 ti = m + n;
1473 break;
1474 }
1475 }
1476 if (n == 4) {
1477 jfs_error(bmp->db_ipbmap->i_sb,
Joe Percheseb8630d2013-06-04 16:39:15 -07001478 "failed descending stree\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001479 release_metapage(mp);
1480 return -EIO;
1481 }
1482 }
1483
1484 /* determine the block number within the file system
1485 * that corresponds to this leaf.
1486 */
1487 if (bmp->db_aglevel == 2)
1488 blkno = 0;
1489 else if (bmp->db_aglevel == 1)
1490 blkno &= ~(MAXL1SIZE - 1);
1491 else /* bmp->db_aglevel == 0 */
1492 blkno &= ~(MAXL0SIZE - 1);
1493
1494 blkno +=
1495 ((s64) (ti - le32_to_cpu(dcp->leafidx))) << budmin;
1496
1497 /* release the buffer in preparation for going down
1498 * the next level of dmap control pages.
1499 */
1500 release_metapage(mp);
1501
1502 /* check if we need to continue to search down the lower
1503 * level dmap control pages. we need to if the number of
1504 * blocks required is less than maximum number of blocks
1505 * described at the next lower level.
1506 */
1507 if (l2nb < budmin) {
1508
1509 /* search the lower level dmap control pages to get
Michael Opdenacker59c51592007-05-09 08:57:56 +02001510 * the starting block number of the dmap that
Linus Torvalds1da177e2005-04-16 15:20:36 -07001511 * contains or starts off the free space.
1512 */
1513 if ((rc =
1514 dbFindCtl(bmp, l2nb, bmp->db_aglevel - 1,
1515 &blkno))) {
1516 if (rc == -ENOSPC) {
1517 jfs_error(bmp->db_ipbmap->i_sb,
Joe Percheseb8630d2013-06-04 16:39:15 -07001518 "control page inconsistent\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001519 return -EIO;
1520 }
1521 return (rc);
1522 }
1523 }
1524
1525 /* allocate the blocks.
1526 */
1527 rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
1528 if (rc == -ENOSPC) {
1529 jfs_error(bmp->db_ipbmap->i_sb,
Joe Percheseb8630d2013-06-04 16:39:15 -07001530 "unable to allocate blocks\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001531 rc = -EIO;
1532 }
1533 return (rc);
1534 }
1535
1536 /* no space in the allocation group. release the buffer and
1537 * return -ENOSPC.
1538 */
1539 release_metapage(mp);
1540
1541 return -ENOSPC;
1542}
1543
1544
1545/*
1546 * NAME: dbAllocAny()
1547 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001548 * FUNCTION: attempt to allocate the specified number of contiguous
Linus Torvalds1da177e2005-04-16 15:20:36 -07001549 * free blocks anywhere in the file system.
1550 *
1551 * dbAllocAny() attempts to find the sufficient free space by
1552 * searching down the dmap control pages, starting with the
1553 * highest level (i.e. L0, L1, L2) control page. if free space
1554 * large enough to satisfy the desired free space is found, the
1555 * desired free space is allocated.
1556 *
1557 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001558 * bmp - pointer to bmap descriptor
1559 * nblocks - actual number of contiguous free blocks desired.
1560 * l2nb - log2 number of contiguous free blocks desired.
1561 * results - on successful return, set to the starting block number
Linus Torvalds1da177e2005-04-16 15:20:36 -07001562 * of the newly allocated range.
1563 *
1564 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001565 * 0 - success
1566 * -ENOSPC - insufficient disk resources
1567 * -EIO - i/o error
Linus Torvalds1da177e2005-04-16 15:20:36 -07001568 *
1569 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
1570 */
1571static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results)
1572{
1573 int rc;
1574 s64 blkno = 0;
1575
1576 /* starting with the top level dmap control page, search
1577 * down the dmap control levels for sufficient free space.
1578 * if free space is found, dbFindCtl() returns the starting
1579 * block number of the dmap that contains or starts off the
1580 * range of free space.
1581 */
1582 if ((rc = dbFindCtl(bmp, l2nb, bmp->db_maxlevel, &blkno)))
1583 return (rc);
1584
1585 /* allocate the blocks.
1586 */
1587 rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
1588 if (rc == -ENOSPC) {
Joe Percheseb8630d2013-06-04 16:39:15 -07001589 jfs_error(bmp->db_ipbmap->i_sb, "unable to allocate blocks\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001590 return -EIO;
1591 }
1592 return (rc);
1593}
1594
1595
1596/*
Tino Reichardtb40c2e62012-09-17 11:58:19 -05001597 * NAME: dbDiscardAG()
1598 *
1599 * FUNCTION: attempt to discard (TRIM) all free blocks of specific AG
1600 *
1601 * algorithm:
1602 * 1) allocate blocks, as large as possible and save them
1603 * while holding IWRITE_LOCK on ipbmap
1604 * 2) trim all these saved block/length values
1605 * 3) mark the blocks free again
1606 *
1607 * benefit:
1608 * - we work only on one ag at some time, minimizing how long we
1609 * need to lock ipbmap
1610 * - reading / writing the fs is possible most time, even on
1611 * trimming
1612 *
1613 * downside:
1614 * - we write two times to the dmapctl and dmap pages
1615 * - but for me, this seems the best way, better ideas?
1616 * /TR 2012
1617 *
1618 * PARAMETERS:
1619 * ip - pointer to in-core inode
1620 * agno - ag to trim
1621 * minlen - minimum value of contiguous blocks
1622 *
1623 * RETURN VALUES:
1624 * s64 - actual number of blocks trimmed
1625 */
1626s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen)
1627{
1628 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
1629 struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
1630 s64 nblocks, blkno;
1631 u64 trimmed = 0;
1632 int rc, l2nb;
1633 struct super_block *sb = ipbmap->i_sb;
1634
1635 struct range2trim {
1636 u64 blkno;
1637 u64 nblocks;
1638 } *totrim, *tt;
1639
1640 /* max blkno / nblocks pairs to trim */
1641 int count = 0, range_cnt;
Dave Kleikamp84f41412012-09-18 11:27:22 -05001642 u64 max_ranges;
Tino Reichardtb40c2e62012-09-17 11:58:19 -05001643
1644 /* prevent others from writing new stuff here, while trimming */
1645 IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);
1646
1647 nblocks = bmp->db_agfree[agno];
Dave Kleikamp84f41412012-09-18 11:27:22 -05001648 max_ranges = nblocks;
1649 do_div(max_ranges, minlen);
1650 range_cnt = min_t(u64, max_ranges + 1, 32 * 1024);
Tino Reichardtb40c2e62012-09-17 11:58:19 -05001651 totrim = kmalloc(sizeof(struct range2trim) * range_cnt, GFP_NOFS);
1652 if (totrim == NULL) {
Joe Percheseb8630d2013-06-04 16:39:15 -07001653 jfs_error(bmp->db_ipbmap->i_sb, "no memory for trim array\n");
Tino Reichardtb40c2e62012-09-17 11:58:19 -05001654 IWRITE_UNLOCK(ipbmap);
1655 return 0;
1656 }
1657
1658 tt = totrim;
1659 while (nblocks >= minlen) {
1660 l2nb = BLKSTOL2(nblocks);
1661
1662 /* 0 = okay, -EIO = fatal, -ENOSPC -> try smaller block */
1663 rc = dbAllocAG(bmp, agno, nblocks, l2nb, &blkno);
1664 if (rc == 0) {
1665 tt->blkno = blkno;
1666 tt->nblocks = nblocks;
1667 tt++; count++;
1668
1669 /* the whole ag is free, trim now */
1670 if (bmp->db_agfree[agno] == 0)
1671 break;
1672
1673 /* give a hint for the next while */
1674 nblocks = bmp->db_agfree[agno];
1675 continue;
1676 } else if (rc == -ENOSPC) {
1677 /* search for next smaller log2 block */
1678 l2nb = BLKSTOL2(nblocks) - 1;
Colin Ian Kingc123b182021-02-11 13:01:08 +00001679 nblocks = 1LL << l2nb;
Tino Reichardtb40c2e62012-09-17 11:58:19 -05001680 } else {
1681 /* Trim any already allocated blocks */
Joe Percheseb8630d2013-06-04 16:39:15 -07001682 jfs_error(bmp->db_ipbmap->i_sb, "-EIO\n");
Tino Reichardtb40c2e62012-09-17 11:58:19 -05001683 break;
1684 }
1685
1686 /* check, if our trim array is full */
1687 if (unlikely(count >= range_cnt - 1))
1688 break;
1689 }
1690 IWRITE_UNLOCK(ipbmap);
1691
1692 tt->nblocks = 0; /* mark the current end */
1693 for (tt = totrim; tt->nblocks != 0; tt++) {
1694 /* when mounted with online discard, dbFree() will
1695 * call jfs_issue_discard() itself */
1696 if (!(JFS_SBI(sb)->flag & JFS_DISCARD))
1697 jfs_issue_discard(ip, tt->blkno, tt->nblocks);
1698 dbFree(ip, tt->blkno, tt->nblocks);
1699 trimmed += tt->nblocks;
1700 }
1701 kfree(totrim);
1702
1703 return trimmed;
1704}
1705
1706/*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001707 * NAME: dbFindCtl()
1708 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001709 * FUNCTION: starting at a specified dmap control page level and block
Linus Torvalds1da177e2005-04-16 15:20:36 -07001710 * number, search down the dmap control levels for a range of
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001711 * contiguous free blocks large enough to satisfy an allocation
Linus Torvalds1da177e2005-04-16 15:20:36 -07001712 * request for the specified number of free blocks.
1713 *
1714 * if sufficient contiguous free blocks are found, this routine
1715 * returns the starting block number within a dmap page that
1716 * contains or starts a range of contiqious free blocks that
1717 * is sufficient in size.
1718 *
1719 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001720 * bmp - pointer to bmap descriptor
1721 * level - starting dmap control page level.
1722 * l2nb - log2 number of contiguous free blocks desired.
1723 * *blkno - on entry, starting block number for conducting the search.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001724 * on successful return, the first block within a dmap page
1725 * that contains or starts a range of contiguous free blocks.
1726 *
1727 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001728 * 0 - success
1729 * -ENOSPC - insufficient disk resources
1730 * -EIO - i/o error
Linus Torvalds1da177e2005-04-16 15:20:36 -07001731 *
1732 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
1733 */
1734static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
1735{
1736 int rc, leafidx, lev;
1737 s64 b, lblkno;
1738 struct dmapctl *dcp;
1739 int budmin;
1740 struct metapage *mp;
1741
1742 /* starting at the specified dmap control page level and block
1743 * number, search down the dmap control levels for the starting
Dave Kleikamp63f83c92006-10-02 09:55:27 -05001744 * block number of a dmap page that contains or starts off
Linus Torvalds1da177e2005-04-16 15:20:36 -07001745 * sufficient free blocks.
1746 */
1747 for (lev = level, b = *blkno; lev >= 0; lev--) {
1748 /* get the buffer of the dmap control page for the block
1749 * number and level (i.e. L0, L1, L2).
1750 */
1751 lblkno = BLKTOCTL(b, bmp->db_l2nbperpage, lev);
1752 mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
1753 if (mp == NULL)
1754 return -EIO;
1755 dcp = (struct dmapctl *) mp->data;
1756 budmin = dcp->budmin;
1757
1758 if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) {
1759 jfs_error(bmp->db_ipbmap->i_sb,
Joe Percheseb8630d2013-06-04 16:39:15 -07001760 "Corrupt dmapctl page\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761 release_metapage(mp);
1762 return -EIO;
1763 }
1764
1765 /* search the tree within the dmap control page for
Lucas De Marchi25985ed2011-03-30 22:57:33 -03001766 * sufficient free space. if sufficient free space is found,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001767 * dbFindLeaf() returns the index of the leaf at which
1768 * free space was found.
1769 */
1770 rc = dbFindLeaf((dmtree_t *) dcp, l2nb, &leafidx);
1771
1772 /* release the buffer.
1773 */
1774 release_metapage(mp);
1775
1776 /* space found ?
1777 */
1778 if (rc) {
1779 if (lev != level) {
1780 jfs_error(bmp->db_ipbmap->i_sb,
Joe Percheseb8630d2013-06-04 16:39:15 -07001781 "dmap inconsistent\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001782 return -EIO;
1783 }
1784 return -ENOSPC;
1785 }
1786
1787 /* adjust the block number to reflect the location within
Dave Kleikamp63f83c92006-10-02 09:55:27 -05001788 * the dmap control page (i.e. the leaf) at which free
Linus Torvalds1da177e2005-04-16 15:20:36 -07001789 * space was found.
1790 */
1791 b += (((s64) leafidx) << budmin);
1792
1793 /* we stop the search at this dmap control page level if
1794 * the number of blocks required is greater than or equal
1795 * to the maximum number of blocks described at the next
1796 * (lower) level.
1797 */
1798 if (l2nb >= budmin)
1799 break;
1800 }
1801
1802 *blkno = b;
1803 return (0);
1804}
1805
1806
1807/*
1808 * NAME: dbAllocCtl()
1809 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001810 * FUNCTION: attempt to allocate a specified number of contiguous
Dave Kleikamp63f83c92006-10-02 09:55:27 -05001811 * blocks starting within a specific dmap.
1812 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07001813 * this routine is called by higher level routines that search
1814 * the dmap control pages above the actual dmaps for contiguous
1815 * free space. the result of successful searches by these
Dave Kleikamp63f83c92006-10-02 09:55:27 -05001816 * routines are the starting block numbers within dmaps, with
Linus Torvalds1da177e2005-04-16 15:20:36 -07001817 * the dmaps themselves containing the desired contiguous free
1818 * space or starting a contiguous free space of desired size
1819 * that is made up of the blocks of one or more dmaps. these
1820 * calls should not fail due to insufficent resources.
1821 *
1822 * this routine is called in some cases where it is not known
1823 * whether it will fail due to insufficient resources. more
1824 * specifically, this occurs when allocating from an allocation
1825 * group whose size is equal to the number of blocks per dmap.
1826 * in this case, the dmap control pages are not examined prior
1827 * to calling this routine (to save pathlength) and the call
1828 * might fail.
1829 *
1830 * for a request size that fits within a dmap, this routine relies
1831 * upon the dmap's dmtree to find the requested contiguous free
1832 * space. for request sizes that are larger than a dmap, the
1833 * requested free space will start at the first block of the
1834 * first dmap (i.e. blkno).
1835 *
1836 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001837 * bmp - pointer to bmap descriptor
1838 * nblocks - actual number of contiguous free blocks to allocate.
1839 * l2nb - log2 number of contiguous free blocks to allocate.
1840 * blkno - starting block number of the dmap to start the allocation
Linus Torvalds1da177e2005-04-16 15:20:36 -07001841 * from.
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001842 * results - on successful return, set to the starting block number
Linus Torvalds1da177e2005-04-16 15:20:36 -07001843 * of the newly allocated range.
1844 *
1845 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001846 * 0 - success
1847 * -ENOSPC - insufficient disk resources
1848 * -EIO - i/o error
Linus Torvalds1da177e2005-04-16 15:20:36 -07001849 *
1850 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
1851 */
1852static int
1853dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results)
1854{
1855 int rc, nb;
1856 s64 b, lblkno, n;
1857 struct metapage *mp;
1858 struct dmap *dp;
1859
1860 /* check if the allocation request is confined to a single dmap.
1861 */
1862 if (l2nb <= L2BPERDMAP) {
1863 /* get the buffer for the dmap.
1864 */
1865 lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
1866 mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
1867 if (mp == NULL)
1868 return -EIO;
1869 dp = (struct dmap *) mp->data;
1870
1871 /* try to allocate the blocks.
1872 */
1873 rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results);
1874 if (rc == 0)
1875 mark_metapage_dirty(mp);
1876
1877 release_metapage(mp);
1878
1879 return (rc);
1880 }
1881
1882 /* allocation request involving multiple dmaps. it must start on
1883 * a dmap boundary.
1884 */
1885 assert((blkno & (BPERDMAP - 1)) == 0);
1886
1887 /* allocate the blocks dmap by dmap.
1888 */
1889 for (n = nblocks, b = blkno; n > 0; n -= nb, b += nb) {
1890 /* get the buffer for the dmap.
1891 */
1892 lblkno = BLKTODMAP(b, bmp->db_l2nbperpage);
1893 mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
1894 if (mp == NULL) {
1895 rc = -EIO;
1896 goto backout;
1897 }
1898 dp = (struct dmap *) mp->data;
1899
1900 /* the dmap better be all free.
1901 */
1902 if (dp->tree.stree[ROOT] != L2BPERDMAP) {
1903 release_metapage(mp);
1904 jfs_error(bmp->db_ipbmap->i_sb,
Joe Percheseb8630d2013-06-04 16:39:15 -07001905 "the dmap is not all free\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001906 rc = -EIO;
1907 goto backout;
1908 }
1909
1910 /* determine how many blocks to allocate from this dmap.
1911 */
Fabian Frederick4f65b6d2014-05-19 21:36:58 +02001912 nb = min_t(s64, n, BPERDMAP);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001913
1914 /* allocate the blocks from the dmap.
1915 */
1916 if ((rc = dbAllocDmap(bmp, dp, b, nb))) {
1917 release_metapage(mp);
1918 goto backout;
1919 }
1920
1921 /* write the buffer.
1922 */
1923 write_metapage(mp);
1924 }
1925
1926 /* set the results (starting block number) and return.
1927 */
1928 *results = blkno;
1929 return (0);
1930
1931 /* something failed in handling an allocation request involving
1932 * multiple dmaps. we'll try to clean up by backing out any
1933 * allocation that has already happened for this request. if
1934 * we fail in backing out the allocation, we'll mark the file
1935 * system to indicate that blocks have been leaked.
1936 */
1937 backout:
1938
1939 /* try to backout the allocations dmap by dmap.
1940 */
1941 for (n = nblocks - n, b = blkno; n > 0;
1942 n -= BPERDMAP, b += BPERDMAP) {
1943 /* get the buffer for this dmap.
1944 */
1945 lblkno = BLKTODMAP(b, bmp->db_l2nbperpage);
1946 mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
1947 if (mp == NULL) {
1948 /* could not back out. mark the file system
1949 * to indicate that we have leaked blocks.
1950 */
1951 jfs_error(bmp->db_ipbmap->i_sb,
Joe Percheseb8630d2013-06-04 16:39:15 -07001952 "I/O Error: Block Leakage\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001953 continue;
1954 }
1955 dp = (struct dmap *) mp->data;
1956
1957 /* free the blocks is this dmap.
1958 */
1959 if (dbFreeDmap(bmp, dp, b, BPERDMAP)) {
1960 /* could not back out. mark the file system
1961 * to indicate that we have leaked blocks.
1962 */
1963 release_metapage(mp);
Joe Percheseb8630d2013-06-04 16:39:15 -07001964 jfs_error(bmp->db_ipbmap->i_sb, "Block Leakage\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001965 continue;
1966 }
1967
1968 /* write the buffer.
1969 */
1970 write_metapage(mp);
1971 }
1972
1973 return (rc);
1974}
1975
1976
1977/*
1978 * NAME: dbAllocDmapLev()
1979 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001980 * FUNCTION: attempt to allocate a specified number of contiguous blocks
Linus Torvalds1da177e2005-04-16 15:20:36 -07001981 * from a specified dmap.
Dave Kleikamp63f83c92006-10-02 09:55:27 -05001982 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07001983 * this routine checks if the contiguous blocks are available.
1984 * if so, nblocks of blocks are allocated; otherwise, ENOSPC is
1985 * returned.
1986 *
1987 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001988 * mp - pointer to bmap descriptor
1989 * dp - pointer to dmap to attempt to allocate blocks from.
1990 * l2nb - log2 number of contiguous block desired.
1991 * nblocks - actual number of contiguous block desired.
1992 * results - on successful return, set to the starting block number
Linus Torvalds1da177e2005-04-16 15:20:36 -07001993 * of the newly allocated range.
1994 *
1995 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05001996 * 0 - success
1997 * -ENOSPC - insufficient disk resources
1998 * -EIO - i/o error
Linus Torvalds1da177e2005-04-16 15:20:36 -07001999 *
Dave Kleikamp63f83c92006-10-02 09:55:27 -05002000 * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or
Linus Torvalds1da177e2005-04-16 15:20:36 -07002001 * IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit;
2002 */
2003static int
2004dbAllocDmapLev(struct bmap * bmp,
2005 struct dmap * dp, int nblocks, int l2nb, s64 * results)
2006{
2007 s64 blkno;
2008 int leafidx, rc;
2009
2010 /* can't be more than a dmaps worth of blocks */
2011 assert(l2nb <= L2BPERDMAP);
2012
2013 /* search the tree within the dmap page for sufficient
2014 * free space. if sufficient free space is found, dbFindLeaf()
2015 * returns the index of the leaf at which free space was found.
2016 */
2017 if (dbFindLeaf((dmtree_t *) & dp->tree, l2nb, &leafidx))
2018 return -ENOSPC;
2019
2020 /* determine the block number within the file system corresponding
2021 * to the leaf at which free space was found.
2022 */
2023 blkno = le64_to_cpu(dp->start) + (leafidx << L2DBWORD);
2024
2025 /* if not all bits of the dmap word are free, get the starting
2026 * bit number within the dmap word of the required string of free
2027 * bits and adjust the block number with this value.
2028 */
2029 if (dp->tree.stree[leafidx + LEAFIND] < BUDMIN)
2030 blkno += dbFindBits(le32_to_cpu(dp->wmap[leafidx]), l2nb);
2031
2032 /* allocate the blocks */
2033 if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0)
2034 *results = blkno;
2035
2036 return (rc);
2037}
2038
2039
2040/*
2041 * NAME: dbAllocDmap()
2042 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002043 * FUNCTION: adjust the disk allocation map to reflect the allocation
Linus Torvalds1da177e2005-04-16 15:20:36 -07002044 * of a specified block range within a dmap.
2045 *
2046 * this routine allocates the specified blocks from the dmap
2047 * through a call to dbAllocBits(). if the allocation of the
2048 * block range causes the maximum string of free blocks within
2049 * the dmap to change (i.e. the value of the root of the dmap's
2050 * dmtree), this routine will cause this change to be reflected
2051 * up through the appropriate levels of the dmap control pages
2052 * by a call to dbAdjCtl() for the L0 dmap control page that
2053 * covers this dmap.
2054 *
2055 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002056 * bmp - pointer to bmap descriptor
2057 * dp - pointer to dmap to allocate the block range from.
2058 * blkno - starting block number of the block to be allocated.
2059 * nblocks - number of blocks to be allocated.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002060 *
2061 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002062 * 0 - success
2063 * -EIO - i/o error
Linus Torvalds1da177e2005-04-16 15:20:36 -07002064 *
2065 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2066 */
2067static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
2068 int nblocks)
2069{
2070 s8 oldroot;
2071 int rc;
2072
2073 /* save the current value of the root (i.e. maximum free string)
2074 * of the dmap tree.
2075 */
2076 oldroot = dp->tree.stree[ROOT];
2077
2078 /* allocate the specified (blocks) bits */
2079 dbAllocBits(bmp, dp, blkno, nblocks);
2080
2081 /* if the root has not changed, done. */
2082 if (dp->tree.stree[ROOT] == oldroot)
2083 return (0);
2084
2085 /* root changed. bubble the change up to the dmap control pages.
2086 * if the adjustment of the upper level control pages fails,
2087 * backout the bit allocation (thus making everything consistent).
2088 */
2089 if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 1, 0)))
2090 dbFreeBits(bmp, dp, blkno, nblocks);
2091
2092 return (rc);
2093}
2094
2095
2096/*
2097 * NAME: dbFreeDmap()
2098 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002099 * FUNCTION: adjust the disk allocation map to reflect the allocation
Linus Torvalds1da177e2005-04-16 15:20:36 -07002100 * of a specified block range within a dmap.
2101 *
2102 * this routine frees the specified blocks from the dmap through
2103 * a call to dbFreeBits(). if the deallocation of the block range
2104 * causes the maximum string of free blocks within the dmap to
2105 * change (i.e. the value of the root of the dmap's dmtree), this
2106 * routine will cause this change to be reflected up through the
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002107 * appropriate levels of the dmap control pages by a call to
Linus Torvalds1da177e2005-04-16 15:20:36 -07002108 * dbAdjCtl() for the L0 dmap control page that covers this dmap.
2109 *
2110 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002111 * bmp - pointer to bmap descriptor
2112 * dp - pointer to dmap to free the block range from.
2113 * blkno - starting block number of the block to be freed.
2114 * nblocks - number of blocks to be freed.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002115 *
2116 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002117 * 0 - success
2118 * -EIO - i/o error
Linus Torvalds1da177e2005-04-16 15:20:36 -07002119 *
2120 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2121 */
2122static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
2123 int nblocks)
2124{
2125 s8 oldroot;
Dave Kleikamp56d12542005-07-15 09:43:36 -05002126 int rc = 0, word;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002127
2128 /* save the current value of the root (i.e. maximum free string)
2129 * of the dmap tree.
2130 */
2131 oldroot = dp->tree.stree[ROOT];
2132
2133 /* free the specified (blocks) bits */
Dave Kleikamp56d12542005-07-15 09:43:36 -05002134 rc = dbFreeBits(bmp, dp, blkno, nblocks);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002135
Dave Kleikamp56d12542005-07-15 09:43:36 -05002136 /* if error or the root has not changed, done. */
2137 if (rc || (dp->tree.stree[ROOT] == oldroot))
2138 return (rc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002139
2140 /* root changed. bubble the change up to the dmap control pages.
2141 * if the adjustment of the upper level control pages fails,
Dave Kleikamp63f83c92006-10-02 09:55:27 -05002142 * backout the deallocation.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002143 */
2144 if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 0, 0))) {
2145 word = (blkno & (BPERDMAP - 1)) >> L2DBWORD;
2146
2147 /* as part of backing out the deallocation, we will have
2148 * to back split the dmap tree if the deallocation caused
2149 * the freed blocks to become part of a larger binary buddy
2150 * system.
2151 */
2152 if (dp->tree.stree[word] == NOFREE)
2153 dbBackSplit((dmtree_t *) & dp->tree, word);
2154
2155 dbAllocBits(bmp, dp, blkno, nblocks);
2156 }
2157
2158 return (rc);
2159}
2160
2161
2162/*
2163 * NAME: dbAllocBits()
2164 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002165 * FUNCTION: allocate a specified block range from a dmap.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002166 *
2167 * this routine updates the dmap to reflect the working
2168 * state allocation of the specified block range. it directly
2169 * updates the bits of the working map and causes the adjustment
2170 * of the binary buddy system described by the dmap's dmtree
2171 * leaves to reflect the bits allocated. it also causes the
2172 * dmap's dmtree, as a whole, to reflect the allocated range.
2173 *
2174 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002175 * bmp - pointer to bmap descriptor
2176 * dp - pointer to dmap to allocate bits from.
2177 * blkno - starting block number of the bits to be allocated.
2178 * nblocks - number of bits to be allocated.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002179 *
2180 * RETURN VALUES: none
2181 *
2182 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2183 */
2184static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
2185 int nblocks)
2186{
2187 int dbitno, word, rembits, nb, nwords, wbitno, nw, agno;
2188 dmtree_t *tp = (dmtree_t *) & dp->tree;
2189 int size;
2190 s8 *leaf;
2191
2192 /* pick up a pointer to the leaves of the dmap tree */
2193 leaf = dp->tree.stree + LEAFIND;
2194
2195 /* determine the bit number and word within the dmap of the
2196 * starting block.
2197 */
2198 dbitno = blkno & (BPERDMAP - 1);
2199 word = dbitno >> L2DBWORD;
2200
2201 /* block range better be within the dmap */
2202 assert(dbitno + nblocks <= BPERDMAP);
2203
2204 /* allocate the bits of the dmap's words corresponding to the block
2205 * range. not all bits of the first and last words may be contained
2206 * within the block range. if this is the case, we'll work against
2207 * those words (i.e. partial first and/or last) on an individual basis
2208 * (a single pass), allocating the bits of interest by hand and
2209 * updating the leaf corresponding to the dmap word. a single pass
2210 * will be used for all dmap words fully contained within the
2211 * specified range. within this pass, the bits of all fully contained
2212 * dmap words will be marked as free in a single shot and the leaves
2213 * will be updated. a single leaf may describe the free space of
2214 * multiple dmap words, so we may update only a subset of the actual
2215 * leaves corresponding to the dmap words of the block range.
2216 */
2217 for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
2218 /* determine the bit number within the word and
2219 * the number of bits within the word.
2220 */
2221 wbitno = dbitno & (DBWORD - 1);
2222 nb = min(rembits, DBWORD - wbitno);
2223
2224 /* check if only part of a word is to be allocated.
2225 */
2226 if (nb < DBWORD) {
2227 /* allocate (set to 1) the appropriate bits within
2228 * this dmap word.
2229 */
2230 dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb)
2231 >> wbitno);
2232
2233 /* update the leaf for this dmap word. in addition
2234 * to setting the leaf value to the binary buddy max
2235 * of the updated dmap word, dbSplit() will split
2236 * the binary system of the leaves if need be.
2237 */
2238 dbSplit(tp, word, BUDMIN,
2239 dbMaxBud((u8 *) & dp->wmap[word]));
2240
2241 word += 1;
2242 } else {
2243 /* one or more dmap words are fully contained
2244 * within the block range. determine how many
2245 * words and allocate (set to 1) the bits of these
2246 * words.
2247 */
2248 nwords = rembits >> L2DBWORD;
2249 memset(&dp->wmap[word], (int) ONES, nwords * 4);
2250
2251 /* determine how many bits.
2252 */
2253 nb = nwords << L2DBWORD;
2254
2255 /* now update the appropriate leaves to reflect
2256 * the allocated words.
2257 */
2258 for (; nwords > 0; nwords -= nw) {
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002259 if (leaf[word] < BUDMIN) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002260 jfs_error(bmp->db_ipbmap->i_sb,
Joe Percheseb8630d2013-06-04 16:39:15 -07002261 "leaf page corrupt\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002262 break;
2263 }
2264
2265 /* determine what the leaf value should be
2266 * updated to as the minimum of the l2 number
2267 * of bits being allocated and the l2 number
2268 * of bits currently described by this leaf.
2269 */
Fabian Frederick4f65b6d2014-05-19 21:36:58 +02002270 size = min_t(int, leaf[word],
2271 NLSTOL2BSZ(nwords));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002272
2273 /* update the leaf to reflect the allocation.
2274 * in addition to setting the leaf value to
2275 * NOFREE, dbSplit() will split the binary
2276 * system of the leaves to reflect the current
2277 * allocation (size).
2278 */
2279 dbSplit(tp, word, size, NOFREE);
2280
2281 /* get the number of dmap words handled */
2282 nw = BUDSIZE(size, BUDMIN);
2283 word += nw;
2284 }
2285 }
2286 }
2287
2288 /* update the free count for this dmap */
Marcin Slusarz89145622008-02-13 15:34:20 -06002289 le32_add_cpu(&dp->nfree, -nblocks);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002290
2291 BMAP_LOCK(bmp);
2292
2293 /* if this allocation group is completely free,
2294 * update the maximum allocation group number if this allocation
2295 * group is the new max.
2296 */
2297 agno = blkno >> bmp->db_agl2size;
2298 if (agno > bmp->db_maxag)
2299 bmp->db_maxag = agno;
2300
2301 /* update the free count for the allocation group and map */
2302 bmp->db_agfree[agno] -= nblocks;
2303 bmp->db_nfree -= nblocks;
2304
2305 BMAP_UNLOCK(bmp);
2306}
2307
2308
2309/*
2310 * NAME: dbFreeBits()
2311 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002312 * FUNCTION: free a specified block range from a dmap.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002313 *
2314 * this routine updates the dmap to reflect the working
2315 * state allocation of the specified block range. it directly
2316 * updates the bits of the working map and causes the adjustment
2317 * of the binary buddy system described by the dmap's dmtree
2318 * leaves to reflect the bits freed. it also causes the dmap's
2319 * dmtree, as a whole, to reflect the deallocated range.
2320 *
2321 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002322 * bmp - pointer to bmap descriptor
2323 * dp - pointer to dmap to free bits from.
2324 * blkno - starting block number of the bits to be freed.
2325 * nblocks - number of bits to be freed.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002326 *
Dave Kleikamp56d12542005-07-15 09:43:36 -05002327 * RETURN VALUES: 0 for success
Linus Torvalds1da177e2005-04-16 15:20:36 -07002328 *
2329 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2330 */
Dave Kleikamp56d12542005-07-15 09:43:36 -05002331static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002332 int nblocks)
2333{
2334 int dbitno, word, rembits, nb, nwords, wbitno, nw, agno;
2335 dmtree_t *tp = (dmtree_t *) & dp->tree;
Dave Kleikamp56d12542005-07-15 09:43:36 -05002336 int rc = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002337 int size;
2338
2339 /* determine the bit number and word within the dmap of the
2340 * starting block.
2341 */
2342 dbitno = blkno & (BPERDMAP - 1);
2343 word = dbitno >> L2DBWORD;
2344
2345 /* block range better be within the dmap.
2346 */
2347 assert(dbitno + nblocks <= BPERDMAP);
2348
2349 /* free the bits of the dmaps words corresponding to the block range.
2350 * not all bits of the first and last words may be contained within
2351 * the block range. if this is the case, we'll work against those
2352 * words (i.e. partial first and/or last) on an individual basis
2353 * (a single pass), freeing the bits of interest by hand and updating
2354 * the leaf corresponding to the dmap word. a single pass will be used
Dave Kleikamp63f83c92006-10-02 09:55:27 -05002355 * for all dmap words fully contained within the specified range.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002356 * within this pass, the bits of all fully contained dmap words will
2357 * be marked as free in a single shot and the leaves will be updated. a
2358 * single leaf may describe the free space of multiple dmap words,
2359 * so we may update only a subset of the actual leaves corresponding
2360 * to the dmap words of the block range.
2361 *
2362 * dbJoin() is used to update leaf values and will join the binary
2363 * buddy system of the leaves if the new leaf values indicate this
2364 * should be done.
2365 */
2366 for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
2367 /* determine the bit number within the word and
2368 * the number of bits within the word.
2369 */
2370 wbitno = dbitno & (DBWORD - 1);
2371 nb = min(rembits, DBWORD - wbitno);
2372
2373 /* check if only part of a word is to be freed.
2374 */
2375 if (nb < DBWORD) {
2376 /* free (zero) the appropriate bits within this
Dave Kleikamp63f83c92006-10-02 09:55:27 -05002377 * dmap word.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002378 */
2379 dp->wmap[word] &=
2380 cpu_to_le32(~(ONES << (DBWORD - nb)
2381 >> wbitno));
2382
2383 /* update the leaf for this dmap word.
2384 */
Dave Kleikamp56d12542005-07-15 09:43:36 -05002385 rc = dbJoin(tp, word,
2386 dbMaxBud((u8 *) & dp->wmap[word]));
2387 if (rc)
2388 return rc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002389
2390 word += 1;
2391 } else {
2392 /* one or more dmap words are fully contained
2393 * within the block range. determine how many
2394 * words and free (zero) the bits of these words.
2395 */
2396 nwords = rembits >> L2DBWORD;
2397 memset(&dp->wmap[word], 0, nwords * 4);
2398
2399 /* determine how many bits.
2400 */
2401 nb = nwords << L2DBWORD;
2402
2403 /* now update the appropriate leaves to reflect
2404 * the freed words.
2405 */
2406 for (; nwords > 0; nwords -= nw) {
2407 /* determine what the leaf value should be
2408 * updated to as the minimum of the l2 number
2409 * of bits being freed and the l2 (max) number
2410 * of bits that can be described by this leaf.
2411 */
2412 size =
2413 min(LITOL2BSZ
2414 (word, L2LPERDMAP, BUDMIN),
2415 NLSTOL2BSZ(nwords));
2416
2417 /* update the leaf.
2418 */
Dave Kleikamp56d12542005-07-15 09:43:36 -05002419 rc = dbJoin(tp, word, size);
2420 if (rc)
2421 return rc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002422
2423 /* get the number of dmap words handled.
2424 */
2425 nw = BUDSIZE(size, BUDMIN);
2426 word += nw;
2427 }
2428 }
2429 }
2430
2431 /* update the free count for this dmap.
2432 */
Marcin Slusarz89145622008-02-13 15:34:20 -06002433 le32_add_cpu(&dp->nfree, nblocks);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002434
2435 BMAP_LOCK(bmp);
2436
Dave Kleikamp63f83c92006-10-02 09:55:27 -05002437 /* update the free count for the allocation group and
Linus Torvalds1da177e2005-04-16 15:20:36 -07002438 * map.
2439 */
2440 agno = blkno >> bmp->db_agl2size;
2441 bmp->db_nfree += nblocks;
2442 bmp->db_agfree[agno] += nblocks;
2443
2444 /* check if this allocation group is not completely free and
2445 * if it is currently the maximum (rightmost) allocation group.
2446 * if so, establish the new maximum allocation group number by
2447 * searching left for the first allocation group with allocation.
2448 */
2449 if ((bmp->db_agfree[agno] == bmp->db_agsize && agno == bmp->db_maxag) ||
2450 (agno == bmp->db_numag - 1 &&
2451 bmp->db_agfree[agno] == (bmp-> db_mapsize & (BPERDMAP - 1)))) {
2452 while (bmp->db_maxag > 0) {
2453 bmp->db_maxag -= 1;
2454 if (bmp->db_agfree[bmp->db_maxag] !=
2455 bmp->db_agsize)
2456 break;
2457 }
2458
2459 /* re-establish the allocation group preference if the
2460 * current preference is right of the maximum allocation
2461 * group.
2462 */
2463 if (bmp->db_agpref > bmp->db_maxag)
2464 bmp->db_agpref = bmp->db_maxag;
2465 }
2466
2467 BMAP_UNLOCK(bmp);
Dave Kleikamp56d12542005-07-15 09:43:36 -05002468
2469 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002470}
2471
2472
2473/*
2474 * NAME: dbAdjCtl()
2475 *
2476 * FUNCTION: adjust a dmap control page at a specified level to reflect
2477 * the change in a lower level dmap or dmap control page's
2478 * maximum string of free blocks (i.e. a change in the root
2479 * of the lower level object's dmtree) due to the allocation
2480 * or deallocation of a range of blocks with a single dmap.
2481 *
2482 * on entry, this routine is provided with the new value of
2483 * the lower level dmap or dmap control page root and the
2484 * starting block number of the block range whose allocation
2485 * or deallocation resulted in the root change. this range
2486 * is respresented by a single leaf of the current dmapctl
2487 * and the leaf will be updated with this value, possibly
Dave Kleikamp63f83c92006-10-02 09:55:27 -05002488 * causing a binary buddy system within the leaves to be
Linus Torvalds1da177e2005-04-16 15:20:36 -07002489 * split or joined. the update may also cause the dmapctl's
2490 * dmtree to be updated.
2491 *
2492 * if the adjustment of the dmap control page, itself, causes its
2493 * root to change, this change will be bubbled up to the next dmap
2494 * control level by a recursive call to this routine, specifying
2495 * the new root value and the next dmap control page level to
2496 * be adjusted.
2497 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002498 * bmp - pointer to bmap descriptor
2499 * blkno - the first block of a block range within a dmap. it is
Linus Torvalds1da177e2005-04-16 15:20:36 -07002500 * the allocation or deallocation of this block range that
2501 * requires the dmap control page to be adjusted.
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002502 * newval - the new value of the lower level dmap or dmap control
Linus Torvalds1da177e2005-04-16 15:20:36 -07002503 * page root.
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002504 * alloc - 'true' if adjustment is due to an allocation.
2505 * level - current level of dmap control page (i.e. L0, L1, L2) to
Linus Torvalds1da177e2005-04-16 15:20:36 -07002506 * be adjusted.
2507 *
2508 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002509 * 0 - success
2510 * -EIO - i/o error
Linus Torvalds1da177e2005-04-16 15:20:36 -07002511 *
2512 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2513 */
2514static int
2515dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level)
2516{
2517 struct metapage *mp;
2518 s8 oldroot;
2519 int oldval;
2520 s64 lblkno;
2521 struct dmapctl *dcp;
2522 int rc, leafno, ti;
2523
2524 /* get the buffer for the dmap control page for the specified
2525 * block number and control page level.
2526 */
2527 lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, level);
2528 mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
2529 if (mp == NULL)
2530 return -EIO;
2531 dcp = (struct dmapctl *) mp->data;
2532
2533 if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) {
Joe Percheseb8630d2013-06-04 16:39:15 -07002534 jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmapctl page\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002535 release_metapage(mp);
2536 return -EIO;
2537 }
2538
2539 /* determine the leaf number corresponding to the block and
2540 * the index within the dmap control tree.
2541 */
2542 leafno = BLKTOCTLLEAF(blkno, dcp->budmin);
2543 ti = leafno + le32_to_cpu(dcp->leafidx);
2544
2545 /* save the current leaf value and the current root level (i.e.
2546 * maximum l2 free string described by this dmapctl).
2547 */
2548 oldval = dcp->stree[ti];
2549 oldroot = dcp->stree[ROOT];
2550
2551 /* check if this is a control page update for an allocation.
2552 * if so, update the leaf to reflect the new leaf value using
Thomas Weber88393162010-03-16 11:47:56 +01002553 * dbSplit(); otherwise (deallocation), use dbJoin() to update
Linus Torvalds1da177e2005-04-16 15:20:36 -07002554 * the leaf with the new value. in addition to updating the
2555 * leaf, dbSplit() will also split the binary buddy system of
2556 * the leaves, if required, and bubble new values within the
2557 * dmapctl tree, if required. similarly, dbJoin() will join
2558 * the binary buddy system of leaves and bubble new values up
2559 * the dmapctl tree as required by the new leaf value.
2560 */
2561 if (alloc) {
2562 /* check if we are in the middle of a binary buddy
2563 * system. this happens when we are performing the
2564 * first allocation out of an allocation group that
2565 * is part (not the first part) of a larger binary
2566 * buddy system. if we are in the middle, back split
2567 * the system prior to calling dbSplit() which assumes
2568 * that it is at the front of a binary buddy system.
2569 */
2570 if (oldval == NOFREE) {
Dave Kleikampb6a47fd2005-10-11 09:06:59 -05002571 rc = dbBackSplit((dmtree_t *) dcp, leafno);
2572 if (rc)
2573 return rc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002574 oldval = dcp->stree[ti];
2575 }
2576 dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval);
2577 } else {
Dave Kleikamp56d12542005-07-15 09:43:36 -05002578 rc = dbJoin((dmtree_t *) dcp, leafno, newval);
2579 if (rc)
2580 return rc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002581 }
2582
2583 /* check if the root of the current dmap control page changed due
2584 * to the update and if the current dmap control page is not at
2585 * the current top level (i.e. L0, L1, L2) of the map. if so (i.e.
2586 * root changed and this is not the top level), call this routine
2587 * again (recursion) for the next higher level of the mapping to
2588 * reflect the change in root for the current dmap control page.
2589 */
2590 if (dcp->stree[ROOT] != oldroot) {
2591 /* are we below the top level of the map. if so,
2592 * bubble the root up to the next higher level.
2593 */
2594 if (level < bmp->db_maxlevel) {
2595 /* bubble up the new root of this dmap control page to
2596 * the next level.
2597 */
2598 if ((rc =
2599 dbAdjCtl(bmp, blkno, dcp->stree[ROOT], alloc,
2600 level + 1))) {
2601 /* something went wrong in bubbling up the new
2602 * root value, so backout the changes to the
2603 * current dmap control page.
2604 */
2605 if (alloc) {
2606 dbJoin((dmtree_t *) dcp, leafno,
2607 oldval);
2608 } else {
2609 /* the dbJoin() above might have
2610 * caused a larger binary buddy system
2611 * to form and we may now be in the
2612 * middle of it. if this is the case,
2613 * back split the buddies.
2614 */
2615 if (dcp->stree[ti] == NOFREE)
2616 dbBackSplit((dmtree_t *)
2617 dcp, leafno);
2618 dbSplit((dmtree_t *) dcp, leafno,
2619 dcp->budmin, oldval);
2620 }
2621
2622 /* release the buffer and return the error.
2623 */
2624 release_metapage(mp);
2625 return (rc);
2626 }
2627 } else {
2628 /* we're at the top level of the map. update
2629 * the bmap control page to reflect the size
2630 * of the maximum free buddy system.
2631 */
2632 assert(level == bmp->db_maxlevel);
2633 if (bmp->db_maxfreebud != oldroot) {
2634 jfs_error(bmp->db_ipbmap->i_sb,
Joe Percheseb8630d2013-06-04 16:39:15 -07002635 "the maximum free buddy is not the old root\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002636 }
2637 bmp->db_maxfreebud = dcp->stree[ROOT];
2638 }
2639 }
2640
2641 /* write the buffer.
2642 */
2643 write_metapage(mp);
2644
2645 return (0);
2646}
2647
2648
2649/*
2650 * NAME: dbSplit()
2651 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002652 * FUNCTION: update the leaf of a dmtree with a new value, splitting
Linus Torvalds1da177e2005-04-16 15:20:36 -07002653 * the leaf from the binary buddy system of the dmtree's
2654 * leaves, as required.
2655 *
2656 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002657 * tp - pointer to the tree containing the leaf.
2658 * leafno - the number of the leaf to be updated.
2659 * splitsz - the size the binary buddy system starting at the leaf
Linus Torvalds1da177e2005-04-16 15:20:36 -07002660 * must be split to, specified as the log2 number of blocks.
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002661 * newval - the new value for the leaf.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002662 *
2663 * RETURN VALUES: none
2664 *
2665 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2666 */
2667static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
2668{
2669 int budsz;
2670 int cursz;
2671 s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx);
2672
2673 /* check if the leaf needs to be split.
2674 */
2675 if (leaf[leafno] > tp->dmt_budmin) {
2676 /* the split occurs by cutting the buddy system in half
2677 * at the specified leaf until we reach the specified
2678 * size. pick up the starting split size (current size
2679 * - 1 in l2) and the corresponding buddy size.
2680 */
2681 cursz = leaf[leafno] - 1;
2682 budsz = BUDSIZE(cursz, tp->dmt_budmin);
2683
2684 /* split until we reach the specified size.
2685 */
2686 while (cursz >= splitsz) {
2687 /* update the buddy's leaf with its new value.
2688 */
2689 dbAdjTree(tp, leafno ^ budsz, cursz);
2690
2691 /* on to the next size and buddy.
2692 */
2693 cursz -= 1;
2694 budsz >>= 1;
2695 }
2696 }
2697
Dave Kleikamp63f83c92006-10-02 09:55:27 -05002698 /* adjust the dmap tree to reflect the specified leaf's new
Linus Torvalds1da177e2005-04-16 15:20:36 -07002699 * value.
2700 */
2701 dbAdjTree(tp, leafno, newval);
2702}
2703
2704
2705/*
2706 * NAME: dbBackSplit()
2707 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002708 * FUNCTION: back split the binary buddy system of dmtree leaves
Linus Torvalds1da177e2005-04-16 15:20:36 -07002709 * that hold a specified leaf until the specified leaf
2710 * starts its own binary buddy system.
2711 *
2712 * the allocators typically perform allocations at the start
2713 * of binary buddy systems and dbSplit() is used to accomplish
2714 * any required splits. in some cases, however, allocation
2715 * may occur in the middle of a binary system and requires a
2716 * back split, with the split proceeding out from the middle of
2717 * the system (less efficient) rather than the start of the
2718 * system (more efficient). the cases in which a back split
2719 * is required are rare and are limited to the first allocation
2720 * within an allocation group which is a part (not first part)
2721 * of a larger binary buddy system and a few exception cases
2722 * in which a previous join operation must be backed out.
2723 *
2724 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002725 * tp - pointer to the tree containing the leaf.
2726 * leafno - the number of the leaf to be updated.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002727 *
2728 * RETURN VALUES: none
2729 *
2730 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
2731 */
Dave Kleikampb6a47fd2005-10-11 09:06:59 -05002732static int dbBackSplit(dmtree_t * tp, int leafno)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002733{
2734 int budsz, bud, w, bsz, size;
2735 int cursz;
2736 s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx);
2737
2738 /* leaf should be part (not first part) of a binary
2739 * buddy system.
2740 */
2741 assert(leaf[leafno] == NOFREE);
2742
2743 /* the back split is accomplished by iteratively finding the leaf
2744 * that starts the buddy system that contains the specified leaf and
2745 * splitting that system in two. this iteration continues until
Dave Kleikamp63f83c92006-10-02 09:55:27 -05002746 * the specified leaf becomes the start of a buddy system.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002747 *
2748 * determine maximum possible l2 size for the specified leaf.
2749 */
2750 size =
2751 LITOL2BSZ(leafno, le32_to_cpu(tp->dmt_l2nleafs),
2752 tp->dmt_budmin);
2753
2754 /* determine the number of leaves covered by this size. this
2755 * is the buddy size that we will start with as we search for
2756 * the buddy system that contains the specified leaf.
2757 */
2758 budsz = BUDSIZE(size, tp->dmt_budmin);
2759
2760 /* back split.
2761 */
2762 while (leaf[leafno] == NOFREE) {
2763 /* find the leftmost buddy leaf.
2764 */
2765 for (w = leafno, bsz = budsz;; bsz <<= 1,
2766 w = (w < bud) ? w : bud) {
Dave Kleikampb6a47fd2005-10-11 09:06:59 -05002767 if (bsz >= le32_to_cpu(tp->dmt_nleafs)) {
2768 jfs_err("JFS: block map error in dbBackSplit");
2769 return -EIO;
2770 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002771
2772 /* determine the buddy.
2773 */
2774 bud = w ^ bsz;
2775
2776 /* check if this buddy is the start of the system.
2777 */
2778 if (leaf[bud] != NOFREE) {
2779 /* split the leaf at the start of the
2780 * system in two.
2781 */
2782 cursz = leaf[bud] - 1;
2783 dbSplit(tp, bud, cursz, cursz);
2784 break;
2785 }
2786 }
2787 }
2788
Dave Kleikampb6a47fd2005-10-11 09:06:59 -05002789 if (leaf[leafno] != size) {
2790 jfs_err("JFS: wrong leaf value in dbBackSplit");
2791 return -EIO;
2792 }
2793 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002794}
2795
2796
2797/*
2798 * NAME: dbJoin()
2799 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002800 * FUNCTION: update the leaf of a dmtree with a new value, joining
Linus Torvalds1da177e2005-04-16 15:20:36 -07002801 * the leaf with other leaves of the dmtree into a multi-leaf
2802 * binary buddy system, as required.
2803 *
2804 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002805 * tp - pointer to the tree containing the leaf.
2806 * leafno - the number of the leaf to be updated.
2807 * newval - the new value for the leaf.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002808 *
2809 * RETURN VALUES: none
2810 */
Dave Kleikamp56d12542005-07-15 09:43:36 -05002811static int dbJoin(dmtree_t * tp, int leafno, int newval)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002812{
2813 int budsz, buddy;
2814 s8 *leaf;
2815
2816 /* can the new leaf value require a join with other leaves ?
2817 */
2818 if (newval >= tp->dmt_budmin) {
2819 /* pickup a pointer to the leaves of the tree.
2820 */
2821 leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx);
2822
2823 /* try to join the specified leaf into a large binary
2824 * buddy system. the join proceeds by attempting to join
2825 * the specified leafno with its buddy (leaf) at new value.
2826 * if the join occurs, we attempt to join the left leaf
2827 * of the joined buddies with its buddy at new value + 1.
2828 * we continue to join until we find a buddy that cannot be
2829 * joined (does not have a value equal to the size of the
2830 * last join) or until all leaves have been joined into a
2831 * single system.
2832 *
2833 * get the buddy size (number of words covered) of
2834 * the new value.
2835 */
2836 budsz = BUDSIZE(newval, tp->dmt_budmin);
2837
2838 /* try to join.
2839 */
2840 while (budsz < le32_to_cpu(tp->dmt_nleafs)) {
2841 /* get the buddy leaf.
2842 */
2843 buddy = leafno ^ budsz;
2844
2845 /* if the leaf's new value is greater than its
2846 * buddy's value, we join no more.
2847 */
2848 if (newval > leaf[buddy])
2849 break;
2850
Dave Kleikamp56d12542005-07-15 09:43:36 -05002851 /* It shouldn't be less */
2852 if (newval < leaf[buddy])
2853 return -EIO;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002854
2855 /* check which (leafno or buddy) is the left buddy.
2856 * the left buddy gets to claim the blocks resulting
2857 * from the join while the right gets to claim none.
Lucas De Marchi25985ed2011-03-30 22:57:33 -03002858 * the left buddy is also eligible to participate in
Linus Torvalds1da177e2005-04-16 15:20:36 -07002859 * a join at the next higher level while the right
2860 * is not.
2861 *
2862 */
2863 if (leafno < buddy) {
2864 /* leafno is the left buddy.
2865 */
2866 dbAdjTree(tp, buddy, NOFREE);
2867 } else {
2868 /* buddy is the left buddy and becomes
2869 * leafno.
2870 */
2871 dbAdjTree(tp, leafno, NOFREE);
2872 leafno = buddy;
2873 }
2874
2875 /* on to try the next join.
2876 */
2877 newval += 1;
2878 budsz <<= 1;
2879 }
2880 }
2881
2882 /* update the leaf value.
2883 */
2884 dbAdjTree(tp, leafno, newval);
Dave Kleikamp56d12542005-07-15 09:43:36 -05002885
2886 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002887}
2888
2889
2890/*
2891 * NAME: dbAdjTree()
2892 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002893 * FUNCTION: update a leaf of a dmtree with a new value, adjusting
Linus Torvalds1da177e2005-04-16 15:20:36 -07002894 * the dmtree, as required, to reflect the new leaf value.
2895 * the combination of any buddies must already be done before
2896 * this is called.
2897 *
2898 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002899 * tp - pointer to the tree to be adjusted.
2900 * leafno - the number of the leaf to be updated.
2901 * newval - the new value for the leaf.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002902 *
2903 * RETURN VALUES: none
2904 */
2905static void dbAdjTree(dmtree_t * tp, int leafno, int newval)
2906{
2907 int lp, pp, k;
2908 int max;
2909
2910 /* pick up the index of the leaf for this leafno.
2911 */
2912 lp = leafno + le32_to_cpu(tp->dmt_leafidx);
2913
2914 /* is the current value the same as the old value ? if so,
2915 * there is nothing to do.
2916 */
2917 if (tp->dmt_stree[lp] == newval)
2918 return;
2919
2920 /* set the new value.
2921 */
2922 tp->dmt_stree[lp] = newval;
2923
2924 /* bubble the new value up the tree as required.
2925 */
2926 for (k = 0; k < le32_to_cpu(tp->dmt_height); k++) {
2927 /* get the index of the first leaf of the 4 leaf
2928 * group containing the specified leaf (leafno).
2929 */
2930 lp = ((lp - 1) & ~0x03) + 1;
2931
2932 /* get the index of the parent of this 4 leaf group.
2933 */
2934 pp = (lp - 1) >> 2;
2935
2936 /* determine the maximum of the 4 leaves.
2937 */
2938 max = TREEMAX(&tp->dmt_stree[lp]);
2939
2940 /* if the maximum of the 4 is the same as the
2941 * parent's value, we're done.
2942 */
2943 if (tp->dmt_stree[pp] == max)
2944 break;
2945
2946 /* parent gets new value.
2947 */
2948 tp->dmt_stree[pp] = max;
2949
2950 /* parent becomes leaf for next go-round.
2951 */
2952 lp = pp;
2953 }
2954}
2955
2956
2957/*
2958 * NAME: dbFindLeaf()
2959 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002960 * FUNCTION: search a dmtree_t for sufficient free blocks, returning
Dave Kleikamp63f83c92006-10-02 09:55:27 -05002961 * the index of a leaf describing the free blocks if
Linus Torvalds1da177e2005-04-16 15:20:36 -07002962 * sufficient free blocks are found.
2963 *
2964 * the search starts at the top of the dmtree_t tree and
2965 * proceeds down the tree to the leftmost leaf with sufficient
2966 * free space.
2967 *
2968 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002969 * tp - pointer to the tree to be searched.
2970 * l2nb - log2 number of free blocks to search for.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002971 * leafidx - return pointer to be set to the index of the leaf
2972 * describing at least l2nb free blocks if sufficient
2973 * free blocks are found.
2974 *
2975 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05002976 * 0 - success
2977 * -ENOSPC - insufficient free blocks.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002978 */
2979static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx)
2980{
2981 int ti, n = 0, k, x = 0;
2982
2983 /* first check the root of the tree to see if there is
2984 * sufficient free space.
2985 */
2986 if (l2nb > tp->dmt_stree[ROOT])
2987 return -ENOSPC;
2988
2989 /* sufficient free space available. now search down the tree
2990 * starting at the next level for the leftmost leaf that
2991 * describes sufficient free space.
2992 */
2993 for (k = le32_to_cpu(tp->dmt_height), ti = 1;
2994 k > 0; k--, ti = ((ti + n) << 2) + 1) {
2995 /* search the four nodes at this level, starting from
2996 * the left.
2997 */
2998 for (x = ti, n = 0; n < 4; n++) {
2999 /* sufficient free space found. move to the next
3000 * level (or quit if this is the last level).
3001 */
3002 if (l2nb <= tp->dmt_stree[x + n])
3003 break;
3004 }
3005
3006 /* better have found something since the higher
3007 * levels of the tree said it was here.
3008 */
3009 assert(n < 4);
3010 }
3011
3012 /* set the return to the leftmost leaf describing sufficient
3013 * free space.
3014 */
3015 *leafidx = x + n - le32_to_cpu(tp->dmt_leafidx);
3016
3017 return (0);
3018}
3019
3020
3021/*
3022 * NAME: dbFindBits()
3023 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -05003024 * FUNCTION: find a specified number of binary buddy free bits within a
Linus Torvalds1da177e2005-04-16 15:20:36 -07003025 * dmap bitmap word value.
3026 *
3027 * this routine searches the bitmap value for (1 << l2nb) free
3028 * bits at (1 << l2nb) alignments within the value.
3029 *
3030 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05003031 * word - dmap bitmap word value.
3032 * l2nb - number of free bits specified as a log2 number.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003033 *
3034 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05003035 * starting bit number of free bits.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003036 */
3037static int dbFindBits(u32 word, int l2nb)
3038{
3039 int bitno, nb;
3040 u32 mask;
3041
3042 /* get the number of bits.
3043 */
3044 nb = 1 << l2nb;
3045 assert(nb <= DBWORD);
3046
3047 /* complement the word so we can use a mask (i.e. 0s represent
3048 * free bits) and compute the mask.
3049 */
3050 word = ~word;
3051 mask = ONES << (DBWORD - nb);
3052
3053 /* scan the word for nb free bits at nb alignments.
3054 */
3055 for (bitno = 0; mask != 0; bitno += nb, mask >>= nb) {
3056 if ((mask & word) == mask)
3057 break;
3058 }
3059
3060 ASSERT(bitno < 32);
3061
3062 /* return the bit number.
3063 */
3064 return (bitno);
3065}
3066
3067
3068/*
3069 * NAME: dbMaxBud(u8 *cp)
3070 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -05003071 * FUNCTION: determine the largest binary buddy string of free
Linus Torvalds1da177e2005-04-16 15:20:36 -07003072 * bits within 32-bits of the map.
3073 *
3074 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05003075 * cp - pointer to the 32-bit value.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003076 *
3077 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05003078 * largest binary buddy of free bits within a dmap word.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003079 */
3080static int dbMaxBud(u8 * cp)
3081{
3082 signed char tmp1, tmp2;
3083
3084 /* check if the wmap word is all free. if so, the
3085 * free buddy size is BUDMIN.
3086 */
3087 if (*((uint *) cp) == 0)
3088 return (BUDMIN);
3089
3090 /* check if the wmap word is half free. if so, the
3091 * free buddy size is BUDMIN-1.
3092 */
3093 if (*((u16 *) cp) == 0 || *((u16 *) cp + 1) == 0)
3094 return (BUDMIN - 1);
3095
3096 /* not all free or half free. determine the free buddy
3097 * size thru table lookup using quarters of the wmap word.
3098 */
3099 tmp1 = max(budtab[cp[2]], budtab[cp[3]]);
3100 tmp2 = max(budtab[cp[0]], budtab[cp[1]]);
3101 return (max(tmp1, tmp2));
3102}
3103
3104
3105/*
3106 * NAME: cnttz(uint word)
3107 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -05003108 * FUNCTION: determine the number of trailing zeros within a 32-bit
Linus Torvalds1da177e2005-04-16 15:20:36 -07003109 * value.
3110 *
3111 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05003112 * value - 32-bit value to be examined.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003113 *
3114 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05003115 * count of trailing zeros
Linus Torvalds1da177e2005-04-16 15:20:36 -07003116 */
3117static int cnttz(u32 word)
3118{
3119 int n;
3120
3121 for (n = 0; n < 32; n++, word >>= 1) {
3122 if (word & 0x01)
3123 break;
3124 }
3125
3126 return (n);
3127}
3128
3129
3130/*
3131 * NAME: cntlz(u32 value)
3132 *
Dave Kleikampf720e3b2007-06-06 15:28:35 -05003133 * FUNCTION: determine the number of leading zeros within a 32-bit
Linus Torvalds1da177e2005-04-16 15:20:36 -07003134 * value.
3135 *
3136 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05003137 * value - 32-bit value to be examined.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003138 *
3139 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05003140 * count of leading zeros
Linus Torvalds1da177e2005-04-16 15:20:36 -07003141 */
3142static int cntlz(u32 value)
3143{
3144 int n;
3145
3146 for (n = 0; n < 32; n++, value <<= 1) {
3147 if (value & HIGHORDER)
3148 break;
3149 }
3150 return (n);
3151}
3152
3153
3154/*
3155 * NAME: blkstol2(s64 nb)
3156 *
3157 * FUNCTION: convert a block count to its log2 value. if the block
Dave Kleikampf720e3b2007-06-06 15:28:35 -05003158 * count is not a l2 multiple, it is rounded up to the next
Linus Torvalds1da177e2005-04-16 15:20:36 -07003159 * larger l2 multiple.
3160 *
3161 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05003162 * nb - number of blocks
Linus Torvalds1da177e2005-04-16 15:20:36 -07003163 *
3164 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05003165 * log2 number of blocks
Linus Torvalds1da177e2005-04-16 15:20:36 -07003166 */
Dave Kleikamp6cb12692005-09-15 23:25:41 -05003167static int blkstol2(s64 nb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003168{
3169 int l2nb;
3170 s64 mask; /* meant to be signed */
3171
3172 mask = (s64) 1 << (64 - 1);
3173
3174 /* count the leading bits.
3175 */
3176 for (l2nb = 0; l2nb < 64; l2nb++, mask >>= 1) {
3177 /* leading bit found.
3178 */
3179 if (nb & mask) {
3180 /* determine the l2 value.
3181 */
3182 l2nb = (64 - 1) - l2nb;
3183
3184 /* check if we need to round up.
3185 */
3186 if (~mask & nb)
3187 l2nb++;
3188
3189 return (l2nb);
3190 }
3191 }
3192 assert(0);
3193 return 0; /* fix compiler warning */
3194}
3195
3196
3197/*
Dave Kleikamp63f83c92006-10-02 09:55:27 -05003198 * NAME: dbAllocBottomUp()
Linus Torvalds1da177e2005-04-16 15:20:36 -07003199 *
3200 * FUNCTION: alloc the specified block range from the working block
3201 * allocation map.
3202 *
3203 * the blocks will be alloc from the working map one dmap
3204 * at a time.
3205 *
3206 * PARAMETERS:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05003207 * ip - pointer to in-core inode;
3208 * blkno - starting block number to be freed.
3209 * nblocks - number of blocks to be freed.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003210 *
3211 * RETURN VALUES:
Dave Kleikampf720e3b2007-06-06 15:28:35 -05003212 * 0 - success
3213 * -EIO - i/o error
Linus Torvalds1da177e2005-04-16 15:20:36 -07003214 */
3215int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks)
3216{
3217 struct metapage *mp;
3218 struct dmap *dp;
3219 int nb, rc;
3220 s64 lblkno, rem;
3221 struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
3222 struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
3223
Dave Kleikamp82d5b9a2007-01-09 14:14:48 -06003224 IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003225
3226 /* block to be allocated better be within the mapsize. */
3227 ASSERT(nblocks <= bmp->db_mapsize - blkno);
3228
3229 /*
3230 * allocate the blocks a dmap at a time.
3231 */
3232 mp = NULL;
3233 for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) {
3234 /* release previous dmap if any */
3235 if (mp) {
3236 write_metapage(mp);
3237 }
3238
3239 /* get the buffer for the current dmap. */
3240 lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
3241 mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
3242 if (mp == NULL) {
3243 IREAD_UNLOCK(ipbmap);
3244 return -EIO;
3245 }
3246 dp = (struct dmap *) mp->data;
3247
3248 /* determine the number of blocks to be allocated from
3249 * this dmap.
3250 */
3251 nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1)));
3252
Linus Torvalds1da177e2005-04-16 15:20:36 -07003253 /* allocate the blocks. */
3254 if ((rc = dbAllocDmapBU(bmp, dp, blkno, nb))) {
3255 release_metapage(mp);
3256 IREAD_UNLOCK(ipbmap);
3257 return (rc);
3258 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003259 }
3260
3261 /* write the last buffer. */
3262 write_metapage(mp);
3263
3264 IREAD_UNLOCK(ipbmap);
3265
3266 return (0);
3267}
3268
3269
3270static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
3271 int nblocks)
3272{
3273 int rc;
3274 int dbitno, word, rembits, nb, nwords, wbitno, agno;
Dave Kleikamp3c2c2262011-06-20 13:00:27 -05003275 s8 oldroot;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003276 struct dmaptree *tp = (struct dmaptree *) & dp->tree;
3277
3278 /* save the current value of the root (i.e. maximum free string)
3279 * of the dmap tree.
3280 */
3281 oldroot = tp->stree[ROOT];
3282
Linus Torvalds1da177e2005-04-16 15:20:36 -07003283 /* determine the bit number and word within the dmap of the
3284 * starting block.
3285 */
3286 dbitno = blkno & (BPERDMAP - 1);
3287 word = dbitno >> L2DBWORD;
3288
3289 /* block range better be within the dmap */
3290 assert(dbitno + nblocks <= BPERDMAP);
3291
3292 /* allocate the bits of the dmap's words corresponding to the block
3293 * range. not all bits of the first and last words may be contained
3294 * within the block range. if this is the case, we'll work against
3295 * those words (i.e. partial first and/or last) on an individual basis
3296 * (a single pass), allocating the bits of interest by hand and
3297 * updating the leaf corresponding to the dmap word. a single pass
3298 * will be used for all dmap words fully contained within the
3299 * specified range. within this pass, the bits of all fully contained
3300 * dmap words will be marked as free in a single shot and the leaves
3301 * will be updated. a single leaf may describe the free space of
3302 * multiple dmap words, so we may update only a subset of the actual
3303 * leaves corresponding to the dmap words of the block range.
3304 */
3305 for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
3306 /* determine the bit number within the word and
3307 * the number of bits within the word.
3308 */
3309 wbitno = dbitno & (DBWORD - 1);
3310 nb = min(rembits, DBWORD - wbitno);
3311
3312 /* check if only part of a word is to be allocated.
3313 */
3314 if (nb < DBWORD) {
3315 /* allocate (set to 1) the appropriate bits within
3316 * this dmap word.
3317 */
3318 dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb)
3319 >> wbitno);
3320
3321 word++;
3322 } else {
3323 /* one or more dmap words are fully contained
3324 * within the block range. determine how many
3325 * words and allocate (set to 1) the bits of these
3326 * words.
3327 */
3328 nwords = rembits >> L2DBWORD;
3329 memset(&dp->wmap[word], (int) ONES, nwords * 4);
3330
3331 /* determine how many bits */
3332 nb = nwords << L2DBWORD;
3333 word += nwords;
3334 }
3335 }
3336
3337 /* update the free count for this dmap */
Marcin Slusarz89145622008-02-13 15:34:20 -06003338 le32_add_cpu(&dp->nfree, -nblocks);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003339
3340 /* reconstruct summary tree */
3341 dbInitDmapTree(dp);
3342
3343 BMAP_LOCK(bmp);
3344
3345 /* if this allocation group is completely free,
Dave Kleikamp63f83c92006-10-02 09:55:27 -05003346 * update the highest active allocation group number
Linus Torvalds1da177e2005-04-16 15:20:36 -07003347 * if this allocation group is the new max.
3348 */
3349 agno = blkno >> bmp->db_agl2size;
3350 if (agno > bmp->db_maxag)
3351 bmp->db_maxag = agno;
3352
3353 /* update the free count for the allocation group and map */
3354 bmp->db_agfree[agno] -= nblocks;
3355 bmp->db_nfree -= nblocks;
3356
3357 BMAP_UNLOCK(bmp);
3358
3359 /* if the root has not changed, done. */
3360 if (tp->stree[ROOT] == oldroot)
3361 return (0);
3362
3363 /* root changed. bubble the change up to the dmap control pages.
3364 * if the adjustment of the upper level control pages fails,
3365 * backout the bit allocation (thus making everything consistent).
3366 */
3367 if ((rc = dbAdjCtl(bmp, blkno, tp->stree[ROOT], 1, 0)))
3368 dbFreeBits(bmp, dp, blkno, nblocks);
3369
3370 return (rc);
3371}
3372
3373
3374/*
3375 * NAME: dbExtendFS()
3376 *
3377 * FUNCTION: extend bmap from blkno for nblocks;
Dave Kleikamp63f83c92006-10-02 09:55:27 -05003378 * dbExtendFS() updates bmap ready for dbAllocBottomUp();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003379 *
3380 * L2
3381 * |
3382 * L1---------------------------------L1
Dave Kleikampf720e3b2007-06-06 15:28:35 -05003383 * | |
3384 * L0---------L0---------L0 L0---------L0---------L0
3385 * | | | | | |
3386 * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003387 * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm
3388 *
Dave Kleikamp63f83c92006-10-02 09:55:27 -05003389 * <---old---><----------------------------extend----------------------->
Linus Torvalds1da177e2005-04-16 15:20:36 -07003390 */
3391int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks)
3392{
3393 struct jfs_sb_info *sbi = JFS_SBI(ipbmap->i_sb);
3394 int nbperpage = sbi->nbperpage;
Richard Knutsson4d817152006-09-30 23:27:14 -07003395 int i, i0 = true, j, j0 = true, k, n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003396 s64 newsize;
3397 s64 p;
3398 struct metapage *mp, *l2mp, *l1mp = NULL, *l0mp = NULL;
3399 struct dmapctl *l2dcp, *l1dcp, *l0dcp;
3400 struct dmap *dp;
3401 s8 *l0leaf, *l1leaf, *l2leaf;
3402 struct bmap *bmp = sbi->bmap;
3403 int agno, l2agsize, oldl2agsize;
3404 s64 ag_rem;
3405
3406 newsize = blkno + nblocks;
3407
3408 jfs_info("dbExtendFS: blkno:%Ld nblocks:%Ld newsize:%Ld",
3409 (long long) blkno, (long long) nblocks, (long long) newsize);
3410
3411 /*
Dave Kleikampf720e3b2007-06-06 15:28:35 -05003412 * initialize bmap control page.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003413 *
3414 * all the data in bmap control page should exclude
3415 * the mkfs hidden dmap page.
3416 */
3417
3418 /* update mapsize */
3419 bmp->db_mapsize = newsize;
3420 bmp->db_maxlevel = BMAPSZTOLEV(bmp->db_mapsize);
3421
3422 /* compute new AG size */
3423 l2agsize = dbGetL2AGSize(newsize);
3424 oldl2agsize = bmp->db_agl2size;
3425
3426 bmp->db_agl2size = l2agsize;
3427 bmp->db_agsize = 1 << l2agsize;
3428
3429 /* compute new number of AG */
3430 agno = bmp->db_numag;
3431 bmp->db_numag = newsize >> l2agsize;
3432 bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 1 : 0;
3433
3434 /*
Dave Kleikampf720e3b2007-06-06 15:28:35 -05003435 * reconfigure db_agfree[]
Linus Torvalds1da177e2005-04-16 15:20:36 -07003436 * from old AG configuration to new AG configuration;
3437 *
3438 * coalesce contiguous k (newAGSize/oldAGSize) AGs;
3439 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn;
3440 * note: new AG size = old AG size * (2**x).
3441 */
3442 if (l2agsize == oldl2agsize)
3443 goto extend;
3444 k = 1 << (l2agsize - oldl2agsize);
3445 ag_rem = bmp->db_agfree[0]; /* save agfree[0] */
3446 for (i = 0, n = 0; i < agno; n++) {
3447 bmp->db_agfree[n] = 0; /* init collection point */
3448
André Goddard Rosaaf901ca2009-11-14 13:09:05 -02003449 /* coalesce contiguous k AGs; */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003450 for (j = 0; j < k && i < agno; j++, i++) {
3451 /* merge AGi to AGn */
3452 bmp->db_agfree[n] += bmp->db_agfree[i];
3453 }
3454 }
3455 bmp->db_agfree[0] += ag_rem; /* restore agfree[0] */
3456
3457 for (; n < MAXAG; n++)
3458 bmp->db_agfree[n] = 0;
3459
3460 /*
3461 * update highest active ag number
3462 */
3463
3464 bmp->db_maxag = bmp->db_maxag / k;
3465
3466 /*
Dave Kleikampf720e3b2007-06-06 15:28:35 -05003467 * extend bmap
Linus Torvalds1da177e2005-04-16 15:20:36 -07003468 *
3469 * update bit maps and corresponding level control pages;
3470 * global control page db_nfree, db_agfree[agno], db_maxfreebud;
3471 */
3472 extend:
3473 /* get L2 page */
3474 p = BMAPBLKNO + nbperpage; /* L2 page */
3475 l2mp = read_metapage(ipbmap, p, PSIZE, 0);
3476 if (!l2mp) {
Joe Percheseb8630d2013-06-04 16:39:15 -07003477 jfs_error(ipbmap->i_sb, "L2 page could not be read\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003478 return -EIO;
3479 }
3480 l2dcp = (struct dmapctl *) l2mp->data;
3481
3482 /* compute start L1 */
3483 k = blkno >> L2MAXL1SIZE;
3484 l2leaf = l2dcp->stree + CTLLEAFIND + k;
3485 p = BLKTOL1(blkno, sbi->l2nbperpage); /* L1 page */
3486
3487 /*
3488 * extend each L1 in L2
3489 */
3490 for (; k < LPERCTL; k++, p += nbperpage) {
3491 /* get L1 page */
3492 if (j0) {
3493 /* read in L1 page: (blkno & (MAXL1SIZE - 1)) */
3494 l1mp = read_metapage(ipbmap, p, PSIZE, 0);
3495 if (l1mp == NULL)
3496 goto errout;
3497 l1dcp = (struct dmapctl *) l1mp->data;
3498
3499 /* compute start L0 */
3500 j = (blkno & (MAXL1SIZE - 1)) >> L2MAXL0SIZE;
3501 l1leaf = l1dcp->stree + CTLLEAFIND + j;
3502 p = BLKTOL0(blkno, sbi->l2nbperpage);
Richard Knutsson4d817152006-09-30 23:27:14 -07003503 j0 = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003504 } else {
3505 /* assign/init L1 page */
3506 l1mp = get_metapage(ipbmap, p, PSIZE, 0);
3507 if (l1mp == NULL)
3508 goto errout;
3509
3510 l1dcp = (struct dmapctl *) l1mp->data;
3511
3512 /* compute start L0 */
3513 j = 0;
3514 l1leaf = l1dcp->stree + CTLLEAFIND;
Dave Kleikampf720e3b2007-06-06 15:28:35 -05003515 p += nbperpage; /* 1st L0 of L1.k */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003516 }
3517
3518 /*
3519 * extend each L0 in L1
3520 */
3521 for (; j < LPERCTL; j++) {
3522 /* get L0 page */
3523 if (i0) {
3524 /* read in L0 page: (blkno & (MAXL0SIZE - 1)) */
3525
3526 l0mp = read_metapage(ipbmap, p, PSIZE, 0);
3527 if (l0mp == NULL)
3528 goto errout;
3529 l0dcp = (struct dmapctl *) l0mp->data;
3530
3531 /* compute start dmap */
3532 i = (blkno & (MAXL0SIZE - 1)) >>
3533 L2BPERDMAP;
3534 l0leaf = l0dcp->stree + CTLLEAFIND + i;
3535 p = BLKTODMAP(blkno,
3536 sbi->l2nbperpage);
Richard Knutsson4d817152006-09-30 23:27:14 -07003537 i0 = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003538 } else {
3539 /* assign/init L0 page */
3540 l0mp = get_metapage(ipbmap, p, PSIZE, 0);
3541 if (l0mp == NULL)
3542 goto errout;
3543
3544 l0dcp = (struct dmapctl *) l0mp->data;
3545
3546 /* compute start dmap */
3547 i = 0;
3548 l0leaf = l0dcp->stree + CTLLEAFIND;
3549 p += nbperpage; /* 1st dmap of L0.j */
3550 }
3551
3552 /*
3553 * extend each dmap in L0
3554 */
3555 for (; i < LPERCTL; i++) {
3556 /*
3557 * reconstruct the dmap page, and
3558 * initialize corresponding parent L0 leaf
3559 */
3560 if ((n = blkno & (BPERDMAP - 1))) {
3561 /* read in dmap page: */
3562 mp = read_metapage(ipbmap, p,
3563 PSIZE, 0);
3564 if (mp == NULL)
3565 goto errout;
3566 n = min(nblocks, (s64)BPERDMAP - n);
3567 } else {
3568 /* assign/init dmap page */
3569 mp = read_metapage(ipbmap, p,
3570 PSIZE, 0);
3571 if (mp == NULL)
3572 goto errout;
3573
Fabian Frederick4f65b6d2014-05-19 21:36:58 +02003574 n = min_t(s64, nblocks, BPERDMAP);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003575 }
3576
3577 dp = (struct dmap *) mp->data;
3578 *l0leaf = dbInitDmap(dp, blkno, n);
3579
3580 bmp->db_nfree += n;
3581 agno = le64_to_cpu(dp->start) >> l2agsize;
3582 bmp->db_agfree[agno] += n;
3583
3584 write_metapage(mp);
3585
3586 l0leaf++;
3587 p += nbperpage;
3588
3589 blkno += n;
3590 nblocks -= n;
3591 if (nblocks == 0)
3592 break;
3593 } /* for each dmap in a L0 */
3594
3595 /*
Dave Kleikamp63f83c92006-10-02 09:55:27 -05003596 * build current L0 page from its leaves, and
Linus Torvalds1da177e2005-04-16 15:20:36 -07003597 * initialize corresponding parent L1 leaf
3598 */
3599 *l1leaf = dbInitDmapCtl(l0dcp, 0, ++i);
3600 write_metapage(l0mp);
3601 l0mp = NULL;
3602
3603 if (nblocks)
3604 l1leaf++; /* continue for next L0 */
3605 else {
3606 /* more than 1 L0 ? */
3607 if (j > 0)
3608 break; /* build L1 page */
3609 else {
3610 /* summarize in global bmap page */
3611 bmp->db_maxfreebud = *l1leaf;
3612 release_metapage(l1mp);
3613 release_metapage(l2mp);
3614 goto finalize;
3615 }
3616 }
3617 } /* for each L0 in a L1 */
3618
3619 /*
Dave Kleikamp63f83c92006-10-02 09:55:27 -05003620 * build current L1 page from its leaves, and
Linus Torvalds1da177e2005-04-16 15:20:36 -07003621 * initialize corresponding parent L2 leaf
3622 */
3623 *l2leaf = dbInitDmapCtl(l1dcp, 1, ++j);
3624 write_metapage(l1mp);
3625 l1mp = NULL;
3626
3627 if (nblocks)
3628 l2leaf++; /* continue for next L1 */
3629 else {
3630 /* more than 1 L1 ? */
3631 if (k > 0)
3632 break; /* build L2 page */
3633 else {
3634 /* summarize in global bmap page */
3635 bmp->db_maxfreebud = *l2leaf;
3636 release_metapage(l2mp);
3637 goto finalize;
3638 }
3639 }
3640 } /* for each L1 in a L2 */
3641
Joe Percheseb8630d2013-06-04 16:39:15 -07003642 jfs_error(ipbmap->i_sb, "function has not returned as expected\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003643errout:
3644 if (l0mp)
3645 release_metapage(l0mp);
3646 if (l1mp)
3647 release_metapage(l1mp);
3648 release_metapage(l2mp);
3649 return -EIO;
3650
3651 /*
Dave Kleikampf720e3b2007-06-06 15:28:35 -05003652 * finalize bmap control page
Linus Torvalds1da177e2005-04-16 15:20:36 -07003653 */
3654finalize:
3655
3656 return 0;
3657}
3658
3659
3660/*
3661 * dbFinalizeBmap()
3662 */
3663void dbFinalizeBmap(struct inode *ipbmap)
3664{
3665 struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
3666 int actags, inactags, l2nl;
3667 s64 ag_rem, actfree, inactfree, avgfree;
3668 int i, n;
3669
3670 /*
Dave Kleikampf720e3b2007-06-06 15:28:35 -05003671 * finalize bmap control page
Linus Torvalds1da177e2005-04-16 15:20:36 -07003672 */
3673//finalize:
Dave Kleikamp63f83c92006-10-02 09:55:27 -05003674 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -07003675 * compute db_agpref: preferred ag to allocate from
3676 * (the leftmost ag with average free space in it);
3677 */
3678//agpref:
3679 /* get the number of active ags and inacitve ags */
3680 actags = bmp->db_maxag + 1;
3681 inactags = bmp->db_numag - actags;
3682 ag_rem = bmp->db_mapsize & (bmp->db_agsize - 1); /* ??? */
3683
3684 /* determine how many blocks are in the inactive allocation
3685 * groups. in doing this, we must account for the fact that
3686 * the rightmost group might be a partial group (i.e. file
3687 * system size is not a multiple of the group size).
3688 */
3689 inactfree = (inactags && ag_rem) ?
3690 ((inactags - 1) << bmp->db_agl2size) + ag_rem
3691 : inactags << bmp->db_agl2size;
3692
3693 /* determine how many free blocks are in the active
3694 * allocation groups plus the average number of free blocks
3695 * within the active ags.
3696 */
3697 actfree = bmp->db_nfree - inactfree;
3698 avgfree = (u32) actfree / (u32) actags;
3699
3700 /* if the preferred allocation group has not average free space.
3701 * re-establish the preferred group as the leftmost
3702 * group with average free space.
3703 */
3704 if (bmp->db_agfree[bmp->db_agpref] < avgfree) {
3705 for (bmp->db_agpref = 0; bmp->db_agpref < actags;
3706 bmp->db_agpref++) {
3707 if (bmp->db_agfree[bmp->db_agpref] >= avgfree)
3708 break;
3709 }
3710 if (bmp->db_agpref >= bmp->db_numag) {
3711 jfs_error(ipbmap->i_sb,
Joe Percheseb8630d2013-06-04 16:39:15 -07003712 "cannot find ag with average freespace\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003713 }
3714 }
3715
3716 /*
Daniel Mackd7eecb42010-01-28 16:13:01 +08003717 * compute db_aglevel, db_agheight, db_width, db_agstart:
Dave Kleikamp63f83c92006-10-02 09:55:27 -05003718 * an ag is covered in aglevel dmapctl summary tree,
3719 * at agheight level height (from leaf) with agwidth number of nodes
3720 * each, which starts at agstart index node of the smmary tree node
Linus Torvalds1da177e2005-04-16 15:20:36 -07003721 * array;
3722 */
3723 bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize);
3724 l2nl =
3725 bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL);
Daniel Mackd7eecb42010-01-28 16:13:01 +08003726 bmp->db_agheight = l2nl >> 1;
3727 bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheight << 1));
3728 for (i = 5 - bmp->db_agheight, bmp->db_agstart = 0, n = 1; i > 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003729 i--) {
3730 bmp->db_agstart += n;
3731 n <<= 2;
3732 }
3733
3734}
3735
3736
3737/*
3738 * NAME: dbInitDmap()/ujfs_idmap_page()
Dave Kleikamp63f83c92006-10-02 09:55:27 -05003739 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07003740 * FUNCTION: initialize working/persistent bitmap of the dmap page
3741 * for the specified number of blocks:
Dave Kleikamp63f83c92006-10-02 09:55:27 -05003742 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07003743 * at entry, the bitmaps had been initialized as free (ZEROS);
Dave Kleikamp63f83c92006-10-02 09:55:27 -05003744 * The number of blocks will only account for the actually
3745 * existing blocks. Blocks which don't actually exist in
Linus Torvalds1da177e2005-04-16 15:20:36 -07003746 * the aggregate will be marked as allocated (ONES);
3747 *
3748 * PARAMETERS:
3749 * dp - pointer to page of map
3750 * nblocks - number of blocks this page
3751 *
3752 * RETURNS: NONE
3753 */
3754static int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks)
3755{
3756 int blkno, w, b, r, nw, nb, i;
3757
3758 /* starting block number within the dmap */
3759 blkno = Blkno & (BPERDMAP - 1);
3760
3761 if (blkno == 0) {
3762 dp->nblocks = dp->nfree = cpu_to_le32(nblocks);
3763 dp->start = cpu_to_le64(Blkno);
3764
3765 if (nblocks == BPERDMAP) {
3766 memset(&dp->wmap[0], 0, LPERDMAP * 4);
3767 memset(&dp->pmap[0], 0, LPERDMAP * 4);
3768 goto initTree;
3769 }
3770 } else {
Marcin Slusarz89145622008-02-13 15:34:20 -06003771 le32_add_cpu(&dp->nblocks, nblocks);
3772 le32_add_cpu(&dp->nfree, nblocks);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003773 }
3774
3775 /* word number containing start block number */
3776 w = blkno >> L2DBWORD;
3777
3778 /*
3779 * free the bits corresponding to the block range (ZEROS):
Dave Kleikamp63f83c92006-10-02 09:55:27 -05003780 * note: not all bits of the first and last words may be contained
Linus Torvalds1da177e2005-04-16 15:20:36 -07003781 * within the block range.
3782 */
3783 for (r = nblocks; r > 0; r -= nb, blkno += nb) {
3784 /* number of bits preceding range to be freed in the word */
3785 b = blkno & (DBWORD - 1);
3786 /* number of bits to free in the word */
3787 nb = min(r, DBWORD - b);
3788
3789 /* is partial word to be freed ? */
3790 if (nb < DBWORD) {
3791 /* free (set to 0) from the bitmap word */
3792 dp->wmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb)
3793 >> b));
3794 dp->pmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb)
3795 >> b));
3796
3797 /* skip the word freed */
3798 w++;
3799 } else {
3800 /* free (set to 0) contiguous bitmap words */
3801 nw = r >> L2DBWORD;
3802 memset(&dp->wmap[w], 0, nw * 4);
3803 memset(&dp->pmap[w], 0, nw * 4);
3804
3805 /* skip the words freed */
3806 nb = nw << L2DBWORD;
3807 w += nw;
3808 }
3809 }
3810
3811 /*
Dave Kleikamp63f83c92006-10-02 09:55:27 -05003812 * mark bits following the range to be freed (non-existing
Linus Torvalds1da177e2005-04-16 15:20:36 -07003813 * blocks) as allocated (ONES)
3814 */
3815
3816 if (blkno == BPERDMAP)
3817 goto initTree;
3818
3819 /* the first word beyond the end of existing blocks */
3820 w = blkno >> L2DBWORD;
3821
3822 /* does nblocks fall on a 32-bit boundary ? */
3823 b = blkno & (DBWORD - 1);
3824 if (b) {
3825 /* mark a partial word allocated */
3826 dp->wmap[w] = dp->pmap[w] = cpu_to_le32(ONES >> b);
3827 w++;
3828 }
3829
3830 /* set the rest of the words in the page to allocated (ONES) */
3831 for (i = w; i < LPERDMAP; i++)
3832 dp->pmap[i] = dp->wmap[i] = cpu_to_le32(ONES);
3833
3834 /*
3835 * init tree
3836 */
3837 initTree:
3838 return (dbInitDmapTree(dp));
3839}
3840
3841
3842/*
3843 * NAME: dbInitDmapTree()/ujfs_complete_dmap()
Dave Kleikamp63f83c92006-10-02 09:55:27 -05003844 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07003845 * FUNCTION: initialize summary tree of the specified dmap:
3846 *
3847 * at entry, bitmap of the dmap has been initialized;
Dave Kleikamp63f83c92006-10-02 09:55:27 -05003848 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07003849 * PARAMETERS:
3850 * dp - dmap to complete
3851 * blkno - starting block number for this dmap
3852 * treemax - will be filled in with max free for this dmap
3853 *
3854 * RETURNS: max free string at the root of the tree
3855 */
3856static int dbInitDmapTree(struct dmap * dp)
3857{
3858 struct dmaptree *tp;
3859 s8 *cp;
3860 int i;
3861
3862 /* init fixed info of tree */
3863 tp = &dp->tree;
3864 tp->nleafs = cpu_to_le32(LPERDMAP);
3865 tp->l2nleafs = cpu_to_le32(L2LPERDMAP);
3866 tp->leafidx = cpu_to_le32(LEAFIND);
3867 tp->height = cpu_to_le32(4);
3868 tp->budmin = BUDMIN;
3869
3870 /* init each leaf from corresponding wmap word:
3871 * note: leaf is set to NOFREE(-1) if all blocks of corresponding
Dave Kleikamp63f83c92006-10-02 09:55:27 -05003872 * bitmap word are allocated.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003873 */
3874 cp = tp->stree + le32_to_cpu(tp->leafidx);
3875 for (i = 0; i < LPERDMAP; i++)
3876 *cp++ = dbMaxBud((u8 *) & dp->wmap[i]);
3877
3878 /* build the dmap's binary buddy summary tree */
3879 return (dbInitTree(tp));
3880}
3881
3882
3883/*
3884 * NAME: dbInitTree()/ujfs_adjtree()
Dave Kleikamp63f83c92006-10-02 09:55:27 -05003885 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07003886 * FUNCTION: initialize binary buddy summary tree of a dmap or dmapctl.
3887 *
Dave Kleikamp63f83c92006-10-02 09:55:27 -05003888 * at entry, the leaves of the tree has been initialized
Linus Torvalds1da177e2005-04-16 15:20:36 -07003889 * from corresponding bitmap word or root of summary tree
3890 * of the child control page;
3891 * configure binary buddy system at the leaf level, then
3892 * bubble up the values of the leaf nodes up the tree.
3893 *
3894 * PARAMETERS:
3895 * cp - Pointer to the root of the tree
3896 * l2leaves- Number of leaf nodes as a power of 2
3897 * l2min - Number of blocks that can be covered by a leaf
3898 * as a power of 2
3899 *
3900 * RETURNS: max free string at the root of the tree
3901 */
3902static int dbInitTree(struct dmaptree * dtp)
3903{
3904 int l2max, l2free, bsize, nextb, i;
3905 int child, parent, nparent;
3906 s8 *tp, *cp, *cp1;
3907
3908 tp = dtp->stree;
3909
3910 /* Determine the maximum free string possible for the leaves */
3911 l2max = le32_to_cpu(dtp->l2nleafs) + dtp->budmin;
3912
3913 /*
3914 * configure the leaf levevl into binary buddy system
3915 *
Dave Kleikamp63f83c92006-10-02 09:55:27 -05003916 * Try to combine buddies starting with a buddy size of 1
3917 * (i.e. two leaves). At a buddy size of 1 two buddy leaves
3918 * can be combined if both buddies have a maximum free of l2min;
3919 * the combination will result in the left-most buddy leaf having
3920 * a maximum free of l2min+1.
3921 * After processing all buddies for a given size, process buddies
3922 * at the next higher buddy size (i.e. current size * 2) and
3923 * the next maximum free (current free + 1).
3924 * This continues until the maximum possible buddy combination
Linus Torvalds1da177e2005-04-16 15:20:36 -07003925 * yields maximum free.
3926 */
3927 for (l2free = dtp->budmin, bsize = 1; l2free < l2max;
3928 l2free++, bsize = nextb) {
3929 /* get next buddy size == current buddy pair size */
3930 nextb = bsize << 1;
3931
3932 /* scan each adjacent buddy pair at current buddy size */
3933 for (i = 0, cp = tp + le32_to_cpu(dtp->leafidx);
3934 i < le32_to_cpu(dtp->nleafs);
3935 i += nextb, cp += nextb) {
3936 /* coalesce if both adjacent buddies are max free */
3937 if (*cp == l2free && *(cp + bsize) == l2free) {
3938 *cp = l2free + 1; /* left take right */
3939 *(cp + bsize) = -1; /* right give left */
3940 }
3941 }
3942 }
3943
3944 /*
3945 * bubble summary information of leaves up the tree.
3946 *
3947 * Starting at the leaf node level, the four nodes described by
Dave Kleikamp63f83c92006-10-02 09:55:27 -05003948 * the higher level parent node are compared for a maximum free and
3949 * this maximum becomes the value of the parent node.
3950 * when all lower level nodes are processed in this fashion then
3951 * move up to the next level (parent becomes a lower level node) and
Linus Torvalds1da177e2005-04-16 15:20:36 -07003952 * continue the process for that level.
3953 */
3954 for (child = le32_to_cpu(dtp->leafidx),
3955 nparent = le32_to_cpu(dtp->nleafs) >> 2;
3956 nparent > 0; nparent >>= 2, child = parent) {
3957 /* get index of 1st node of parent level */
3958 parent = (child - 1) >> 2;
3959
Dave Kleikamp63f83c92006-10-02 09:55:27 -05003960 /* set the value of the parent node as the maximum
Linus Torvalds1da177e2005-04-16 15:20:36 -07003961 * of the four nodes of the current level.
3962 */
3963 for (i = 0, cp = tp + child, cp1 = tp + parent;
3964 i < nparent; i++, cp += 4, cp1++)
3965 *cp1 = TREEMAX(cp);
3966 }
3967
3968 return (*tp);
3969}
3970
3971
3972/*
3973 * dbInitDmapCtl()
3974 *
3975 * function: initialize dmapctl page
3976 */
3977static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i)
3978{ /* start leaf index not covered by range */
3979 s8 *cp;
3980
3981 dcp->nleafs = cpu_to_le32(LPERCTL);
3982 dcp->l2nleafs = cpu_to_le32(L2LPERCTL);
3983 dcp->leafidx = cpu_to_le32(CTLLEAFIND);
3984 dcp->height = cpu_to_le32(5);
3985 dcp->budmin = L2BPERDMAP + L2LPERCTL * level;
3986
3987 /*
Dave Kleikamp63f83c92006-10-02 09:55:27 -05003988 * initialize the leaves of current level that were not covered
3989 * by the specified input block range (i.e. the leaves have no
Linus Torvalds1da177e2005-04-16 15:20:36 -07003990 * low level dmapctl or dmap).
3991 */
3992 cp = &dcp->stree[CTLLEAFIND + i];
3993 for (; i < LPERCTL; i++)
3994 *cp++ = NOFREE;
3995
3996 /* build the dmap's binary buddy summary tree */
3997 return (dbInitTree((struct dmaptree *) dcp));
3998}
3999
4000
4001/*
4002 * NAME: dbGetL2AGSize()/ujfs_getagl2size()
Dave Kleikamp63f83c92006-10-02 09:55:27 -05004003 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07004004 * FUNCTION: Determine log2(allocation group size) from aggregate size
Dave Kleikamp63f83c92006-10-02 09:55:27 -05004005 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07004006 * PARAMETERS:
4007 * nblocks - Number of blocks in aggregate
4008 *
4009 * RETURNS: log2(allocation group size) in aggregate blocks
4010 */
4011static int dbGetL2AGSize(s64 nblocks)
4012{
4013 s64 sz;
4014 s64 m;
4015 int l2sz;
4016
4017 if (nblocks < BPERDMAP * MAXAG)
4018 return (L2BPERDMAP);
4019
4020 /* round up aggregate size to power of 2 */
4021 m = ((u64) 1 << (64 - 1));
4022 for (l2sz = 64; l2sz >= 0; l2sz--, m >>= 1) {
4023 if (m & nblocks)
4024 break;
4025 }
4026
4027 sz = (s64) 1 << l2sz;
4028 if (sz < nblocks)
4029 l2sz += 1;
4030
4031 /* agsize = roundupSize/max_number_of_ag */
4032 return (l2sz - L2MAXAG);
4033}
4034
4035
4036/*
4037 * NAME: dbMapFileSizeToMapSize()
Dave Kleikamp63f83c92006-10-02 09:55:27 -05004038 *
4039 * FUNCTION: compute number of blocks the block allocation map file
Linus Torvalds1da177e2005-04-16 15:20:36 -07004040 * can cover from the map file size;
4041 *
4042 * RETURNS: Number of blocks which can be covered by this block map file;
4043 */
4044
4045/*
4046 * maximum number of map pages at each level including control pages
4047 */
4048#define MAXL0PAGES (1 + LPERCTL)
4049#define MAXL1PAGES (1 + LPERCTL * MAXL0PAGES)
4050#define MAXL2PAGES (1 + LPERCTL * MAXL1PAGES)
4051
4052/*
4053 * convert number of map pages to the zero origin top dmapctl level
4054 */
4055#define BMAPPGTOLEV(npages) \
Dave Kleikampf720e3b2007-06-06 15:28:35 -05004056 (((npages) <= 3 + MAXL0PAGES) ? 0 : \
4057 ((npages) <= 2 + MAXL1PAGES) ? 1 : 2)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004058
4059s64 dbMapFileSizeToMapSize(struct inode * ipbmap)
4060{
4061 struct super_block *sb = ipbmap->i_sb;
4062 s64 nblocks;
4063 s64 npages, ndmaps;
4064 int level, i;
4065 int complete, factor;
4066
4067 nblocks = ipbmap->i_size >> JFS_SBI(sb)->l2bsize;
4068 npages = nblocks >> JFS_SBI(sb)->l2nbperpage;
4069 level = BMAPPGTOLEV(npages);
4070
Dave Kleikamp63f83c92006-10-02 09:55:27 -05004071 /* At each level, accumulate the number of dmap pages covered by
Linus Torvalds1da177e2005-04-16 15:20:36 -07004072 * the number of full child levels below it;
4073 * repeat for the last incomplete child level.
4074 */
4075 ndmaps = 0;
4076 npages--; /* skip the first global control page */
4077 /* skip higher level control pages above top level covered by map */
4078 npages -= (2 - level);
4079 npages--; /* skip top level's control page */
4080 for (i = level; i >= 0; i--) {
4081 factor =
4082 (i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1);
4083 complete = (u32) npages / factor;
Dave Kleikampf720e3b2007-06-06 15:28:35 -05004084 ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL :
4085 ((i == 1) ? LPERCTL : 1));
Linus Torvalds1da177e2005-04-16 15:20:36 -07004086
4087 /* pages in last/incomplete child */
4088 npages = (u32) npages % factor;
4089 /* skip incomplete child's level control page */
4090 npages--;
4091 }
4092
Dave Kleikamp63f83c92006-10-02 09:55:27 -05004093 /* convert the number of dmaps into the number of blocks
Linus Torvalds1da177e2005-04-16 15:20:36 -07004094 * which can be covered by the dmaps;
4095 */
4096 nblocks = ndmaps << L2BPERDMAP;
4097
4098 return (nblocks);
4099}