/*
 *   Copyright (C) International Business Machines Corp., 2000-2004
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
18
19#include <linux/fs.h>
20#include <linux/buffer_head.h>
21#include <linux/quotaops.h>
22#include "jfs_incore.h"
23#include "jfs_filsys.h"
24#include "jfs_metapage.h"
25#include "jfs_dinode.h"
26#include "jfs_imap.h"
27#include "jfs_dmap.h"
28#include "jfs_superblock.h"
29#include "jfs_txnmgr.h"
30#include "jfs_debug.h"
31
/* number of bits stored in one PSIZE-byte page */
#define BITSPERPAGE	((PSIZE) << 3)

/* one megabyte, as a log2 exponent and as a byte count */
#define L2MEGABYTE	20
#define MEGABYTE	(1 << L2MEGABYTE)

/* 32 megabytes, in bytes */
#define MEGABYTE32	(1 << (L2MEGABYTE + 5))

/*
 * convert block number to bmap file page number:
 * the sum of the block number shifted right by 13, 23 and 33 bits,
 * plus a fixed offset of 3 + 1 pages
 * (NOTE(review): the shifts presumably count dmap pages and the two
 * upper control-page levels, and the constant the leading pages of
 * the map file -- confirm against jfs_dmap.h)
 */
#define BLKTODMAPN(b) \
	(1 + 3 + ((b) >> 13) + ((b) >> 23) + ((b) >> 33))
40
41/*
42 * jfs_extendfs()
43 *
44 * function: extend file system;
45 *
46 * |-------------------------------|----------|----------|
47 * file system space fsck inline log
48 * workspace space
49 *
50 * input:
51 * new LVSize: in LV blocks (required)
52 * new LogSize: in LV blocks (optional)
53 * new FSSize: in LV blocks (optional)
54 *
55 * new configuration:
56 * 1. set new LogSize as specified or default from new LVSize;
57 * 2. compute new FSCKSize from new LVSize;
58 * 3. set new FSSize as MIN(FSSize, LVSize-(LogSize+FSCKSize)) where
59 * assert(new FSSize >= old FSSize),
60 * i.e., file system must not be shrinked;
61 */
62int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
63{
64 int rc = 0;
65 struct jfs_sb_info *sbi = JFS_SBI(sb);
66 struct inode *ipbmap = sbi->ipbmap;
67 struct inode *ipbmap2;
68 struct inode *ipimap = sbi->ipimap;
69 struct jfs_log *log = sbi->log;
70 struct bmap *bmp = sbi->bmap;
71 s64 newLogAddress, newFSCKAddress;
72 int newFSCKSize;
73 s64 newMapSize = 0, mapSize;
74 s64 XAddress, XSize, nblocks, xoff, xaddr, t64;
75 s64 oldLVSize;
76 s64 newFSSize;
77 s64 VolumeSize;
78 int newNpages = 0, nPages, newPage, xlen, t32;
79 int tid;
80 int log_formatted = 0;
81 struct inode *iplist[1];
82 struct jfs_superblock *j_sb, *j_sb2;
83 uint old_agsize;
84 struct buffer_head *bh, *bh2;
85
86 /* If the volume hasn't grown, get out now */
87
88 if (sbi->mntflag & JFS_INLINELOG)
89 oldLVSize = addressPXD(&sbi->logpxd) + lengthPXD(&sbi->logpxd);
90 else
91 oldLVSize = addressPXD(&sbi->fsckpxd) +
92 lengthPXD(&sbi->fsckpxd);
93
94 if (oldLVSize >= newLVSize) {
95 printk(KERN_WARNING
96 "jfs_extendfs: volume hasn't grown, returning\n");
97 goto out;
98 }
99
100 VolumeSize = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
101
102 if (VolumeSize) {
103 if (newLVSize > VolumeSize) {
104 printk(KERN_WARNING "jfs_extendfs: invalid size\n");
105 rc = -EINVAL;
106 goto out;
107 }
108 } else {
109 /* check the device */
110 bh = sb_bread(sb, newLVSize - 1);
111 if (!bh) {
112 printk(KERN_WARNING "jfs_extendfs: invalid size\n");
113 rc = -EINVAL;
114 goto out;
115 }
116 bforget(bh);
117 }
118
119 /* Can't extend write-protected drive */
120
121 if (isReadOnly(ipbmap)) {
122 printk(KERN_WARNING "jfs_extendfs: read-only file system\n");
123 rc = -EROFS;
124 goto out;
125 }
126
127 /*
128 * reconfigure LV spaces
129 * ---------------------
130 *
131 * validate new size, or, if not specified, determine new size
132 */
133
134 /*
135 * reconfigure inline log space:
136 */
137 if ((sbi->mntflag & JFS_INLINELOG)) {
138 if (newLogSize == 0) {
139 /*
140 * no size specified: default to 1/256 of aggregate
141 * size; rounded up to a megabyte boundary;
142 */
143 newLogSize = newLVSize >> 8;
144 t32 = (1 << (20 - sbi->l2bsize)) - 1;
145 newLogSize = (newLogSize + t32) & ~t32;
146 newLogSize =
147 min(newLogSize, MEGABYTE32 >> sbi->l2bsize);
148 } else {
149 /*
150 * convert the newLogSize to fs blocks.
151 *
152 * Since this is given in megabytes, it will always be
153 * an even number of pages.
154 */
155 newLogSize = (newLogSize * MEGABYTE) >> sbi->l2bsize;
156 }
157
158 } else
159 newLogSize = 0;
160
161 newLogAddress = newLVSize - newLogSize;
162
163 /*
164 * reconfigure fsck work space:
165 *
166 * configure it to the end of the logical volume regardless of
167 * whether file system extends to the end of the aggregate;
168 * Need enough 4k pages to cover:
169 * - 1 bit per block in aggregate rounded up to BPERDMAP boundary
170 * - 1 extra page to handle control page and intermediate level pages
171 * - 50 extra pages for the chkdsk service log
172 */
173 t64 = ((newLVSize - newLogSize + BPERDMAP - 1) >> L2BPERDMAP)
174 << L2BPERDMAP;
175 t32 = ((t64 + (BITSPERPAGE - 1)) / BITSPERPAGE) + 1 + 50;
176 newFSCKSize = t32 << sbi->l2nbperpage;
177 newFSCKAddress = newLogAddress - newFSCKSize;
178
179 /*
180 * compute new file system space;
181 */
182 newFSSize = newLVSize - newLogSize - newFSCKSize;
183
184 /* file system cannot be shrinked */
185 if (newFSSize < bmp->db_mapsize) {
186 rc = -EINVAL;
187 goto out;
188 }
189
190 /*
191 * If we're expanding enough that the inline log does not overlap
192 * the old one, we can format the new log before we quiesce the
193 * filesystem.
194 */
195 if ((sbi->mntflag & JFS_INLINELOG) && (newLogAddress > oldLVSize)) {
196 if ((rc = lmLogFormat(log, newLogAddress, newLogSize)))
197 goto out;
198 log_formatted = 1;
199 }
200 /*
201 * quiesce file system
202 *
203 * (prepare to move the inline log and to prevent map update)
204 *
205 * block any new transactions and wait for completion of
206 * all wip transactions and flush modified pages s.t.
207 * on-disk file system is in consistent state and
208 * log is not required for recovery.
209 */
210 txQuiesce(sb);
211
Dave Kleikamp7fab4792005-05-02 12:25:02 -0600212 /* Reset size of direct inode */
213 sbi->direct_inode->i_size = sb->s_bdev->bd_inode->i_size;
214
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215 if (sbi->mntflag & JFS_INLINELOG) {
216 /*
217 * deactivate old inline log
218 */
219 lmLogShutdown(log);
220
221 /*
222 * mark on-disk super block for fs in transition;
223 *
224 * update on-disk superblock for the new space configuration
225 * of inline log space and fsck work space descriptors:
226 * N.B. FS descriptor is NOT updated;
227 *
228 * crash recovery:
229 * logredo(): if FM_EXTENDFS, return to fsck() for cleanup;
230 * fsck(): if FM_EXTENDFS, reformat inline log and fsck
231 * workspace from superblock inline log descriptor and fsck
232 * workspace descriptor;
233 */
234
235 /* read in superblock */
236 if ((rc = readSuper(sb, &bh)))
237 goto error_out;
238 j_sb = (struct jfs_superblock *)bh->b_data;
239
240 /* mark extendfs() in progress */
241 j_sb->s_state |= cpu_to_le32(FM_EXTENDFS);
242 j_sb->s_xsize = cpu_to_le64(newFSSize);
243 PXDaddress(&j_sb->s_xfsckpxd, newFSCKAddress);
244 PXDlength(&j_sb->s_xfsckpxd, newFSCKSize);
245 PXDaddress(&j_sb->s_xlogpxd, newLogAddress);
246 PXDlength(&j_sb->s_xlogpxd, newLogSize);
247
248 /* synchronously update superblock */
249 mark_buffer_dirty(bh);
250 sync_dirty_buffer(bh);
251 brelse(bh);
252
253 /*
254 * format new inline log synchronously;
255 *
256 * crash recovery: if log move in progress,
257 * reformat log and exit success;
258 */
259 if (!log_formatted)
260 if ((rc = lmLogFormat(log, newLogAddress, newLogSize)))
261 goto error_out;
262
263 /*
264 * activate new log
265 */
266 log->base = newLogAddress;
267 log->size = newLogSize >> (L2LOGPSIZE - sb->s_blocksize_bits);
268 if ((rc = lmLogInit(log)))
269 goto error_out;
270 }
271
272 /*
273 * extend block allocation map
274 * ---------------------------
275 *
276 * extendfs() for new extension, retry after crash recovery;
277 *
278 * note: both logredo() and fsck() rebuild map from
279 * the bitmap and configuration parameter from superblock
280 * (disregarding all other control information in the map);
281 *
282 * superblock:
283 * s_size: aggregate size in physical blocks;
284 */
285 /*
286 * compute the new block allocation map configuration
287 *
288 * map dinode:
289 * di_size: map file size in byte;
290 * di_nblocks: number of blocks allocated for map file;
291 * di_mapsize: number of blocks in aggregate (covered by map);
292 * map control page:
293 * db_mapsize: number of blocks in aggregate (covered by map);
294 */
295 newMapSize = newFSSize;
296 /* number of data pages of new bmap file:
297 * roundup new size to full dmap page boundary and
298 * add 1 extra dmap page for next extendfs()
299 */
300 t64 = (newMapSize - 1) + BPERDMAP;
301 newNpages = BLKTODMAPN(t64) + 1;
302
303 /*
304 * extend map from current map (WITHOUT growing mapfile)
305 *
306 * map new extension with unmapped part of the last partial
307 * dmap page, if applicable, and extra page(s) allocated
308 * at end of bmap by mkfs() or previous extendfs();
309 */
310 extendBmap:
311 /* compute number of blocks requested to extend */
312 mapSize = bmp->db_mapsize;
313 XAddress = mapSize; /* eXtension Address */
314 XSize = newMapSize - mapSize; /* eXtension Size */
315 old_agsize = bmp->db_agsize; /* We need to know if this changes */
316
317 /* compute number of blocks that can be extended by current mapfile */
318 t64 = dbMapFileSizeToMapSize(ipbmap);
319 if (mapSize > t64) {
320 printk(KERN_ERR "jfs_extendfs: mapSize (0x%Lx) > t64 (0x%Lx)\n",
321 (long long) mapSize, (long long) t64);
322 rc = -EIO;
323 goto error_out;
324 }
325 nblocks = min(t64 - mapSize, XSize);
326
327 /*
328 * update map pages for new extension:
329 *
330 * update/init dmap and bubble up the control hierarchy
331 * incrementally fold up dmaps into upper levels;
332 * update bmap control page;
333 */
334 if ((rc = dbExtendFS(ipbmap, XAddress, nblocks)))
335 goto error_out;
336 /*
337 * the map now has extended to cover additional nblocks:
338 * dn_mapsize = oldMapsize + nblocks;
339 */
340 /* ipbmap->i_mapsize += nblocks; */
341 XSize -= nblocks;
342
343 /*
344 * grow map file to cover remaining extension
345 * and/or one extra dmap page for next extendfs();
346 *
347 * allocate new map pages and its backing blocks, and
348 * update map file xtree
349 */
350 /* compute number of data pages of current bmap file */
351 nPages = ipbmap->i_size >> L2PSIZE;
352
353 /* need to grow map file ? */
354 if (nPages == newNpages)
355 goto finalizeBmap;
356
357 /*
358 * grow bmap file for the new map pages required:
359 *
360 * allocate growth at the start of newly extended region;
361 * bmap file only grows sequentially, i.e., both data pages
362 * and possibly xtree index pages may grow in append mode,
363 * s.t. logredo() can reconstruct pre-extension state
364 * by washing away bmap file of pages outside s_size boundary;
365 */
366 /*
367 * journal map file growth as if a regular file growth:
368 * (note: bmap is created with di_mode = IFJOURNAL|IFREG);
369 *
370 * journaling of bmap file growth is not required since
371 * logredo() do/can not use log records of bmap file growth
372 * but it provides careful write semantics, pmap update, etc.;
373 */
374 /* synchronous write of data pages: bmap data pages are
375 * cached in meta-data cache, and not written out
376 * by txCommit();
377 */
378 filemap_fdatawait(ipbmap->i_mapping);
OGAWA Hirofumi28fd1292006-01-08 01:02:14 -0800379 filemap_write_and_wait(ipbmap->i_mapping);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700380 diWriteSpecial(ipbmap, 0);
381
382 newPage = nPages; /* first new page number */
383 xoff = newPage << sbi->l2nbperpage;
384 xlen = (newNpages - nPages) << sbi->l2nbperpage;
385 xlen = min(xlen, (int) nblocks) & ~(sbi->nbperpage - 1);
386 xaddr = XAddress;
387
388 tid = txBegin(sb, COMMIT_FORCE);
389
390 if ((rc = xtAppend(tid, ipbmap, 0, xoff, nblocks, &xlen, &xaddr, 0))) {
391 txEnd(tid);
392 goto error_out;
393 }
394 /* update bmap file size */
395 ipbmap->i_size += xlen << sbi->l2bsize;
396 inode_add_bytes(ipbmap, xlen << sbi->l2bsize);
397
398 iplist[0] = ipbmap;
399 rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);
400
401 txEnd(tid);
402
403 if (rc)
404 goto error_out;
405
406 /*
407 * map file has been grown now to cover extension to further out;
408 * di_size = new map file size;
409 *
410 * if huge extension, the previous extension based on previous
411 * map file size may not have been sufficient to cover whole extension
412 * (it could have been used up for new map pages),
413 * but the newly grown map file now covers lot bigger new free space
414 * available for further extension of map;
415 */
416 /* any more blocks to extend ? */
417 if (XSize)
418 goto extendBmap;
419
420 finalizeBmap:
421 /* finalize bmap */
422 dbFinalizeBmap(ipbmap);
423
424 /*
425 * update inode allocation map
426 * ---------------------------
427 *
428 * move iag lists from old to new iag;
429 * agstart field is not updated for logredo() to reconstruct
430 * iag lists if system crash occurs.
431 * (computation of ag number from agstart based on agsize
432 * will correctly identify the new ag);
433 */
434 /* if new AG size the same as old AG size, done! */
435 if (bmp->db_agsize != old_agsize) {
436 if ((rc = diExtendFS(ipimap, ipbmap)))
437 goto error_out;
438
439 /* finalize imap */
440 if ((rc = diSync(ipimap)))
441 goto error_out;
442 }
443
444 /*
445 * finalize
446 * --------
447 *
448 * extension is committed when on-disk super block is
449 * updated with new descriptors: logredo will recover
450 * crash before it to pre-extension state;
451 */
452
453 /* sync log to skip log replay of bmap file growth transaction; */
454 /* lmLogSync(log, 1); */
455
456 /*
457 * synchronous write bmap global control page;
458 * for crash before completion of write
459 * logredo() will recover to pre-extendfs state;
460 * for crash after completion of write,
461 * logredo() will recover post-extendfs state;
462 */
463 if ((rc = dbSync(ipbmap)))
464 goto error_out;
465
466 /*
467 * copy primary bmap inode to secondary bmap inode
468 */
469
470 ipbmap2 = diReadSpecial(sb, BMAP_I, 1);
471 if (ipbmap2 == NULL) {
472 printk(KERN_ERR "jfs_extendfs: diReadSpecial(bmap) failed\n");
473 goto error_out;
474 }
475 memcpy(&JFS_IP(ipbmap2)->i_xtroot, &JFS_IP(ipbmap)->i_xtroot, 288);
476 ipbmap2->i_size = ipbmap->i_size;
477 ipbmap2->i_blocks = ipbmap->i_blocks;
478
479 diWriteSpecial(ipbmap2, 1);
480 diFreeSpecial(ipbmap2);
481
482 /*
483 * update superblock
484 */
485 if ((rc = readSuper(sb, &bh)))
486 goto error_out;
487 j_sb = (struct jfs_superblock *)bh->b_data;
488
489 /* mark extendfs() completion */
490 j_sb->s_state &= cpu_to_le32(~FM_EXTENDFS);
491 j_sb->s_size = cpu_to_le64(bmp->db_mapsize <<
492 le16_to_cpu(j_sb->s_l2bfactor));
493 j_sb->s_agsize = cpu_to_le32(bmp->db_agsize);
494
495 /* update inline log space descriptor */
496 if (sbi->mntflag & JFS_INLINELOG) {
497 PXDaddress(&(j_sb->s_logpxd), newLogAddress);
498 PXDlength(&(j_sb->s_logpxd), newLogSize);
499 }
500
501 /* record log's mount serial number */
502 j_sb->s_logserial = cpu_to_le32(log->serial);
503
504 /* update fsck work space descriptor */
505 PXDaddress(&(j_sb->s_fsckpxd), newFSCKAddress);
506 PXDlength(&(j_sb->s_fsckpxd), newFSCKSize);
507 j_sb->s_fscklog = 1;
508 /* sb->s_fsckloglen remains the same */
509
510 /* Update secondary superblock */
511 bh2 = sb_bread(sb, SUPER2_OFF >> sb->s_blocksize_bits);
512 if (bh2) {
513 j_sb2 = (struct jfs_superblock *)bh2->b_data;
514 memcpy(j_sb2, j_sb, sizeof (struct jfs_superblock));
515
516 mark_buffer_dirty(bh);
517 sync_dirty_buffer(bh2);
518 brelse(bh2);
519 }
520
521 /* write primary superblock */
522 mark_buffer_dirty(bh);
523 sync_dirty_buffer(bh);
524 brelse(bh);
525
526 goto resume;
527
528 error_out:
529 jfs_error(sb, "jfs_extendfs");
530
531 resume:
532 /*
533 * resume file system transactions
534 */
535 txResume(sb);
536
537 out:
538 return rc;
539}