Blame - fs/ext3/resize.c - kernel/msm-4.9

blob: 34b39e9a1e5a01588db869c02c85f2407d12221a [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* linux/fs/ext3/resize.c
				3	*
				4	* Support for resizing an ext3 filesystem while it is mounted.
				5	*
				6	* Copyright (C) 2001, 2002 Andreas Dilger <adilger@clusterfs.com>
				7	*
				8	* This could probably be made into a module, because it is not often in use.
				9	*/
				10
				11	#include <linux/config.h>
				12
				13	#define EXT3FS_DEBUG
				14
				15	#include <linux/sched.h>
				16	#include <linux/smp_lock.h>
				17	#include <linux/ext3_jbd.h>
				18
				19	#include <linux/errno.h>
				20	#include <linux/slab.h>
				21
				22
				23	#define outside(b, first, last) ((b) < (first) \|\| (b) >= (last))
				24	#define inside(b, first, last) ((b) >= (first) && (b) < (last))
				25
				26	static int verify_group_input(struct super_block *sb,
				27	struct ext3_new_group_data *input)
				28	{
				29	struct ext3_sb_info *sbi = EXT3_SB(sb);
				30	struct ext3_super_block *es = sbi->s_es;
				31	unsigned start = le32_to_cpu(es->s_blocks_count);
				32	unsigned end = start + input->blocks_count;
				33	unsigned group = input->group;
Glauber de Oliveira Costa	29ba172	2006-01-08 01:03:23 -0800	[diff] [blame]	34	unsigned itend = input->inode_table + sbi->s_itb_per_group;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	35	unsigned overhead = ext3_bg_has_super(sb, group) ?
				36	(1 + ext3_bg_num_gdb(sb, group) +
				37	le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
				38	unsigned metaend = start + overhead;
				39	struct buffer_head *bh = NULL;
				40	int free_blocks_count;
				41	int err = -EINVAL;
				42
				43	input->free_blocks_count = free_blocks_count =
				44	input->blocks_count - 2 - overhead - sbi->s_itb_per_group;
				45
				46	if (test_opt(sb, DEBUG))
				47	printk(KERN_DEBUG "EXT3-fs: adding %s group %u: %u blocks "
				48	"(%d free, %u reserved)\n",
				49	ext3_bg_has_super(sb, input->group) ? "normal" :
				50	"no-super", input->group, input->blocks_count,
				51	free_blocks_count, input->reserved_blocks);
				52
				53	if (group != sbi->s_groups_count)
				54	ext3_warning(sb, __FUNCTION__,
				55	"Cannot add at group %u (only %lu groups)",
				56	input->group, sbi->s_groups_count);
				57	else if ((start - le32_to_cpu(es->s_first_data_block)) %
				58	EXT3_BLOCKS_PER_GROUP(sb))
				59	ext3_warning(sb, __FUNCTION__, "Last group not full");
				60	else if (input->reserved_blocks > input->blocks_count / 5)
				61	ext3_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)",
				62	input->reserved_blocks);
				63	else if (free_blocks_count < 0)
				64	ext3_warning(sb, __FUNCTION__, "Bad blocks count %u",
				65	input->blocks_count);
				66	else if (!(bh = sb_bread(sb, end - 1)))
				67	ext3_warning(sb, __FUNCTION__, "Cannot read last block (%u)",
				68	end - 1);
				69	else if (outside(input->block_bitmap, start, end))
				70	ext3_warning(sb, __FUNCTION__,
				71	"Block bitmap not in group (block %u)",
				72	input->block_bitmap);
				73	else if (outside(input->inode_bitmap, start, end))
				74	ext3_warning(sb, __FUNCTION__,
				75	"Inode bitmap not in group (block %u)",
				76	input->inode_bitmap);
				77	else if (outside(input->inode_table, start, end) \|\|
				78	outside(itend - 1, start, end))
				79	ext3_warning(sb, __FUNCTION__,
				80	"Inode table not in group (blocks %u-%u)",
				81	input->inode_table, itend - 1);
				82	else if (input->inode_bitmap == input->block_bitmap)
				83	ext3_warning(sb, __FUNCTION__,
				84	"Block bitmap same as inode bitmap (%u)",
				85	input->block_bitmap);
				86	else if (inside(input->block_bitmap, input->inode_table, itend))
				87	ext3_warning(sb, __FUNCTION__,
				88	"Block bitmap (%u) in inode table (%u-%u)",
				89	input->block_bitmap, input->inode_table, itend-1);
				90	else if (inside(input->inode_bitmap, input->inode_table, itend))
				91	ext3_warning(sb, __FUNCTION__,
				92	"Inode bitmap (%u) in inode table (%u-%u)",
				93	input->inode_bitmap, input->inode_table, itend-1);
				94	else if (inside(input->block_bitmap, start, metaend))
				95	ext3_warning(sb, __FUNCTION__,
				96	"Block bitmap (%u) in GDT table (%u-%u)",
				97	input->block_bitmap, start, metaend - 1);
				98	else if (inside(input->inode_bitmap, start, metaend))
				99	ext3_warning(sb, __FUNCTION__,
				100	"Inode bitmap (%u) in GDT table (%u-%u)",
				101	input->inode_bitmap, start, metaend - 1);
				102	else if (inside(input->inode_table, start, metaend) \|\|
				103	inside(itend - 1, start, metaend))
				104	ext3_warning(sb, __FUNCTION__,
				105	"Inode table (%u-%u) overlaps GDT table (%u-%u)",
				106	input->inode_table, itend - 1, start, metaend - 1);
				107	else
				108	err = 0;
				109	brelse(bh);
				110
				111	return err;
				112	}
				113
				114	static struct buffer_head bclean(handle_t handle, struct super_block *sb,
				115	unsigned long blk)
				116	{
				117	struct buffer_head *bh;
				118	int err;
				119
				120	bh = sb_getblk(sb, blk);
Glauber de Oliveira Costa	2973dfd	2005-10-30 15:03:05 -0800	[diff] [blame]	121	if (!bh)
				122	return ERR_PTR(-EIO);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	123	if ((err = ext3_journal_get_write_access(handle, bh))) {
				124	brelse(bh);
				125	bh = ERR_PTR(err);
				126	} else {
				127	lock_buffer(bh);
				128	memset(bh->b_data, 0, sb->s_blocksize);
				129	set_buffer_uptodate(bh);
				130	unlock_buffer(bh);
				131	}
				132
				133	return bh;
				134	}
				135
				136	/*
				137	* To avoid calling the atomic setbit hundreds or thousands of times, we only
				138	* need to use it within a single byte (to ensure we get endianness right).
				139	* We can use memset for the rest of the bitmap as there are no other users.
				140	*/
				141	static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
				142	{
				143	int i;
				144
				145	if (start_bit >= end_bit)
				146	return;
				147
				148	ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
				149	for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
				150	ext3_set_bit(i, bitmap);
				151	if (i < end_bit)
				152	memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
				153	}
				154
				155	/*
				156	* Set up the block and inode bitmaps, and the inode table for the new group.
				157	* This doesn't need to be part of the main transaction, since we are only
				158	* changing blocks outside the actual filesystem. We still do journaling to
				159	* ensure the recovery is correct in case of a failure just after resize.
				160	* If any part of this fails, we simply abort the resize.
				161	*/
				162	static int setup_new_group_blocks(struct super_block *sb,
				163	struct ext3_new_group_data *input)
				164	{
				165	struct ext3_sb_info *sbi = EXT3_SB(sb);
				166	unsigned long start = input->group * sbi->s_blocks_per_group +
				167	le32_to_cpu(sbi->s_es->s_first_data_block);
				168	int reserved_gdb = ext3_bg_has_super(sb, input->group) ?
				169	le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0;
				170	unsigned long gdblocks = ext3_bg_num_gdb(sb, input->group);
				171	struct buffer_head *bh;
				172	handle_t *handle;
				173	unsigned long block;
				174	int bit;
				175	int i;
				176	int err = 0, err2;
				177
				178	handle = ext3_journal_start_sb(sb, reserved_gdb + gdblocks +
				179	2 + sbi->s_itb_per_group);
				180	if (IS_ERR(handle))
				181	return PTR_ERR(handle);
				182
				183	lock_super(sb);
				184	if (input->group != sbi->s_groups_count) {
				185	err = -EBUSY;
				186	goto exit_journal;
				187	}
				188
				189	if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) {
				190	err = PTR_ERR(bh);
				191	goto exit_journal;
				192	}
				193
				194	if (ext3_bg_has_super(sb, input->group)) {
				195	ext3_debug("mark backup superblock %#04lx (+0)\n", start);
				196	ext3_set_bit(0, bh->b_data);
				197	}
				198
				199	/* Copy all of the GDT blocks into the backup in this group */
				200	for (i = 0, bit = 1, block = start + 1;
				201	i < gdblocks; i++, block++, bit++) {
				202	struct buffer_head *gdb;
				203
				204	ext3_debug("update backup group %#04lx (+%d)\n", block, bit);
				205
				206	gdb = sb_getblk(sb, block);
Glauber de Oliveira Costa	2973dfd	2005-10-30 15:03:05 -0800	[diff] [blame]	207	if (!gdb) {
				208	err = -EIO;
				209	goto exit_bh;
				210	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	211	if ((err = ext3_journal_get_write_access(handle, gdb))) {
				212	brelse(gdb);
				213	goto exit_bh;
				214	}
				215	lock_buffer(bh);
Al Viro	de0bb97	2006-04-26 07:26:09 +0100	[diff] [blame]	216	memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, bh->b_size);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	217	set_buffer_uptodate(gdb);
				218	unlock_buffer(bh);
				219	ext3_journal_dirty_metadata(handle, gdb);
				220	ext3_set_bit(bit, bh->b_data);
				221	brelse(gdb);
				222	}
				223
				224	/* Zero out all of the reserved backup group descriptor table blocks */
				225	for (i = 0, bit = gdblocks + 1, block = start + bit;
				226	i < reserved_gdb; i++, block++, bit++) {
				227	struct buffer_head *gdb;
				228
				229	ext3_debug("clear reserved block %#04lx (+%d)\n", block, bit);
				230
				231	if (IS_ERR(gdb = bclean(handle, sb, block))) {
				232	err = PTR_ERR(bh);
				233	goto exit_bh;
				234	}
				235	ext3_journal_dirty_metadata(handle, gdb);
				236	ext3_set_bit(bit, bh->b_data);
				237	brelse(gdb);
				238	}
				239	ext3_debug("mark block bitmap %#04x (+%ld)\n", input->block_bitmap,
				240	input->block_bitmap - start);
				241	ext3_set_bit(input->block_bitmap - start, bh->b_data);
				242	ext3_debug("mark inode bitmap %#04x (+%ld)\n", input->inode_bitmap,
				243	input->inode_bitmap - start);
				244	ext3_set_bit(input->inode_bitmap - start, bh->b_data);
				245
				246	/* Zero out all of the inode table blocks */
				247	for (i = 0, block = input->inode_table, bit = block - start;
				248	i < sbi->s_itb_per_group; i++, bit++, block++) {
				249	struct buffer_head *it;
				250
Glauber de Oliveira Costa	8bdac5d	2005-09-22 21:44:26 -0700	[diff] [blame]	251	ext3_debug("clear inode block %#04lx (+%d)\n", block, bit);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	252	if (IS_ERR(it = bclean(handle, sb, block))) {
				253	err = PTR_ERR(it);
				254	goto exit_bh;
				255	}
				256	ext3_journal_dirty_metadata(handle, it);
				257	brelse(it);
				258	ext3_set_bit(bit, bh->b_data);
				259	}
				260	mark_bitmap_end(input->blocks_count, EXT3_BLOCKS_PER_GROUP(sb),
				261	bh->b_data);
				262	ext3_journal_dirty_metadata(handle, bh);
				263	brelse(bh);
				264
				265	/* Mark unused entries in inode bitmap used */
				266	ext3_debug("clear inode bitmap %#04x (+%ld)\n",
				267	input->inode_bitmap, input->inode_bitmap - start);
				268	if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) {
				269	err = PTR_ERR(bh);
				270	goto exit_journal;
				271	}
				272
				273	mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb),
				274	bh->b_data);
				275	ext3_journal_dirty_metadata(handle, bh);
				276	exit_bh:
				277	brelse(bh);
				278
				279	exit_journal:
				280	unlock_super(sb);
				281	if ((err2 = ext3_journal_stop(handle)) && !err)
				282	err = err2;
				283
				284	return err;
				285	}
				286
				287	/*
				288	* Iterate through the groups which hold BACKUP superblock/GDT copies in an
				289	* ext3 filesystem. The counters should be initialized to 1, 5, and 7 before
				290	* calling this for the first time. In a sparse filesystem it will be the
				291	* sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ...
				292	* For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ...
				293	*/
				294	static unsigned ext3_list_backups(struct super_block sb, unsigned three,
				295	unsigned five, unsigned seven)
				296	{
				297	unsigned *min = three;
				298	int mult = 3;
				299	unsigned ret;
				300
				301	if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
				302	EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
				303	ret = *min;
				304	*min += 1;
				305	return ret;
				306	}
				307
				308	if (five < min) {
				309	min = five;
				310	mult = 5;
				311	}
				312	if (seven < min) {
				313	min = seven;
				314	mult = 7;
				315	}
				316
				317	ret = *min;
				318	min = mult;
				319
				320	return ret;
				321	}
				322
				323	/*
				324	* Check that all of the backup GDT blocks are held in the primary GDT block.
				325	* It is assumed that they are stored in group order. Returns the number of
				326	* groups in current filesystem that have BACKUPS, or -ve error code.
				327	*/
				328	static int verify_reserved_gdb(struct super_block *sb,
				329	struct buffer_head *primary)
				330	{
				331	const unsigned long blk = primary->b_blocknr;
				332	const unsigned long end = EXT3_SB(sb)->s_groups_count;
				333	unsigned three = 1;
				334	unsigned five = 5;
				335	unsigned seven = 7;
				336	unsigned grp;
				337	__u32 p = (__u32 )primary->b_data;
				338	int gdbackups = 0;
				339
				340	while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) {
				341	if (le32_to_cpu(p++) != grp EXT3_BLOCKS_PER_GROUP(sb) + blk){
				342	ext3_warning(sb, __FUNCTION__,
Glauber de Oliveira Costa	9f40668	2006-01-08 01:03:22 -0800	[diff] [blame]	343	"reserved GDT %ld missing grp %d (%ld)",
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	344	blk, grp,
				345	grp * EXT3_BLOCKS_PER_GROUP(sb) + blk);
				346	return -EINVAL;
				347	}
				348	if (++gdbackups > EXT3_ADDR_PER_BLOCK(sb))
				349	return -EFBIG;
				350	}
				351
				352	return gdbackups;
				353	}
				354
				355	/*
				356	* Called when we need to bring a reserved group descriptor table block into
				357	* use from the resize inode. The primary copy of the new GDT block currently
				358	* is an indirect block (under the double indirect block in the resize inode).
				359	* The new backup GDT blocks will be stored as leaf blocks in this indirect
				360	* block, in group order. Even though we know all the block numbers we need,
				361	* we check to ensure that the resize inode has actually reserved these blocks.
				362	*
				363	* Don't need to update the block bitmaps because the blocks are still in use.
				364	*
				365	* We get all of the error cases out of the way, so that we are sure to not
				366	* fail once we start modifying the data on disk, because JBD has no rollback.
				367	*/
				368	static int add_new_gdb(handle_t handle, struct inode inode,
				369	struct ext3_new_group_data *input,
				370	struct buffer_head **primary)
				371	{
				372	struct super_block *sb = inode->i_sb;
				373	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
				374	unsigned long gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb);
				375	unsigned long gdblock = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num;
				376	struct buffer_head o_group_desc, n_group_desc;
				377	struct buffer_head *dind;
				378	int gdbackups;
				379	struct ext3_iloc iloc;
				380	__u32 *data;
				381	int err;
				382
				383	if (test_opt(sb, DEBUG))
				384	printk(KERN_DEBUG
				385	"EXT3-fs: ext3_add_new_gdb: adding group block %lu\n",
				386	gdb_num);
				387
				388	/*
				389	* If we are not using the primary superblock/GDT copy don't resize,
				390	* because the user tools have no way of handling this. Probably a
				391	* bad time to do it anyways.
				392	*/
				393	if (EXT3_SB(sb)->s_sbh->b_blocknr !=
				394	le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) {
				395	ext3_warning(sb, __FUNCTION__,
Glauber de Oliveira Costa	9f40668	2006-01-08 01:03:22 -0800	[diff] [blame]	396	"won't resize using backup superblock at %llu",
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	397	(unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr);
				398	return -EPERM;
				399	}
				400
				401	*primary = sb_bread(sb, gdblock);
				402	if (!*primary)
				403	return -EIO;
				404
				405	if ((gdbackups = verify_reserved_gdb(sb, *primary)) < 0) {
				406	err = gdbackups;
				407	goto exit_bh;
				408	}
				409
				410	data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK;
				411	dind = sb_bread(sb, le32_to_cpu(*data));
				412	if (!dind) {
				413	err = -EIO;
				414	goto exit_bh;
				415	}
				416
				417	data = (__u32 *)dind->b_data;
				418	if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) {
				419	ext3_warning(sb, __FUNCTION__,
Glauber de Oliveira Costa	9f40668	2006-01-08 01:03:22 -0800	[diff] [blame]	420	"new group %u GDT block %lu not reserved",
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	421	input->group, gdblock);
				422	err = -EINVAL;
				423	goto exit_dind;
				424	}
				425
				426	if ((err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh)))
				427	goto exit_dind;
				428
				429	if ((err = ext3_journal_get_write_access(handle, *primary)))
				430	goto exit_sbh;
				431
				432	if ((err = ext3_journal_get_write_access(handle, dind)))
				433	goto exit_primary;
				434
				435	/* ext3_reserve_inode_write() gets a reference on the iloc */
				436	if ((err = ext3_reserve_inode_write(handle, inode, &iloc)))
				437	goto exit_dindj;
				438
				439	n_group_desc = (struct buffer_head *)kmalloc((gdb_num + 1)
				440	sizeof(struct buffer_head *), GFP_KERNEL);
				441	if (!n_group_desc) {
				442	err = -ENOMEM;
				443	ext3_warning (sb, __FUNCTION__,
				444	"not enough memory for %lu groups", gdb_num + 1);
				445	goto exit_inode;
				446	}
				447
				448	/*
				449	* Finally, we have all of the possible failures behind us...
				450	*
				451	* Remove new GDT block from inode double-indirect block and clear out
				452	* the new GDT block for use (which also "frees" the backup GDT blocks
				453	* from the reserved inode). We don't need to change the bitmaps for
				454	* these blocks, because they are marked as in-use from being in the
				455	* reserved inode, and will become GDT blocks (primary and backup).
				456	*/
				457	data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)] = 0;
				458	ext3_journal_dirty_metadata(handle, dind);
				459	brelse(dind);
				460	inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
				461	ext3_mark_iloc_dirty(handle, inode, &iloc);
				462	memset((*primary)->b_data, 0, sb->s_blocksize);
				463	ext3_journal_dirty_metadata(handle, *primary);
				464
				465	o_group_desc = EXT3_SB(sb)->s_group_desc;
				466	memcpy(n_group_desc, o_group_desc,
				467	EXT3_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
				468	n_group_desc[gdb_num] = *primary;
				469	EXT3_SB(sb)->s_group_desc = n_group_desc;
				470	EXT3_SB(sb)->s_gdb_count++;
				471	kfree(o_group_desc);
				472
				473	es->s_reserved_gdt_blocks =
				474	cpu_to_le16(le16_to_cpu(es->s_reserved_gdt_blocks) - 1);
				475	ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
				476
				477	return 0;
				478
				479	exit_inode:
				480	//ext3_journal_release_buffer(handle, iloc.bh);
				481	brelse(iloc.bh);
				482	exit_dindj:
				483	//ext3_journal_release_buffer(handle, dind);
				484	exit_primary:
				485	//ext3_journal_release_buffer(handle, *primary);
				486	exit_sbh:
				487	//ext3_journal_release_buffer(handle, *primary);
				488	exit_dind:
				489	brelse(dind);
				490	exit_bh:
				491	brelse(*primary);
				492
				493	ext3_debug("leaving with error %d\n", err);
				494	return err;
				495	}
				496
				497	/*
				498	* Called when we are adding a new group which has a backup copy of each of
				499	* the GDT blocks (i.e. sparse group) and there are reserved GDT blocks.
				500	* We need to add these reserved backup GDT blocks to the resize inode, so
				501	* that they are kept for future resizing and not allocated to files.
				502	*
				503	* Each reserved backup GDT block will go into a different indirect block.
				504	* The indirect blocks are actually the primary reserved GDT blocks,
				505	* so we know in advance what their block numbers are. We only get the
				506	* double-indirect block to verify it is pointing to the primary reserved
				507	* GDT blocks so we don't overwrite a data block by accident. The reserved
				508	* backup GDT blocks are stored in their reserved primary GDT block.
				509	*/
				510	static int reserve_backup_gdb(handle_t handle, struct inode inode,
				511	struct ext3_new_group_data *input)
				512	{
				513	struct super_block *sb = inode->i_sb;
				514	int reserved_gdb =le16_to_cpu(EXT3_SB(sb)->s_es->s_reserved_gdt_blocks);
				515	struct buffer_head **primary;
				516	struct buffer_head *dind;
				517	struct ext3_iloc iloc;
				518	unsigned long blk;
				519	__u32 data, end;
				520	int gdbackups = 0;
				521	int res, i;
				522	int err;
				523
				524	primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_KERNEL);
				525	if (!primary)
				526	return -ENOMEM;
				527
				528	data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK;
				529	dind = sb_bread(sb, le32_to_cpu(*data));
				530	if (!dind) {
				531	err = -EIO;
				532	goto exit_free;
				533	}
				534
				535	blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count;
				536	data = (__u32 *)dind->b_data + EXT3_SB(sb)->s_gdb_count;
				537	end = (__u32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb);
				538
				539	/* Get each reserved primary GDT block and verify it holds backups */
				540	for (res = 0; res < reserved_gdb; res++, blk++) {
				541	if (le32_to_cpu(*data) != blk) {
				542	ext3_warning(sb, __FUNCTION__,
Glauber de Oliveira Costa	9f40668	2006-01-08 01:03:22 -0800	[diff] [blame]	543	"reserved block %lu not at offset %ld",
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	544	blk, (long)(data - (__u32 *)dind->b_data));
				545	err = -EINVAL;
				546	goto exit_bh;
				547	}
				548	primary[res] = sb_bread(sb, blk);
				549	if (!primary[res]) {
				550	err = -EIO;
				551	goto exit_bh;
				552	}
				553	if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) {
				554	brelse(primary[res]);
				555	err = gdbackups;
				556	goto exit_bh;
				557	}
				558	if (++data >= end)
				559	data = (__u32 *)dind->b_data;
				560	}
				561
				562	for (i = 0; i < reserved_gdb; i++) {
				563	if ((err = ext3_journal_get_write_access(handle, primary[i]))) {
				564	/*
				565	int j;
				566	for (j = 0; j < i; j++)
				567	ext3_journal_release_buffer(handle, primary[j]);
				568	*/
				569	goto exit_bh;
				570	}
				571	}
				572
				573	if ((err = ext3_reserve_inode_write(handle, inode, &iloc)))
				574	goto exit_bh;
				575
				576	/*
				577	* Finally we can add each of the reserved backup GDT blocks from
				578	* the new group to its reserved primary GDT block.
				579	*/
				580	blk = input->group * EXT3_BLOCKS_PER_GROUP(sb);
				581	for (i = 0; i < reserved_gdb; i++) {
				582	int err2;
				583	data = (__u32 *)primary[i]->b_data;
				584	/* printk("reserving backup %lu[%u] = %lu\n",
				585	primary[i]->b_blocknr, gdbackups,
				586	blk + primary[i]->b_blocknr); */
				587	data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr);
				588	err2 = ext3_journal_dirty_metadata(handle, primary[i]);
				589	if (!err)
				590	err = err2;
				591	}
				592	inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9;
				593	ext3_mark_iloc_dirty(handle, inode, &iloc);
				594
				595	exit_bh:
				596	while (--res >= 0)
				597	brelse(primary[res]);
				598	brelse(dind);
				599
				600	exit_free:
				601	kfree(primary);
				602
				603	return err;
				604	}
				605
				606	/*
				607	* Update the backup copies of the ext3 metadata. These don't need to be part
				608	* of the main resize transaction, because e2fsck will re-write them if there
				609	* is a problem (basically only OOM will cause a problem). However, we
				610	* _should_ update the backups if possible, in case the primary gets trashed
				611	* for some reason and we need to run e2fsck from a backup superblock. The
				612	* important part is that the new block and inode counts are in the backup
				613	* superblocks, and the location of the new group metadata in the GDT backups.
				614	*
				615	* We do not need lock_super() for this, because these blocks are not
				616	* otherwise touched by the filesystem code when it is mounted. We don't
				617	* need to worry about last changing from sbi->s_groups_count, because the
				618	* worst that can happen is that we do not copy the full number of backups
				619	* at this time. The resize which changed s_groups_count will backup again.
				620	*/
				621	static void update_backups(struct super_block *sb,
				622	int blk_off, char *data, int size)
				623	{
				624	struct ext3_sb_info *sbi = EXT3_SB(sb);
				625	const unsigned long last = sbi->s_groups_count;
				626	const int bpg = EXT3_BLOCKS_PER_GROUP(sb);
				627	unsigned three = 1;
				628	unsigned five = 5;
				629	unsigned seven = 7;
				630	unsigned group;
				631	int rest = sb->s_blocksize - size;
				632	handle_t *handle;
				633	int err = 0, err2;
				634
				635	handle = ext3_journal_start_sb(sb, EXT3_MAX_TRANS_DATA);
				636	if (IS_ERR(handle)) {
				637	group = 1;
				638	err = PTR_ERR(handle);
				639	goto exit_err;
				640	}
				641
				642	while ((group = ext3_list_backups(sb, &three, &five, &seven)) < last) {
				643	struct buffer_head *bh;
				644
				645	/* Out of journal space, and can't get more - abort - so sad */
				646	if (handle->h_buffer_credits == 0 &&
				647	ext3_journal_extend(handle, EXT3_MAX_TRANS_DATA) &&
				648	(err = ext3_journal_restart(handle, EXT3_MAX_TRANS_DATA)))
				649	break;
				650
				651	bh = sb_getblk(sb, group * bpg + blk_off);
Glauber de Oliveira Costa	2973dfd	2005-10-30 15:03:05 -0800	[diff] [blame]	652	if (!bh) {
				653	err = -EIO;
				654	break;
				655	}
Glauber de Oliveira Costa	8bdac5d	2005-09-22 21:44:26 -0700	[diff] [blame]	656	ext3_debug("update metadata backup %#04lx\n",
				657	(unsigned long)bh->b_blocknr);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	658	if ((err = ext3_journal_get_write_access(handle, bh)))
				659	break;
				660	lock_buffer(bh);
				661	memcpy(bh->b_data, data, size);
				662	if (rest)
				663	memset(bh->b_data + size, 0, rest);
				664	set_buffer_uptodate(bh);
				665	unlock_buffer(bh);
				666	ext3_journal_dirty_metadata(handle, bh);
				667	brelse(bh);
				668	}
				669	if ((err2 = ext3_journal_stop(handle)) && !err)
				670	err = err2;
				671
				672	/*
				673	* Ugh! Need to have e2fsck write the backup copies. It is too
				674	* late to revert the resize, we shouldn't fail just because of
				675	* the backup copies (they are only needed in case of corruption).
				676	*
				677	* However, if we got here we have a journal problem too, so we
				678	* can't really start a transaction to mark the superblock.
				679	* Chicken out and just set the flag on the hope it will be written
				680	* to disk, and if not - we will simply wait until next fsck.
				681	*/
				682	exit_err:
				683	if (err) {
				684	ext3_warning(sb, __FUNCTION__,
				685	"can't update backup for group %d (err %d), "
Glauber de Oliveira Costa	9f40668	2006-01-08 01:03:22 -0800	[diff] [blame]	686	"forcing fsck on next reboot", group, err);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	687	sbi->s_mount_state &= ~EXT3_VALID_FS;
				688	sbi->s_es->s_state &= ~cpu_to_le16(EXT3_VALID_FS);
				689	mark_buffer_dirty(sbi->s_sbh);
				690	}
				691	}
				692
				693	/* Add group descriptor data to an existing or new group descriptor block.
				694	* Ensure we handle all possible error conditions _before_ we start modifying
				695	* the filesystem, because we cannot abort the transaction and not have it
				696	* write the data to disk.
				697	*
				698	* If we are on a GDT block boundary, we need to get the reserved GDT block.
				699	* Otherwise, we may need to add backup GDT blocks for a sparse group.
				700	*
				701	* We only need to hold the superblock lock while we are actually adding
				702	* in the new group's counts to the superblock. Prior to that we have
				703	* not really "added" the group at all. We re-check that we are still
				704	* adding in the last group in case things have changed since verifying.
				705	*/
				706	int ext3_group_add(struct super_block sb, struct ext3_new_group_data input)
				707	{
				708	struct ext3_sb_info *sbi = EXT3_SB(sb);
				709	struct ext3_super_block *es = sbi->s_es;
				710	int reserved_gdb = ext3_bg_has_super(sb, input->group) ?
				711	le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
				712	struct buffer_head *primary = NULL;
				713	struct ext3_group_desc *gdp;
				714	struct inode *inode = NULL;
				715	handle_t *handle;
				716	int gdb_off, gdb_num;
				717	int err, err2;
				718
				719	gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb);
				720	gdb_off = input->group % EXT3_DESC_PER_BLOCK(sb);
				721
				722	if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb,
				723	EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
				724	ext3_warning(sb, __FUNCTION__,
Glauber de Oliveira Costa	9f40668	2006-01-08 01:03:22 -0800	[diff] [blame]	725	"Can't resize non-sparse filesystem further");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	726	return -EPERM;
				727	}
				728
				729	if (reserved_gdb \|\| gdb_off == 0) {
				730	if (!EXT3_HAS_COMPAT_FEATURE(sb,
				731	EXT3_FEATURE_COMPAT_RESIZE_INODE)){
				732	ext3_warning(sb, __FUNCTION__,
Glauber de Oliveira Costa	9f40668	2006-01-08 01:03:22 -0800	[diff] [blame]	733	"No reserved GDT blocks, can't resize");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	734	return -EPERM;
				735	}
				736	inode = iget(sb, EXT3_RESIZE_INO);
				737	if (!inode \|\| is_bad_inode(inode)) {
				738	ext3_warning(sb, __FUNCTION__,
Glauber de Oliveira Costa	9f40668	2006-01-08 01:03:22 -0800	[diff] [blame]	739	"Error opening resize inode");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	740	iput(inode);
				741	return -ENOENT;
				742	}
				743	}
				744
				745	if ((err = verify_group_input(sb, input)))
				746	goto exit_put;
				747
				748	if ((err = setup_new_group_blocks(sb, input)))
				749	goto exit_put;
				750
				751	/*
				752	* We will always be modifying at least the superblock and a GDT
				753	* block. If we are adding a group past the last current GDT block,
				754	* we will also modify the inode and the dindirect block. If we
				755	* are adding a group with superblock/GDT backups we will also
				756	* modify each of the reserved GDT dindirect blocks.
				757	*/
				758	handle = ext3_journal_start_sb(sb,
				759	ext3_bg_has_super(sb, input->group) ?
				760	3 + reserved_gdb : 4);
				761	if (IS_ERR(handle)) {
				762	err = PTR_ERR(handle);
				763	goto exit_put;
				764	}
				765
				766	lock_super(sb);
Glauber de Oliveira Costa	29ba172	2006-01-08 01:03:23 -0800	[diff] [blame]	767	if (input->group != sbi->s_groups_count) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	768	ext3_warning(sb, __FUNCTION__,
Glauber de Oliveira Costa	9f40668	2006-01-08 01:03:22 -0800	[diff] [blame]	769	"multiple resizers run on filesystem!");
Glauber de Oliveira Costa	aa877b3	2005-11-28 13:44:02 -0800	[diff] [blame]	770	err = -EBUSY;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	771	goto exit_journal;
				772	}
				773
				774	if ((err = ext3_journal_get_write_access(handle, sbi->s_sbh)))
				775	goto exit_journal;
				776
				777	/*
				778	* We will only either add reserved group blocks to a backup group
				779	* or remove reserved blocks for the first group in a new group block.
				780	* Doing both would be mean more complex code, and sane people don't
				781	* use non-sparse filesystems anymore. This is already checked above.
				782	*/
				783	if (gdb_off) {
				784	primary = sbi->s_group_desc[gdb_num];
				785	if ((err = ext3_journal_get_write_access(handle, primary)))
				786	goto exit_journal;
				787
				788	if (reserved_gdb && ext3_bg_num_gdb(sb, input->group) &&
				789	(err = reserve_backup_gdb(handle, inode, input)))
				790	goto exit_journal;
				791	} else if ((err = add_new_gdb(handle, inode, input, &primary)))
				792	goto exit_journal;
				793
				794	/*
				795	* OK, now we've set up the new group. Time to make it active.
				796	*
				797	* Current kernels don't lock all allocations via lock_super(),
				798	* so we have to be safe wrt. concurrent accesses to the group
				799	* data. So we need to be careful to set all of the relevant
				800	* group descriptor data etc. before we enable the group.
				801	*
Glauber de Oliveira Costa	29ba172	2006-01-08 01:03:23 -0800	[diff] [blame]	802	* The key field here is sbi->s_groups_count: as long as
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	803	* that retains its old value, nobody is going to access the new
				804	* group.
				805	*
				806	* So first we update all the descriptor metadata for the new
				807	* group; then we update the total disk blocks count; then we
				808	* update the groups count to enable the group; then finally we
				809	* update the free space counts so that the system can start
				810	* using the new disk blocks.
				811	*/
				812
				813	/* Update group descriptor block for new group */
				814	gdp = (struct ext3_group_desc *)primary->b_data + gdb_off;
				815
				816	gdp->bg_block_bitmap = cpu_to_le32(input->block_bitmap);
				817	gdp->bg_inode_bitmap = cpu_to_le32(input->inode_bitmap);
				818	gdp->bg_inode_table = cpu_to_le32(input->inode_table);
				819	gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
				820	gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb));
				821
				822	/*
				823	* Make the new blocks and inodes valid next. We do this before
				824	* increasing the group count so that once the group is enabled,
				825	* all of its blocks and inodes are already valid.
				826	*
				827	* We always allocate group-by-group, then block-by-block or
				828	* inode-by-inode within a group, so enabling these
				829	* blocks/inodes before the group is live won't actually let us
				830	* allocate the new space yet.
				831	*/
				832	es->s_blocks_count = cpu_to_le32(le32_to_cpu(es->s_blocks_count) +
				833	input->blocks_count);
				834	es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) +
				835	EXT3_INODES_PER_GROUP(sb));
				836
				837	/*
				838	* We need to protect s_groups_count against other CPUs seeing
				839	* inconsistent state in the superblock.
				840	*
				841	* The precise rules we use are:
				842	*
				843	* * Writers of s_groups_count must hold lock_super
				844	* AND
				845	* * Writers must perform a smp_wmb() after updating all dependent
				846	* data and before modifying the groups count
				847	*
				848	* * Readers must hold lock_super() over the access
				849	* OR
				850	* * Readers must perform an smp_rmb() after reading the groups count
				851	* and before reading any dependent data.
				852	*
				853	* NB. These rules can be relaxed when checking the group count
				854	* while freeing data, as we can only allocate from a block
				855	* group after serialising against the group count, and we can
				856	* only then free after serialising in turn against that
				857	* allocation.
				858	*/
				859	smp_wmb();
				860
				861	/* Update the global fs size fields */
Glauber de Oliveira Costa	29ba172	2006-01-08 01:03:23 -0800	[diff] [blame]	862	sbi->s_groups_count++;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	863
				864	ext3_journal_dirty_metadata(handle, primary);
				865
				866	/* Update the reserved block counts only once the new group is
				867	* active. */
				868	es->s_r_blocks_count = cpu_to_le32(le32_to_cpu(es->s_r_blocks_count) +
				869	input->reserved_blocks);
				870
				871	/* Update the free space counts */
				872	percpu_counter_mod(&sbi->s_freeblocks_counter,
				873	input->free_blocks_count);
				874	percpu_counter_mod(&sbi->s_freeinodes_counter,
				875	EXT3_INODES_PER_GROUP(sb));
				876
Glauber de Oliveira Costa	29ba172	2006-01-08 01:03:23 -0800	[diff] [blame]	877	ext3_journal_dirty_metadata(handle, sbi->s_sbh);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	878	sb->s_dirt = 1;
				879
				880	exit_journal:
				881	unlock_super(sb);
				882	if ((err2 = ext3_journal_stop(handle)) && !err)
				883	err = err2;
				884	if (!err) {
				885	update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
				886	sizeof(struct ext3_super_block));
				887	update_backups(sb, primary->b_blocknr, primary->b_data,
				888	primary->b_size);
				889	}
				890	exit_put:
				891	iput(inode);
				892	return err;
				893	} /* ext3_group_add */
				894
				895	/* Extend the filesystem to the new number of blocks specified. This entry
				896	* point is only used to extend the current filesystem to the end of the last
				897	* existing group. It can be accessed via ioctl, or by "remount,resize=<size>"
				898	* for emergencies (because it has no dependencies on reserved blocks).
				899	*
				900	* If we _really_ wanted, we could use default values to call ext3_group_add()
				901	* allow the "remount" trick to work for arbitrary resizing, assuming enough
				902	* GDT blocks are reserved to grow to the desired size.
				903	*/
				904	int ext3_group_extend(struct super_block sb, struct ext3_super_block es,
				905	unsigned long n_blocks_count)
				906	{
				907	unsigned long o_blocks_count;
				908	unsigned long o_groups_count;
				909	unsigned long last;
				910	int add;
				911	struct buffer_head * bh;
				912	handle_t *handle;
				913	int err, freed_blocks;
				914
				915	/* We don't need to worry about locking wrt other resizers just
				916	* yet: we're going to revalidate es->s_blocks_count after
				917	* taking lock_super() below. */
				918	o_blocks_count = le32_to_cpu(es->s_blocks_count);
				919	o_groups_count = EXT3_SB(sb)->s_groups_count;
				920
				921	if (test_opt(sb, DEBUG))
				922	printk(KERN_DEBUG "EXT3-fs: extending last group from %lu to %lu blocks\n",
				923	o_blocks_count, n_blocks_count);
				924
				925	if (n_blocks_count == 0 \|\| n_blocks_count == o_blocks_count)
				926	return 0;
				927
				928	if (n_blocks_count < o_blocks_count) {
				929	ext3_warning(sb, __FUNCTION__,
				930	"can't shrink FS - resize aborted");
				931	return -EBUSY;
				932	}
				933
				934	/* Handle the remaining blocks in the last group only. */
				935	last = (o_blocks_count - le32_to_cpu(es->s_first_data_block)) %
				936	EXT3_BLOCKS_PER_GROUP(sb);
				937
				938	if (last == 0) {
				939	ext3_warning(sb, __FUNCTION__,
Glauber de Oliveira Costa	9f40668	2006-01-08 01:03:22 -0800	[diff] [blame]	940	"need to use ext2online to resize further");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	941	return -EPERM;
				942	}
				943
				944	add = EXT3_BLOCKS_PER_GROUP(sb) - last;
				945
				946	if (o_blocks_count + add > n_blocks_count)
				947	add = n_blocks_count - o_blocks_count;
				948
				949	if (o_blocks_count + add < n_blocks_count)
				950	ext3_warning(sb, __FUNCTION__,
				951	"will only finish group (%lu blocks, %u new)",
				952	o_blocks_count + add, add);
				953
				954	/* See if the device is actually as big as what was requested */
				955	bh = sb_bread(sb, o_blocks_count + add -1);
				956	if (!bh) {
				957	ext3_warning(sb, __FUNCTION__,
				958	"can't read last block, resize aborted");
				959	return -ENOSPC;
				960	}
				961	brelse(bh);
				962
				963	/* We will update the superblock, one block bitmap, and
				964	* one group descriptor via ext3_free_blocks().
				965	*/
				966	handle = ext3_journal_start_sb(sb, 3);
				967	if (IS_ERR(handle)) {
				968	err = PTR_ERR(handle);
				969	ext3_warning(sb, __FUNCTION__, "error %d on journal start",err);
				970	goto exit_put;
				971	}
				972
				973	lock_super(sb);
				974	if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
				975	ext3_warning(sb, __FUNCTION__,
Glauber de Oliveira Costa	9f40668	2006-01-08 01:03:22 -0800	[diff] [blame]	976	"multiple resizers run on filesystem!");
Ananiev, Leonid I	389ed39	2006-04-10 22:54:38 -0700	[diff] [blame]	977	unlock_super(sb);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	978	err = -EBUSY;
				979	goto exit_put;
				980	}
				981
				982	if ((err = ext3_journal_get_write_access(handle,
				983	EXT3_SB(sb)->s_sbh))) {
				984	ext3_warning(sb, __FUNCTION__,
				985	"error %d on journal write access", err);
				986	unlock_super(sb);
				987	ext3_journal_stop(handle);
				988	goto exit_put;
				989	}
				990	es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
				991	ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
				992	sb->s_dirt = 1;
				993	unlock_super(sb);
				994	ext3_debug("freeing blocks %ld through %ld\n", o_blocks_count,
				995	o_blocks_count + add);
				996	ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
				997	ext3_debug("freed blocks %ld through %ld\n", o_blocks_count,
				998	o_blocks_count + add);
				999	if ((err = ext3_journal_stop(handle)))
				1000	goto exit_put;
				1001	if (test_opt(sb, DEBUG))
				1002	printk(KERN_DEBUG "EXT3-fs: extended group to %u blocks\n",
				1003	le32_to_cpu(es->s_blocks_count));
				1004	update_backups(sb, EXT3_SB(sb)->s_sbh->b_blocknr, (char *)es,
				1005	sizeof(struct ext3_super_block));
				1006	exit_put:
				1007	return err;
				1008	} /* ext3_group_extend */