Blame - fs/ext3/resize.c - kernel/msm-4.9

blob: 2c9f81278d5dab6749ef9001a8b57808e9dfb35d [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame^]	1	/*
				2	* linux/fs/ext3/resize.c
				3	*
				4	* Support for resizing an ext3 filesystem while it is mounted.
				5	*
				6	* Copyright (C) 2001, 2002 Andreas Dilger <adilger@clusterfs.com>
				7	*
				8	* This could probably be made into a module, because it is not often in use.
				9	*/
				10
				11	#include <linux/config.h>
				12
				13	#define EXT3FS_DEBUG
				14
				15	#include <linux/sched.h>
				16	#include <linux/smp_lock.h>
				17	#include <linux/ext3_jbd.h>
				18
				19	#include <linux/errno.h>
				20	#include <linux/slab.h>
				21
				22
				23	#define outside(b, first, last) ((b) < (first) \|\| (b) >= (last))
				24	#define inside(b, first, last) ((b) >= (first) && (b) < (last))
				25
				26	static int verify_group_input(struct super_block *sb,
				27	struct ext3_new_group_data *input)
				28	{
				29	struct ext3_sb_info *sbi = EXT3_SB(sb);
				30	struct ext3_super_block *es = sbi->s_es;
				31	unsigned start = le32_to_cpu(es->s_blocks_count);
				32	unsigned end = start + input->blocks_count;
				33	unsigned group = input->group;
				34	unsigned itend = input->inode_table + EXT3_SB(sb)->s_itb_per_group;
				35	unsigned overhead = ext3_bg_has_super(sb, group) ?
				36	(1 + ext3_bg_num_gdb(sb, group) +
				37	le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
				38	unsigned metaend = start + overhead;
				39	struct buffer_head *bh = NULL;
				40	int free_blocks_count;
				41	int err = -EINVAL;
				42
				43	input->free_blocks_count = free_blocks_count =
				44	input->blocks_count - 2 - overhead - sbi->s_itb_per_group;
				45
				46	if (test_opt(sb, DEBUG))
				47	printk(KERN_DEBUG "EXT3-fs: adding %s group %u: %u blocks "
				48	"(%d free, %u reserved)\n",
				49	ext3_bg_has_super(sb, input->group) ? "normal" :
				50	"no-super", input->group, input->blocks_count,
				51	free_blocks_count, input->reserved_blocks);
				52
				53	if (group != sbi->s_groups_count)
				54	ext3_warning(sb, __FUNCTION__,
				55	"Cannot add at group %u (only %lu groups)",
				56	input->group, sbi->s_groups_count);
				57	else if ((start - le32_to_cpu(es->s_first_data_block)) %
				58	EXT3_BLOCKS_PER_GROUP(sb))
				59	ext3_warning(sb, __FUNCTION__, "Last group not full");
				60	else if (input->reserved_blocks > input->blocks_count / 5)
				61	ext3_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)",
				62	input->reserved_blocks);
				63	else if (free_blocks_count < 0)
				64	ext3_warning(sb, __FUNCTION__, "Bad blocks count %u",
				65	input->blocks_count);
				66	else if (!(bh = sb_bread(sb, end - 1)))
				67	ext3_warning(sb, __FUNCTION__, "Cannot read last block (%u)",
				68	end - 1);
				69	else if (outside(input->block_bitmap, start, end))
				70	ext3_warning(sb, __FUNCTION__,
				71	"Block bitmap not in group (block %u)",
				72	input->block_bitmap);
				73	else if (outside(input->inode_bitmap, start, end))
				74	ext3_warning(sb, __FUNCTION__,
				75	"Inode bitmap not in group (block %u)",
				76	input->inode_bitmap);
				77	else if (outside(input->inode_table, start, end) \|\|
				78	outside(itend - 1, start, end))
				79	ext3_warning(sb, __FUNCTION__,
				80	"Inode table not in group (blocks %u-%u)",
				81	input->inode_table, itend - 1);
				82	else if (input->inode_bitmap == input->block_bitmap)
				83	ext3_warning(sb, __FUNCTION__,
				84	"Block bitmap same as inode bitmap (%u)",
				85	input->block_bitmap);
				86	else if (inside(input->block_bitmap, input->inode_table, itend))
				87	ext3_warning(sb, __FUNCTION__,
				88	"Block bitmap (%u) in inode table (%u-%u)",
				89	input->block_bitmap, input->inode_table, itend-1);
				90	else if (inside(input->inode_bitmap, input->inode_table, itend))
				91	ext3_warning(sb, __FUNCTION__,
				92	"Inode bitmap (%u) in inode table (%u-%u)",
				93	input->inode_bitmap, input->inode_table, itend-1);
				94	else if (inside(input->block_bitmap, start, metaend))
				95	ext3_warning(sb, __FUNCTION__,
				96	"Block bitmap (%u) in GDT table (%u-%u)",
				97	input->block_bitmap, start, metaend - 1);
				98	else if (inside(input->inode_bitmap, start, metaend))
				99	ext3_warning(sb, __FUNCTION__,
				100	"Inode bitmap (%u) in GDT table (%u-%u)",
				101	input->inode_bitmap, start, metaend - 1);
				102	else if (inside(input->inode_table, start, metaend) \|\|
				103	inside(itend - 1, start, metaend))
				104	ext3_warning(sb, __FUNCTION__,
				105	"Inode table (%u-%u) overlaps GDT table (%u-%u)",
				106	input->inode_table, itend - 1, start, metaend - 1);
				107	else
				108	err = 0;
				109	brelse(bh);
				110
				111	return err;
				112	}
				113
				114	static struct buffer_head bclean(handle_t handle, struct super_block *sb,
				115	unsigned long blk)
				116	{
				117	struct buffer_head *bh;
				118	int err;
				119
				120	bh = sb_getblk(sb, blk);
				121	if ((err = ext3_journal_get_write_access(handle, bh))) {
				122	brelse(bh);
				123	bh = ERR_PTR(err);
				124	} else {
				125	lock_buffer(bh);
				126	memset(bh->b_data, 0, sb->s_blocksize);
				127	set_buffer_uptodate(bh);
				128	unlock_buffer(bh);
				129	}
				130
				131	return bh;
				132	}
				133
				134	/*
				135	* To avoid calling the atomic setbit hundreds or thousands of times, we only
				136	* need to use it within a single byte (to ensure we get endianness right).
				137	* We can use memset for the rest of the bitmap as there are no other users.
				138	*/
				139	static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
				140	{
				141	int i;
				142
				143	if (start_bit >= end_bit)
				144	return;
				145
				146	ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
				147	for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
				148	ext3_set_bit(i, bitmap);
				149	if (i < end_bit)
				150	memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
				151	}
				152
				153	/*
				154	* Set up the block and inode bitmaps, and the inode table for the new group.
				155	* This doesn't need to be part of the main transaction, since we are only
				156	* changing blocks outside the actual filesystem. We still do journaling to
				157	* ensure the recovery is correct in case of a failure just after resize.
				158	* If any part of this fails, we simply abort the resize.
				159	*/
				160	static int setup_new_group_blocks(struct super_block *sb,
				161	struct ext3_new_group_data *input)
				162	{
				163	struct ext3_sb_info *sbi = EXT3_SB(sb);
				164	unsigned long start = input->group * sbi->s_blocks_per_group +
				165	le32_to_cpu(sbi->s_es->s_first_data_block);
				166	int reserved_gdb = ext3_bg_has_super(sb, input->group) ?
				167	le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0;
				168	unsigned long gdblocks = ext3_bg_num_gdb(sb, input->group);
				169	struct buffer_head *bh;
				170	handle_t *handle;
				171	unsigned long block;
				172	int bit;
				173	int i;
				174	int err = 0, err2;
				175
				176	handle = ext3_journal_start_sb(sb, reserved_gdb + gdblocks +
				177	2 + sbi->s_itb_per_group);
				178	if (IS_ERR(handle))
				179	return PTR_ERR(handle);
				180
				181	lock_super(sb);
				182	if (input->group != sbi->s_groups_count) {
				183	err = -EBUSY;
				184	goto exit_journal;
				185	}
				186
				187	if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) {
				188	err = PTR_ERR(bh);
				189	goto exit_journal;
				190	}
				191
				192	if (ext3_bg_has_super(sb, input->group)) {
				193	ext3_debug("mark backup superblock %#04lx (+0)\n", start);
				194	ext3_set_bit(0, bh->b_data);
				195	}
				196
				197	/* Copy all of the GDT blocks into the backup in this group */
				198	for (i = 0, bit = 1, block = start + 1;
				199	i < gdblocks; i++, block++, bit++) {
				200	struct buffer_head *gdb;
				201
				202	ext3_debug("update backup group %#04lx (+%d)\n", block, bit);
				203
				204	gdb = sb_getblk(sb, block);
				205	if ((err = ext3_journal_get_write_access(handle, gdb))) {
				206	brelse(gdb);
				207	goto exit_bh;
				208	}
				209	lock_buffer(bh);
				210	memcpy(gdb->b_data, sbi->s_group_desc[i], bh->b_size);
				211	set_buffer_uptodate(gdb);
				212	unlock_buffer(bh);
				213	ext3_journal_dirty_metadata(handle, gdb);
				214	ext3_set_bit(bit, bh->b_data);
				215	brelse(gdb);
				216	}
				217
				218	/* Zero out all of the reserved backup group descriptor table blocks */
				219	for (i = 0, bit = gdblocks + 1, block = start + bit;
				220	i < reserved_gdb; i++, block++, bit++) {
				221	struct buffer_head *gdb;
				222
				223	ext3_debug("clear reserved block %#04lx (+%d)\n", block, bit);
				224
				225	if (IS_ERR(gdb = bclean(handle, sb, block))) {
				226	err = PTR_ERR(bh);
				227	goto exit_bh;
				228	}
				229	ext3_journal_dirty_metadata(handle, gdb);
				230	ext3_set_bit(bit, bh->b_data);
				231	brelse(gdb);
				232	}
				233	ext3_debug("mark block bitmap %#04x (+%ld)\n", input->block_bitmap,
				234	input->block_bitmap - start);
				235	ext3_set_bit(input->block_bitmap - start, bh->b_data);
				236	ext3_debug("mark inode bitmap %#04x (+%ld)\n", input->inode_bitmap,
				237	input->inode_bitmap - start);
				238	ext3_set_bit(input->inode_bitmap - start, bh->b_data);
				239
				240	/* Zero out all of the inode table blocks */
				241	for (i = 0, block = input->inode_table, bit = block - start;
				242	i < sbi->s_itb_per_group; i++, bit++, block++) {
				243	struct buffer_head *it;
				244
				245	ext3_debug("clear inode block %#04x (+%ld)\n", block, bit);
				246	if (IS_ERR(it = bclean(handle, sb, block))) {
				247	err = PTR_ERR(it);
				248	goto exit_bh;
				249	}
				250	ext3_journal_dirty_metadata(handle, it);
				251	brelse(it);
				252	ext3_set_bit(bit, bh->b_data);
				253	}
				254	mark_bitmap_end(input->blocks_count, EXT3_BLOCKS_PER_GROUP(sb),
				255	bh->b_data);
				256	ext3_journal_dirty_metadata(handle, bh);
				257	brelse(bh);
				258
				259	/* Mark unused entries in inode bitmap used */
				260	ext3_debug("clear inode bitmap %#04x (+%ld)\n",
				261	input->inode_bitmap, input->inode_bitmap - start);
				262	if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) {
				263	err = PTR_ERR(bh);
				264	goto exit_journal;
				265	}
				266
				267	mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb),
				268	bh->b_data);
				269	ext3_journal_dirty_metadata(handle, bh);
				270	exit_bh:
				271	brelse(bh);
				272
				273	exit_journal:
				274	unlock_super(sb);
				275	if ((err2 = ext3_journal_stop(handle)) && !err)
				276	err = err2;
				277
				278	return err;
				279	}
				280
				281	/*
				282	* Iterate through the groups which hold BACKUP superblock/GDT copies in an
				283	* ext3 filesystem. The counters should be initialized to 1, 5, and 7 before
				284	* calling this for the first time. In a sparse filesystem it will be the
				285	* sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ...
				286	* For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ...
				287	*/
				288	static unsigned ext3_list_backups(struct super_block sb, unsigned three,
				289	unsigned five, unsigned seven)
				290	{
				291	unsigned *min = three;
				292	int mult = 3;
				293	unsigned ret;
				294
				295	if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
				296	EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
				297	ret = *min;
				298	*min += 1;
				299	return ret;
				300	}
				301
				302	if (five < min) {
				303	min = five;
				304	mult = 5;
				305	}
				306	if (seven < min) {
				307	min = seven;
				308	mult = 7;
				309	}
				310
				311	ret = *min;
				312	min = mult;
				313
				314	return ret;
				315	}
				316
				317	/*
				318	* Check that all of the backup GDT blocks are held in the primary GDT block.
				319	* It is assumed that they are stored in group order. Returns the number of
				320	* groups in current filesystem that have BACKUPS, or -ve error code.
				321	*/
				322	static int verify_reserved_gdb(struct super_block *sb,
				323	struct buffer_head *primary)
				324	{
				325	const unsigned long blk = primary->b_blocknr;
				326	const unsigned long end = EXT3_SB(sb)->s_groups_count;
				327	unsigned three = 1;
				328	unsigned five = 5;
				329	unsigned seven = 7;
				330	unsigned grp;
				331	__u32 p = (__u32 )primary->b_data;
				332	int gdbackups = 0;
				333
				334	while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) {
				335	if (le32_to_cpu(p++) != grp EXT3_BLOCKS_PER_GROUP(sb) + blk){
				336	ext3_warning(sb, __FUNCTION__,
				337	"reserved GDT %ld missing grp %d (%ld)\n",
				338	blk, grp,
				339	grp * EXT3_BLOCKS_PER_GROUP(sb) + blk);
				340	return -EINVAL;
				341	}
				342	if (++gdbackups > EXT3_ADDR_PER_BLOCK(sb))
				343	return -EFBIG;
				344	}
				345
				346	return gdbackups;
				347	}
				348
				349	/*
				350	* Called when we need to bring a reserved group descriptor table block into
				351	* use from the resize inode. The primary copy of the new GDT block currently
				352	* is an indirect block (under the double indirect block in the resize inode).
				353	* The new backup GDT blocks will be stored as leaf blocks in this indirect
				354	* block, in group order. Even though we know all the block numbers we need,
				355	* we check to ensure that the resize inode has actually reserved these blocks.
				356	*
				357	* Don't need to update the block bitmaps because the blocks are still in use.
				358	*
				359	* We get all of the error cases out of the way, so that we are sure to not
				360	* fail once we start modifying the data on disk, because JBD has no rollback.
				361	*/
				362	static int add_new_gdb(handle_t handle, struct inode inode,
				363	struct ext3_new_group_data *input,
				364	struct buffer_head **primary)
				365	{
				366	struct super_block *sb = inode->i_sb;
				367	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
				368	unsigned long gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb);
				369	unsigned long gdblock = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num;
				370	struct buffer_head o_group_desc, n_group_desc;
				371	struct buffer_head *dind;
				372	int gdbackups;
				373	struct ext3_iloc iloc;
				374	__u32 *data;
				375	int err;
				376
				377	if (test_opt(sb, DEBUG))
				378	printk(KERN_DEBUG
				379	"EXT3-fs: ext3_add_new_gdb: adding group block %lu\n",
				380	gdb_num);
				381
				382	/*
				383	* If we are not using the primary superblock/GDT copy don't resize,
				384	* because the user tools have no way of handling this. Probably a
				385	* bad time to do it anyways.
				386	*/
				387	if (EXT3_SB(sb)->s_sbh->b_blocknr !=
				388	le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) {
				389	ext3_warning(sb, __FUNCTION__,
				390	"won't resize using backup superblock at %llu\n",
				391	(unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr);
				392	return -EPERM;
				393	}
				394
				395	*primary = sb_bread(sb, gdblock);
				396	if (!*primary)
				397	return -EIO;
				398
				399	if ((gdbackups = verify_reserved_gdb(sb, *primary)) < 0) {
				400	err = gdbackups;
				401	goto exit_bh;
				402	}
				403
				404	data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK;
				405	dind = sb_bread(sb, le32_to_cpu(*data));
				406	if (!dind) {
				407	err = -EIO;
				408	goto exit_bh;
				409	}
				410
				411	data = (__u32 *)dind->b_data;
				412	if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) {
				413	ext3_warning(sb, __FUNCTION__,
				414	"new group %u GDT block %lu not reserved\n",
				415	input->group, gdblock);
				416	err = -EINVAL;
				417	goto exit_dind;
				418	}
				419
				420	if ((err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh)))
				421	goto exit_dind;
				422
				423	if ((err = ext3_journal_get_write_access(handle, *primary)))
				424	goto exit_sbh;
				425
				426	if ((err = ext3_journal_get_write_access(handle, dind)))
				427	goto exit_primary;
				428
				429	/* ext3_reserve_inode_write() gets a reference on the iloc */
				430	if ((err = ext3_reserve_inode_write(handle, inode, &iloc)))
				431	goto exit_dindj;
				432
				433	n_group_desc = (struct buffer_head *)kmalloc((gdb_num + 1)
				434	sizeof(struct buffer_head *), GFP_KERNEL);
				435	if (!n_group_desc) {
				436	err = -ENOMEM;
				437	ext3_warning (sb, __FUNCTION__,
				438	"not enough memory for %lu groups", gdb_num + 1);
				439	goto exit_inode;
				440	}
				441
				442	/*
				443	* Finally, we have all of the possible failures behind us...
				444	*
				445	* Remove new GDT block from inode double-indirect block and clear out
				446	* the new GDT block for use (which also "frees" the backup GDT blocks
				447	* from the reserved inode). We don't need to change the bitmaps for
				448	* these blocks, because they are marked as in-use from being in the
				449	* reserved inode, and will become GDT blocks (primary and backup).
				450	*/
				451	data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)] = 0;
				452	ext3_journal_dirty_metadata(handle, dind);
				453	brelse(dind);
				454	inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
				455	ext3_mark_iloc_dirty(handle, inode, &iloc);
				456	memset((*primary)->b_data, 0, sb->s_blocksize);
				457	ext3_journal_dirty_metadata(handle, *primary);
				458
				459	o_group_desc = EXT3_SB(sb)->s_group_desc;
				460	memcpy(n_group_desc, o_group_desc,
				461	EXT3_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
				462	n_group_desc[gdb_num] = *primary;
				463	EXT3_SB(sb)->s_group_desc = n_group_desc;
				464	EXT3_SB(sb)->s_gdb_count++;
				465	kfree(o_group_desc);
				466
				467	es->s_reserved_gdt_blocks =
				468	cpu_to_le16(le16_to_cpu(es->s_reserved_gdt_blocks) - 1);
				469	ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
				470
				471	return 0;
				472
				473	exit_inode:
				474	//ext3_journal_release_buffer(handle, iloc.bh);
				475	brelse(iloc.bh);
				476	exit_dindj:
				477	//ext3_journal_release_buffer(handle, dind);
				478	exit_primary:
				479	//ext3_journal_release_buffer(handle, *primary);
				480	exit_sbh:
				481	//ext3_journal_release_buffer(handle, *primary);
				482	exit_dind:
				483	brelse(dind);
				484	exit_bh:
				485	brelse(*primary);
				486
				487	ext3_debug("leaving with error %d\n", err);
				488	return err;
				489	}
				490
				491	/*
				492	* Called when we are adding a new group which has a backup copy of each of
				493	* the GDT blocks (i.e. sparse group) and there are reserved GDT blocks.
				494	* We need to add these reserved backup GDT blocks to the resize inode, so
				495	* that they are kept for future resizing and not allocated to files.
				496	*
				497	* Each reserved backup GDT block will go into a different indirect block.
				498	* The indirect blocks are actually the primary reserved GDT blocks,
				499	* so we know in advance what their block numbers are. We only get the
				500	* double-indirect block to verify it is pointing to the primary reserved
				501	* GDT blocks so we don't overwrite a data block by accident. The reserved
				502	* backup GDT blocks are stored in their reserved primary GDT block.
				503	*/
				504	static int reserve_backup_gdb(handle_t handle, struct inode inode,
				505	struct ext3_new_group_data *input)
				506	{
				507	struct super_block *sb = inode->i_sb;
				508	int reserved_gdb =le16_to_cpu(EXT3_SB(sb)->s_es->s_reserved_gdt_blocks);
				509	struct buffer_head **primary;
				510	struct buffer_head *dind;
				511	struct ext3_iloc iloc;
				512	unsigned long blk;
				513	__u32 data, end;
				514	int gdbackups = 0;
				515	int res, i;
				516	int err;
				517
				518	primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_KERNEL);
				519	if (!primary)
				520	return -ENOMEM;
				521
				522	data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK;
				523	dind = sb_bread(sb, le32_to_cpu(*data));
				524	if (!dind) {
				525	err = -EIO;
				526	goto exit_free;
				527	}
				528
				529	blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count;
				530	data = (__u32 *)dind->b_data + EXT3_SB(sb)->s_gdb_count;
				531	end = (__u32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb);
				532
				533	/* Get each reserved primary GDT block and verify it holds backups */
				534	for (res = 0; res < reserved_gdb; res++, blk++) {
				535	if (le32_to_cpu(*data) != blk) {
				536	ext3_warning(sb, __FUNCTION__,
				537	"reserved block %lu not at offset %ld\n",
				538	blk, (long)(data - (__u32 *)dind->b_data));
				539	err = -EINVAL;
				540	goto exit_bh;
				541	}
				542	primary[res] = sb_bread(sb, blk);
				543	if (!primary[res]) {
				544	err = -EIO;
				545	goto exit_bh;
				546	}
				547	if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) {
				548	brelse(primary[res]);
				549	err = gdbackups;
				550	goto exit_bh;
				551	}
				552	if (++data >= end)
				553	data = (__u32 *)dind->b_data;
				554	}
				555
				556	for (i = 0; i < reserved_gdb; i++) {
				557	if ((err = ext3_journal_get_write_access(handle, primary[i]))) {
				558	/*
				559	int j;
				560	for (j = 0; j < i; j++)
				561	ext3_journal_release_buffer(handle, primary[j]);
				562	*/
				563	goto exit_bh;
				564	}
				565	}
				566
				567	if ((err = ext3_reserve_inode_write(handle, inode, &iloc)))
				568	goto exit_bh;
				569
				570	/*
				571	* Finally we can add each of the reserved backup GDT blocks from
				572	* the new group to its reserved primary GDT block.
				573	*/
				574	blk = input->group * EXT3_BLOCKS_PER_GROUP(sb);
				575	for (i = 0; i < reserved_gdb; i++) {
				576	int err2;
				577	data = (__u32 *)primary[i]->b_data;
				578	/* printk("reserving backup %lu[%u] = %lu\n",
				579	primary[i]->b_blocknr, gdbackups,
				580	blk + primary[i]->b_blocknr); */
				581	data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr);
				582	err2 = ext3_journal_dirty_metadata(handle, primary[i]);
				583	if (!err)
				584	err = err2;
				585	}
				586	inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9;
				587	ext3_mark_iloc_dirty(handle, inode, &iloc);
				588
				589	exit_bh:
				590	while (--res >= 0)
				591	brelse(primary[res]);
				592	brelse(dind);
				593
				594	exit_free:
				595	kfree(primary);
				596
				597	return err;
				598	}
				599
				600	/*
				601	* Update the backup copies of the ext3 metadata. These don't need to be part
				602	* of the main resize transaction, because e2fsck will re-write them if there
				603	* is a problem (basically only OOM will cause a problem). However, we
				604	* _should_ update the backups if possible, in case the primary gets trashed
				605	* for some reason and we need to run e2fsck from a backup superblock. The
				606	* important part is that the new block and inode counts are in the backup
				607	* superblocks, and the location of the new group metadata in the GDT backups.
				608	*
				609	* We do not need lock_super() for this, because these blocks are not
				610	* otherwise touched by the filesystem code when it is mounted. We don't
				611	* need to worry about last changing from sbi->s_groups_count, because the
				612	* worst that can happen is that we do not copy the full number of backups
				613	* at this time. The resize which changed s_groups_count will backup again.
				614	*/
				615	static void update_backups(struct super_block *sb,
				616	int blk_off, char *data, int size)
				617	{
				618	struct ext3_sb_info *sbi = EXT3_SB(sb);
				619	const unsigned long last = sbi->s_groups_count;
				620	const int bpg = EXT3_BLOCKS_PER_GROUP(sb);
				621	unsigned three = 1;
				622	unsigned five = 5;
				623	unsigned seven = 7;
				624	unsigned group;
				625	int rest = sb->s_blocksize - size;
				626	handle_t *handle;
				627	int err = 0, err2;
				628
				629	handle = ext3_journal_start_sb(sb, EXT3_MAX_TRANS_DATA);
				630	if (IS_ERR(handle)) {
				631	group = 1;
				632	err = PTR_ERR(handle);
				633	goto exit_err;
				634	}
				635
				636	while ((group = ext3_list_backups(sb, &three, &five, &seven)) < last) {
				637	struct buffer_head *bh;
				638
				639	/* Out of journal space, and can't get more - abort - so sad */
				640	if (handle->h_buffer_credits == 0 &&
				641	ext3_journal_extend(handle, EXT3_MAX_TRANS_DATA) &&
				642	(err = ext3_journal_restart(handle, EXT3_MAX_TRANS_DATA)))
				643	break;
				644
				645	bh = sb_getblk(sb, group * bpg + blk_off);
				646	ext3_debug(sb, __FUNCTION__, "update metadata backup %#04lx\n",
				647	bh->b_blocknr);
				648	if ((err = ext3_journal_get_write_access(handle, bh)))
				649	break;
				650	lock_buffer(bh);
				651	memcpy(bh->b_data, data, size);
				652	if (rest)
				653	memset(bh->b_data + size, 0, rest);
				654	set_buffer_uptodate(bh);
				655	unlock_buffer(bh);
				656	ext3_journal_dirty_metadata(handle, bh);
				657	brelse(bh);
				658	}
				659	if ((err2 = ext3_journal_stop(handle)) && !err)
				660	err = err2;
				661
				662	/*
				663	* Ugh! Need to have e2fsck write the backup copies. It is too
				664	* late to revert the resize, we shouldn't fail just because of
				665	* the backup copies (they are only needed in case of corruption).
				666	*
				667	* However, if we got here we have a journal problem too, so we
				668	* can't really start a transaction to mark the superblock.
				669	* Chicken out and just set the flag on the hope it will be written
				670	* to disk, and if not - we will simply wait until next fsck.
				671	*/
				672	exit_err:
				673	if (err) {
				674	ext3_warning(sb, __FUNCTION__,
				675	"can't update backup for group %d (err %d), "
				676	"forcing fsck on next reboot\n", group, err);
				677	sbi->s_mount_state &= ~EXT3_VALID_FS;
				678	sbi->s_es->s_state &= ~cpu_to_le16(EXT3_VALID_FS);
				679	mark_buffer_dirty(sbi->s_sbh);
				680	}
				681	}
				682
				683	/* Add group descriptor data to an existing or new group descriptor block.
				684	* Ensure we handle all possible error conditions _before_ we start modifying
				685	* the filesystem, because we cannot abort the transaction and not have it
				686	* write the data to disk.
				687	*
				688	* If we are on a GDT block boundary, we need to get the reserved GDT block.
				689	* Otherwise, we may need to add backup GDT blocks for a sparse group.
				690	*
				691	* We only need to hold the superblock lock while we are actually adding
				692	* in the new group's counts to the superblock. Prior to that we have
				693	* not really "added" the group at all. We re-check that we are still
				694	* adding in the last group in case things have changed since verifying.
				695	*/
				696	int ext3_group_add(struct super_block sb, struct ext3_new_group_data input)
				697	{
				698	struct ext3_sb_info *sbi = EXT3_SB(sb);
				699	struct ext3_super_block *es = sbi->s_es;
				700	int reserved_gdb = ext3_bg_has_super(sb, input->group) ?
				701	le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
				702	struct buffer_head *primary = NULL;
				703	struct ext3_group_desc *gdp;
				704	struct inode *inode = NULL;
				705	handle_t *handle;
				706	int gdb_off, gdb_num;
				707	int err, err2;
				708
				709	gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb);
				710	gdb_off = input->group % EXT3_DESC_PER_BLOCK(sb);
				711
				712	if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb,
				713	EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
				714	ext3_warning(sb, __FUNCTION__,
				715	"Can't resize non-sparse filesystem further\n");
				716	return -EPERM;
				717	}
				718
				719	if (reserved_gdb \|\| gdb_off == 0) {
				720	if (!EXT3_HAS_COMPAT_FEATURE(sb,
				721	EXT3_FEATURE_COMPAT_RESIZE_INODE)){
				722	ext3_warning(sb, __FUNCTION__,
				723	"No reserved GDT blocks, can't resize\n");
				724	return -EPERM;
				725	}
				726	inode = iget(sb, EXT3_RESIZE_INO);
				727	if (!inode \|\| is_bad_inode(inode)) {
				728	ext3_warning(sb, __FUNCTION__,
				729	"Error opening resize inode\n");
				730	iput(inode);
				731	return -ENOENT;
				732	}
				733	}
				734
				735	if ((err = verify_group_input(sb, input)))
				736	goto exit_put;
				737
				738	if ((err = setup_new_group_blocks(sb, input)))
				739	goto exit_put;
				740
				741	/*
				742	* We will always be modifying at least the superblock and a GDT
				743	* block. If we are adding a group past the last current GDT block,
				744	* we will also modify the inode and the dindirect block. If we
				745	* are adding a group with superblock/GDT backups we will also
				746	* modify each of the reserved GDT dindirect blocks.
				747	*/
				748	handle = ext3_journal_start_sb(sb,
				749	ext3_bg_has_super(sb, input->group) ?
				750	3 + reserved_gdb : 4);
				751	if (IS_ERR(handle)) {
				752	err = PTR_ERR(handle);
				753	goto exit_put;
				754	}
				755
				756	lock_super(sb);
				757	if (input->group != EXT3_SB(sb)->s_groups_count) {
				758	ext3_warning(sb, __FUNCTION__,
				759	"multiple resizers run on filesystem!\n");
				760	goto exit_journal;
				761	}
				762
				763	if ((err = ext3_journal_get_write_access(handle, sbi->s_sbh)))
				764	goto exit_journal;
				765
				766	/*
				767	* We will only either add reserved group blocks to a backup group
				768	* or remove reserved blocks for the first group in a new group block.
				769	* Doing both would be mean more complex code, and sane people don't
				770	* use non-sparse filesystems anymore. This is already checked above.
				771	*/
				772	if (gdb_off) {
				773	primary = sbi->s_group_desc[gdb_num];
				774	if ((err = ext3_journal_get_write_access(handle, primary)))
				775	goto exit_journal;
				776
				777	if (reserved_gdb && ext3_bg_num_gdb(sb, input->group) &&
				778	(err = reserve_backup_gdb(handle, inode, input)))
				779	goto exit_journal;
				780	} else if ((err = add_new_gdb(handle, inode, input, &primary)))
				781	goto exit_journal;
				782
				783	/*
				784	* OK, now we've set up the new group. Time to make it active.
				785	*
				786	* Current kernels don't lock all allocations via lock_super(),
				787	* so we have to be safe wrt. concurrent accesses the group
				788	* data. So we need to be careful to set all of the relevant
				789	* group descriptor data etc. before we enable the group.
				790	*
				791	* The key field here is EXT3_SB(sb)->s_groups_count: as long as
				792	* that retains its old value, nobody is going to access the new
				793	* group.
				794	*
				795	* So first we update all the descriptor metadata for the new
				796	* group; then we update the total disk blocks count; then we
				797	* update the groups count to enable the group; then finally we
				798	* update the free space counts so that the system can start
				799	* using the new disk blocks.
				800	*/
				801
				802	/* Update group descriptor block for new group */
				803	gdp = (struct ext3_group_desc *)primary->b_data + gdb_off;
				804
				805	gdp->bg_block_bitmap = cpu_to_le32(input->block_bitmap);
				806	gdp->bg_inode_bitmap = cpu_to_le32(input->inode_bitmap);
				807	gdp->bg_inode_table = cpu_to_le32(input->inode_table);
				808	gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
				809	gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb));
				810
				811	/*
				812	* Make the new blocks and inodes valid next. We do this before
				813	* increasing the group count so that once the group is enabled,
				814	* all of its blocks and inodes are already valid.
				815	*
				816	* We always allocate group-by-group, then block-by-block or
				817	* inode-by-inode within a group, so enabling these
				818	* blocks/inodes before the group is live won't actually let us
				819	* allocate the new space yet.
				820	*/
				821	es->s_blocks_count = cpu_to_le32(le32_to_cpu(es->s_blocks_count) +
				822	input->blocks_count);
				823	es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) +
				824	EXT3_INODES_PER_GROUP(sb));
				825
				826	/*
				827	* We need to protect s_groups_count against other CPUs seeing
				828	* inconsistent state in the superblock.
				829	*
				830	* The precise rules we use are:
				831	*
				832	* * Writers of s_groups_count must hold lock_super
				833	* AND
				834	* * Writers must perform a smp_wmb() after updating all dependent
				835	* data and before modifying the groups count
				836	*
				837	* * Readers must hold lock_super() over the access
				838	* OR
				839	* * Readers must perform an smp_rmb() after reading the groups count
				840	* and before reading any dependent data.
				841	*
				842	* NB. These rules can be relaxed when checking the group count
				843	* while freeing data, as we can only allocate from a block
				844	* group after serialising against the group count, and we can
				845	* only then free after serialising in turn against that
				846	* allocation.
				847	*/
				848	smp_wmb();
				849
				850	/* Update the global fs size fields */
				851	EXT3_SB(sb)->s_groups_count++;
				852
				853	ext3_journal_dirty_metadata(handle, primary);
				854
				855	/* Update the reserved block counts only once the new group is
				856	* active. */
				857	es->s_r_blocks_count = cpu_to_le32(le32_to_cpu(es->s_r_blocks_count) +
				858	input->reserved_blocks);
				859
				860	/* Update the free space counts */
				861	percpu_counter_mod(&sbi->s_freeblocks_counter,
				862	input->free_blocks_count);
				863	percpu_counter_mod(&sbi->s_freeinodes_counter,
				864	EXT3_INODES_PER_GROUP(sb));
				865
				866	ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
				867	sb->s_dirt = 1;
				868
				869	exit_journal:
				870	unlock_super(sb);
				871	if ((err2 = ext3_journal_stop(handle)) && !err)
				872	err = err2;
				873	if (!err) {
				874	update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
				875	sizeof(struct ext3_super_block));
				876	update_backups(sb, primary->b_blocknr, primary->b_data,
				877	primary->b_size);
				878	}
				879	exit_put:
				880	iput(inode);
				881	return err;
				882	} /* ext3_group_add */
				883
				884	/* Extend the filesystem to the new number of blocks specified. This entry
				885	* point is only used to extend the current filesystem to the end of the last
				886	* existing group. It can be accessed via ioctl, or by "remount,resize=<size>"
				887	* for emergencies (because it has no dependencies on reserved blocks).
				888	*
				889	* If we _really_ wanted, we could use default values to call ext3_group_add()
				890	* allow the "remount" trick to work for arbitrary resizing, assuming enough
				891	* GDT blocks are reserved to grow to the desired size.
				892	*/
				893	int ext3_group_extend(struct super_block sb, struct ext3_super_block es,
				894	unsigned long n_blocks_count)
				895	{
				896	unsigned long o_blocks_count;
				897	unsigned long o_groups_count;
				898	unsigned long last;
				899	int add;
				900	struct buffer_head * bh;
				901	handle_t *handle;
				902	int err, freed_blocks;
				903
				904	/* We don't need to worry about locking wrt other resizers just
				905	* yet: we're going to revalidate es->s_blocks_count after
				906	* taking lock_super() below. */
				907	o_blocks_count = le32_to_cpu(es->s_blocks_count);
				908	o_groups_count = EXT3_SB(sb)->s_groups_count;
				909
				910	if (test_opt(sb, DEBUG))
				911	printk(KERN_DEBUG "EXT3-fs: extending last group from %lu to %lu blocks\n",
				912	o_blocks_count, n_blocks_count);
				913
				914	if (n_blocks_count == 0 \|\| n_blocks_count == o_blocks_count)
				915	return 0;
				916
				917	if (n_blocks_count < o_blocks_count) {
				918	ext3_warning(sb, __FUNCTION__,
				919	"can't shrink FS - resize aborted");
				920	return -EBUSY;
				921	}
				922
				923	/* Handle the remaining blocks in the last group only. */
				924	last = (o_blocks_count - le32_to_cpu(es->s_first_data_block)) %
				925	EXT3_BLOCKS_PER_GROUP(sb);
				926
				927	if (last == 0) {
				928	ext3_warning(sb, __FUNCTION__,
				929	"need to use ext2online to resize further\n");
				930	return -EPERM;
				931	}
				932
				933	add = EXT3_BLOCKS_PER_GROUP(sb) - last;
				934
				935	if (o_blocks_count + add > n_blocks_count)
				936	add = n_blocks_count - o_blocks_count;
				937
				938	if (o_blocks_count + add < n_blocks_count)
				939	ext3_warning(sb, __FUNCTION__,
				940	"will only finish group (%lu blocks, %u new)",
				941	o_blocks_count + add, add);
				942
				943	/* See if the device is actually as big as what was requested */
				944	bh = sb_bread(sb, o_blocks_count + add -1);
				945	if (!bh) {
				946	ext3_warning(sb, __FUNCTION__,
				947	"can't read last block, resize aborted");
				948	return -ENOSPC;
				949	}
				950	brelse(bh);
				951
				952	/* We will update the superblock, one block bitmap, and
				953	* one group descriptor via ext3_free_blocks().
				954	*/
				955	handle = ext3_journal_start_sb(sb, 3);
				956	if (IS_ERR(handle)) {
				957	err = PTR_ERR(handle);
				958	ext3_warning(sb, __FUNCTION__, "error %d on journal start",err);
				959	goto exit_put;
				960	}
				961
				962	lock_super(sb);
				963	if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
				964	ext3_warning(sb, __FUNCTION__,
				965	"multiple resizers run on filesystem!\n");
				966	err = -EBUSY;
				967	goto exit_put;
				968	}
				969
				970	if ((err = ext3_journal_get_write_access(handle,
				971	EXT3_SB(sb)->s_sbh))) {
				972	ext3_warning(sb, __FUNCTION__,
				973	"error %d on journal write access", err);
				974	unlock_super(sb);
				975	ext3_journal_stop(handle);
				976	goto exit_put;
				977	}
				978	es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
				979	ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
				980	sb->s_dirt = 1;
				981	unlock_super(sb);
				982	ext3_debug("freeing blocks %ld through %ld\n", o_blocks_count,
				983	o_blocks_count + add);
				984	ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
				985	ext3_debug("freed blocks %ld through %ld\n", o_blocks_count,
				986	o_blocks_count + add);
				987	if ((err = ext3_journal_stop(handle)))
				988	goto exit_put;
				989	if (test_opt(sb, DEBUG))
				990	printk(KERN_DEBUG "EXT3-fs: extended group to %u blocks\n",
				991	le32_to_cpu(es->s_blocks_count));
				992	update_backups(sb, EXT3_SB(sb)->s_sbh->b_blocknr, (char *)es,
				993	sizeof(struct ext3_super_block));
				994	exit_put:
				995	return err;
				996	} /* ext3_group_extend */