Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs-2.6:
quota: Improve checking of quota file header
jbd: jbd-debug and jbd2-debug should be writable
ext4: fix sleep inside spinlock issue with quota and dealloc (#14739)
ext4: Fix potential quota deadlock
quota: Fix 64-bit limits setting on 32-bit archs
ext3: Replace lock/unlock_super() with an explicit lock for resizing
ext3: Replace lock/unlock_super() with an explicit lock for the orphan list
ext3: ext3_mark_recovery_complete() doesn't need to use lock_super
ext3: Remove outdated comment about lock_super()
quota: Move duplicated code to separate functions
ext4: Convert to generic reserved quota's space management.
quota: decouple fs reserved space from quota reservation
Add unlocked version of inode_add_bytes() function
ext3: quota macros cleanup [V2]
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ad14227..455e6e6 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -970,7 +970,7 @@
if (max_blocks > DIO_MAX_BLOCKS)
max_blocks = DIO_MAX_BLOCKS;
handle = ext3_journal_start(inode, DIO_CREDITS +
- 2 * EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb));
+ EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb));
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
goto out;
@@ -3146,8 +3146,8 @@
/* (user+group)*(old+new) structure, inode write (sb,
* inode block, ? - but truncate inode update has it) */
- handle = ext3_journal_start(inode, 2*(EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)+
- EXT3_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
+ handle = ext3_journal_start(inode, EXT3_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
+ EXT3_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)+3);
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
goto err_out;
@@ -3239,7 +3239,7 @@
#ifdef CONFIG_QUOTA
/* We know that structure was already allocated during vfs_dq_init so
* we will be updating only the data blocks + inodes */
- ret += 2*EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb);
+ ret += EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
#endif
return ret;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index aad6400..7b0e44f7 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1699,7 +1699,7 @@
retry:
handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
- 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+ EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
if (IS_ERR(handle))
return PTR_ERR(handle);
@@ -1733,7 +1733,7 @@
retry:
handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
- 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+ EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
if (IS_ERR(handle))
return PTR_ERR(handle);
@@ -1769,7 +1769,7 @@
retry:
handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
- 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+ EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
if (IS_ERR(handle))
return PTR_ERR(handle);
@@ -1920,7 +1920,7 @@
struct ext3_iloc iloc;
int err = 0, rc;
- lock_super(sb);
+ mutex_lock(&EXT3_SB(sb)->s_orphan_lock);
if (!list_empty(&EXT3_I(inode)->i_orphan))
goto out_unlock;
@@ -1929,9 +1929,13 @@
/* @@@ FIXME: Observation from aviro:
* I think I can trigger J_ASSERT in ext3_orphan_add(). We block
- * here (on lock_super()), so race with ext3_link() which might bump
+ * here (on s_orphan_lock), so race with ext3_link() which might bump
* ->i_nlink. For, say it, character device. Not a regular file,
* not a directory, not a symlink and ->i_nlink > 0.
+ *
+ * tytso, 4/25/2009: I'm not sure how that could happen;
+ * shouldn't the fs core protect us from these sort of
+ * unlink()/link() races?
*/
J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
@@ -1968,7 +1972,7 @@
jbd_debug(4, "orphan inode %lu will point to %d\n",
inode->i_ino, NEXT_ORPHAN(inode));
out_unlock:
- unlock_super(sb);
+ mutex_unlock(&EXT3_SB(sb)->s_orphan_lock);
ext3_std_error(inode->i_sb, err);
return err;
}
@@ -1986,11 +1990,9 @@
struct ext3_iloc iloc;
int err = 0;
- lock_super(inode->i_sb);
- if (list_empty(&ei->i_orphan)) {
- unlock_super(inode->i_sb);
- return 0;
- }
+ mutex_lock(&EXT3_SB(inode->i_sb)->s_orphan_lock);
+ if (list_empty(&ei->i_orphan))
+ goto out;
ino_next = NEXT_ORPHAN(inode);
prev = ei->i_orphan.prev;
@@ -2040,7 +2042,7 @@
out_err:
ext3_std_error(inode->i_sb, err);
out:
- unlock_super(inode->i_sb);
+ mutex_unlock(&EXT3_SB(inode->i_sb)->s_orphan_lock);
return err;
out_brelse:
@@ -2175,7 +2177,7 @@
retry:
handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
- 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+ EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
if (IS_ERR(handle))
return PTR_ERR(handle);
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 5f83b61..54351ac 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -209,7 +209,7 @@
if (IS_ERR(handle))
return PTR_ERR(handle);
- lock_super(sb);
+ mutex_lock(&sbi->s_resize_lock);
if (input->group != sbi->s_groups_count) {
err = -EBUSY;
goto exit_journal;
@@ -324,7 +324,7 @@
brelse(bh);
exit_journal:
- unlock_super(sb);
+ mutex_unlock(&sbi->s_resize_lock);
if ((err2 = ext3_journal_stop(handle)) && !err)
err = err2;
@@ -662,11 +662,12 @@
* important part is that the new block and inode counts are in the backup
* superblocks, and the location of the new group metadata in the GDT backups.
*
- * We do not need lock_super() for this, because these blocks are not
- * otherwise touched by the filesystem code when it is mounted. We don't
- * need to worry about last changing from sbi->s_groups_count, because the
- * worst that can happen is that we do not copy the full number of backups
- * at this time. The resize which changed s_groups_count will backup again.
+ * We do not need take the s_resize_lock for this, because these
+ * blocks are not otherwise touched by the filesystem code when it is
+ * mounted. We don't need to worry about last changing from
+ * sbi->s_groups_count, because the worst that can happen is that we
+ * do not copy the full number of backups at this time. The resize
+ * which changed s_groups_count will backup again.
*/
static void update_backups(struct super_block *sb,
int blk_off, char *data, int size)
@@ -825,7 +826,7 @@
goto exit_put;
}
- lock_super(sb);
+ mutex_lock(&sbi->s_resize_lock);
if (input->group != sbi->s_groups_count) {
ext3_warning(sb, __func__,
"multiple resizers run on filesystem!");
@@ -856,7 +857,7 @@
/*
* OK, now we've set up the new group. Time to make it active.
*
- * Current kernels don't lock all allocations via lock_super(),
+ * We do not lock all allocations via s_resize_lock
* so we have to be safe wrt. concurrent accesses the group
* data. So we need to be careful to set all of the relevant
* group descriptor data etc. *before* we enable the group.
@@ -900,12 +901,12 @@
*
* The precise rules we use are:
*
- * * Writers of s_groups_count *must* hold lock_super
+ * * Writers of s_groups_count *must* hold s_resize_lock
* AND
* * Writers must perform a smp_wmb() after updating all dependent
* data and before modifying the groups count
*
- * * Readers must hold lock_super() over the access
+ * * Readers must hold s_resize_lock over the access
* OR
* * Readers must perform an smp_rmb() after reading the groups count
* and before reading any dependent data.
@@ -936,7 +937,7 @@
ext3_journal_dirty_metadata(handle, sbi->s_sbh);
exit_journal:
- unlock_super(sb);
+ mutex_unlock(&sbi->s_resize_lock);
if ((err2 = ext3_journal_stop(handle)) && !err)
err = err2;
if (!err) {
@@ -973,7 +974,7 @@
/* We don't need to worry about locking wrt other resizers just
* yet: we're going to revalidate es->s_blocks_count after
- * taking lock_super() below. */
+ * taking the s_resize_lock below. */
o_blocks_count = le32_to_cpu(es->s_blocks_count);
o_groups_count = EXT3_SB(sb)->s_groups_count;
@@ -1045,11 +1046,11 @@
goto exit_put;
}
- lock_super(sb);
+ mutex_lock(&EXT3_SB(sb)->s_resize_lock);
if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
ext3_warning(sb, __func__,
"multiple resizers run on filesystem!");
- unlock_super(sb);
+ mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
ext3_journal_stop(handle);
err = -EBUSY;
goto exit_put;
@@ -1059,13 +1060,13 @@
EXT3_SB(sb)->s_sbh))) {
ext3_warning(sb, __func__,
"error %d on journal write access", err);
- unlock_super(sb);
+ mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
ext3_journal_stop(handle);
goto exit_put;
}
es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
- unlock_super(sb);
+ mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count,
o_blocks_count + add);
ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 7ad1e8c..afa2b56 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1928,6 +1928,8 @@
sb->dq_op = &ext3_quota_operations;
#endif
INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
+ mutex_init(&sbi->s_orphan_lock);
+ mutex_init(&sbi->s_resize_lock);
sb->s_root = NULL;
@@ -2014,14 +2016,7 @@
}
ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
- /*
- * akpm: core read_super() calls in here with the superblock locked.
- * That deadlocks, because orphan cleanup needs to lock the superblock
- * in numerous places. Here we just pop the lock - it's relatively
- * harmless, because we are now ready to accept write_super() requests,
- * and aviro says that's the only reason for hanging onto the
- * superblock lock.
- */
+
EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS;
ext3_orphan_cleanup(sb, es);
EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
@@ -2403,13 +2398,11 @@
if (journal_flush(journal) < 0)
goto out;
- lock_super(sb);
if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
sb->s_flags & MS_RDONLY) {
EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
ext3_commit_super(sb, es, 1);
}
- unlock_super(sb);
out:
journal_unlock_updates(journal);
@@ -2601,13 +2594,7 @@
(sbi->s_mount_state & EXT3_VALID_FS))
es->s_state = cpu_to_le16(sbi->s_mount_state);
- /*
- * We have to unlock super so that we can wait for
- * transactions.
- */
- unlock_super(sb);
ext3_mark_recovery_complete(sb, es);
- lock_super(sb);
} else {
__le32 ret;
if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ab31e65..56f9271 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -704,6 +704,10 @@
__u16 i_extra_isize;
spinlock_t i_block_reservation_lock;
+#ifdef CONFIG_QUOTA
+ /* quota space reservation, managed internally by quota code */
+ qsize_t i_reserved_quota;
+#endif
/* completed async DIOs that might need unwritten extents handling */
struct list_head i_aio_dio_complete_list;
@@ -1435,7 +1439,7 @@
extern int ext4_block_truncate_page(handle_t *handle,
struct address_space *mapping, loff_t from);
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
-extern qsize_t ext4_get_reserved_space(struct inode *inode);
+extern qsize_t *ext4_get_reserved_space(struct inode *inode);
extern int flush_aio_dio_completed_IO(struct inode *inode);
/* ioctl.c */
extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5352db1..ab80796 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1003,17 +1003,12 @@
return err;
}
-qsize_t ext4_get_reserved_space(struct inode *inode)
+#ifdef CONFIG_QUOTA
+qsize_t *ext4_get_reserved_space(struct inode *inode)
{
- unsigned long long total;
-
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- total = EXT4_I(inode)->i_reserved_data_blocks +
- EXT4_I(inode)->i_reserved_meta_blocks;
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-
- return (total << inode->i_blkbits);
+ return &EXT4_I(inode)->i_reserved_quota;
}
+#endif
/*
* Calculate the number of metadata blocks need to reserve
* to allocate @blocks for non extent file based file
@@ -1051,7 +1046,7 @@
static void ext4_da_update_reserve_space(struct inode *inode, int used)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- int total, mdb, mdb_free;
+ int total, mdb, mdb_free, mdb_claim = 0;
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
/* recalculate the number of metablocks still need to be reserved */
@@ -1064,7 +1059,9 @@
if (mdb_free) {
/* Account for allocated meta_blocks */
- mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
+ mdb_claim = EXT4_I(inode)->i_allocated_meta_blocks;
+ BUG_ON(mdb_free < mdb_claim);
+ mdb_free -= mdb_claim;
/* update fs dirty blocks counter */
percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
@@ -1075,8 +1072,11 @@
/* update per-inode reservations */
BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
EXT4_I(inode)->i_reserved_data_blocks -= used;
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter, used + mdb_claim);
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ vfs_dq_claim_block(inode, used + mdb_claim);
+
/*
* free those over-booking quota for metadata blocks
*/
@@ -1816,19 +1816,17 @@
md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
total = md_needed + nrblocks;
+ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
/*
* Make quota reservation here to prevent quota overflow
* later. Real quota accounting is done at pages writeout
* time.
*/
- if (vfs_dq_reserve_block(inode, total)) {
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ if (vfs_dq_reserve_block(inode, total))
return -EDQUOT;
- }
if (ext4_claim_free_blocks(sbi, total)) {
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
vfs_dq_release_reservation_block(inode, total);
if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
yield();
@@ -1836,10 +1834,11 @@
}
return -ENOSPC;
}
+ spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
- EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
-
+ EXT4_I(inode)->i_reserved_meta_blocks += md_needed;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+
return 0; /* success */
}
@@ -4794,6 +4793,9 @@
((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
inode->i_size = ext4_isize(raw_inode);
ei->i_disksize = inode->i_size;
+#ifdef CONFIG_QUOTA
+ ei->i_reserved_quota = 0;
+#endif
inode->i_generation = le32_to_cpu(raw_inode->i_generation);
ei->i_block_group = iloc.block_group;
ei->i_last_alloc_group = ~0;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b1fd3da..d34afad 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2755,12 +2755,6 @@
if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
/* release all the reserved blocks if non delalloc */
percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
- else {
- percpu_counter_sub(&sbi->s_dirtyblocks_counter,
- ac->ac_b_ex.fe_len);
- /* convert reserved quota blocks to real quota blocks */
- vfs_dq_claim_block(ac->ac_inode, ac->ac_b_ex.fe_len);
- }
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 827bde1..6ed9aa9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -704,6 +704,9 @@
ei->i_allocated_meta_blocks = 0;
ei->i_delalloc_reserved_flag = 0;
spin_lock_init(&(ei->i_block_reservation_lock));
+#ifdef CONFIG_QUOTA
+ ei->i_reserved_quota = 0;
+#endif
INIT_LIST_HEAD(&ei->i_aio_dio_complete_list);
ei->cur_aio_dio = NULL;
ei->i_sync_tid = 0;
@@ -1014,7 +1017,9 @@
.reserve_space = dquot_reserve_space,
.claim_space = dquot_claim_space,
.release_rsv = dquot_release_reserved_space,
+#ifdef CONFIG_QUOTA
.get_reserved_space = ext4_get_reserved_space,
+#endif
.alloc_inode = dquot_alloc_inode,
.free_space = dquot_free_space,
.free_inode = dquot_free_inode,
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 4160afa..bd224ee 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1913,7 +1913,7 @@
{
jbd_debugfs_dir = debugfs_create_dir("jbd", NULL);
if (jbd_debugfs_dir)
- jbd_debug = debugfs_create_u8("jbd-debug", S_IRUGO,
+ jbd_debug = debugfs_create_u8("jbd-debug", S_IRUGO | S_IWUSR,
jbd_debugfs_dir,
&journal_enable_debug);
}
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index b7ca3a9..17af879 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2115,7 +2115,8 @@
{
jbd2_debugfs_dir = debugfs_create_dir("jbd2", NULL);
if (jbd2_debugfs_dir)
- jbd2_debug = debugfs_create_u8(JBD2_DEBUG_NAME, S_IRUGO,
+ jbd2_debug = debugfs_create_u8(JBD2_DEBUG_NAME,
+ S_IRUGO | S_IWUSR,
jbd2_debugfs_dir,
&jbd2_journal_enable_debug);
}
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index cd6bb9a..dea86ab 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -323,6 +323,30 @@
}
EXPORT_SYMBOL(dquot_mark_dquot_dirty);
+/* Dirtify all the dquots - this can block when journalling */
+static inline int mark_all_dquot_dirty(struct dquot * const *dquot)
+{
+ int ret, err, cnt;
+
+ ret = err = 0;
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ if (dquot[cnt])
+ /* Even in case of error we have to continue */
+ ret = mark_dquot_dirty(dquot[cnt]);
+ if (!err)
+ err = ret;
+ }
+ return err;
+}
+
+static inline void dqput_all(struct dquot **dquot)
+{
+ unsigned int cnt;
+
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ dqput(dquot[cnt]);
+}
+
/* This function needs dq_list_lock */
static inline int clear_dquot_dirty(struct dquot *dquot)
{
@@ -1268,8 +1292,7 @@
out_err:
up_write(&sb_dqopt(sb)->dqptr_sem);
/* Drop unused references */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- dqput(got[cnt]);
+ dqput_all(got);
return ret;
}
EXPORT_SYMBOL(dquot_initialize);
@@ -1288,9 +1311,7 @@
inode->i_dquot[cnt] = NULL;
}
up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
-
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- dqput(put[cnt]);
+ dqput_all(put);
return 0;
}
EXPORT_SYMBOL(dquot_drop);
@@ -1319,6 +1340,67 @@
EXPORT_SYMBOL(vfs_dq_drop);
/*
+ * inode_reserved_space is managed internally by quota, and protected by
+ * i_lock similar to i_blocks+i_bytes.
+ */
+static qsize_t *inode_reserved_space(struct inode * inode)
+{
+ /* Filesystem must explicitly define it's own method in order to use
+ * quota reservation interface */
+ BUG_ON(!inode->i_sb->dq_op->get_reserved_space);
+ return inode->i_sb->dq_op->get_reserved_space(inode);
+}
+
+static void inode_add_rsv_space(struct inode *inode, qsize_t number)
+{
+ spin_lock(&inode->i_lock);
+ *inode_reserved_space(inode) += number;
+ spin_unlock(&inode->i_lock);
+}
+
+
+static void inode_claim_rsv_space(struct inode *inode, qsize_t number)
+{
+ spin_lock(&inode->i_lock);
+ *inode_reserved_space(inode) -= number;
+ __inode_add_bytes(inode, number);
+ spin_unlock(&inode->i_lock);
+}
+
+static void inode_sub_rsv_space(struct inode *inode, qsize_t number)
+{
+ spin_lock(&inode->i_lock);
+ *inode_reserved_space(inode) -= number;
+ spin_unlock(&inode->i_lock);
+}
+
+static qsize_t inode_get_rsv_space(struct inode *inode)
+{
+ qsize_t ret;
+ spin_lock(&inode->i_lock);
+ ret = *inode_reserved_space(inode);
+ spin_unlock(&inode->i_lock);
+ return ret;
+}
+
+static void inode_incr_space(struct inode *inode, qsize_t number,
+ int reserve)
+{
+ if (reserve)
+ inode_add_rsv_space(inode, number);
+ else
+ inode_add_bytes(inode, number);
+}
+
+static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
+{
+ if (reserve)
+ inode_sub_rsv_space(inode, number);
+ else
+ inode_sub_bytes(inode, number);
+}
+
+/*
* Following four functions update i_blocks+i_bytes fields and
* quota information (together with appropriate checks)
* NOTE: We absolutely rely on the fact that caller dirties
@@ -1336,6 +1418,21 @@
int cnt, ret = QUOTA_OK;
char warntype[MAXQUOTAS];
+ /*
+ * First test before acquiring mutex - solves deadlocks when we
+ * re-enter the quota code and are already holding the mutex
+ */
+ if (IS_NOQUOTA(inode)) {
+ inode_incr_space(inode, number, reserve);
+ goto out;
+ }
+
+ down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ if (IS_NOQUOTA(inode)) {
+ inode_incr_space(inode, number, reserve);
+ goto out_unlock;
+ }
+
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
warntype[cnt] = QUOTA_NL_NOWARN;
@@ -1346,7 +1443,8 @@
if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt)
== NO_QUOTA) {
ret = NO_QUOTA;
- goto out_unlock;
+ spin_unlock(&dq_data_lock);
+ goto out_flush_warn;
}
}
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1357,64 +1455,29 @@
else
dquot_incr_space(inode->i_dquot[cnt], number);
}
- if (!reserve)
- inode_add_bytes(inode, number);
-out_unlock:
+ inode_incr_space(inode, number, reserve);
spin_unlock(&dq_data_lock);
+
+ if (reserve)
+ goto out_flush_warn;
+ mark_all_dquot_dirty(inode->i_dquot);
+out_flush_warn:
flush_warnings(inode->i_dquot, warntype);
+out_unlock:
+ up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+out:
return ret;
}
int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
{
- int cnt, ret = QUOTA_OK;
-
- /*
- * First test before acquiring mutex - solves deadlocks when we
- * re-enter the quota code and are already holding the mutex
- */
- if (IS_NOQUOTA(inode)) {
- inode_add_bytes(inode, number);
- goto out;
- }
-
- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- if (IS_NOQUOTA(inode)) {
- inode_add_bytes(inode, number);
- goto out_unlock;
- }
-
- ret = __dquot_alloc_space(inode, number, warn, 0);
- if (ret == NO_QUOTA)
- goto out_unlock;
-
- /* Dirtify all the dquots - this can block when journalling */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- if (inode->i_dquot[cnt])
- mark_dquot_dirty(inode->i_dquot[cnt]);
-out_unlock:
- up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-out:
- return ret;
+ return __dquot_alloc_space(inode, number, warn, 0);
}
EXPORT_SYMBOL(dquot_alloc_space);
int dquot_reserve_space(struct inode *inode, qsize_t number, int warn)
{
- int ret = QUOTA_OK;
-
- if (IS_NOQUOTA(inode))
- goto out;
-
- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- if (IS_NOQUOTA(inode))
- goto out_unlock;
-
- ret = __dquot_alloc_space(inode, number, warn, 1);
-out_unlock:
- up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-out:
- return ret;
+ return __dquot_alloc_space(inode, number, warn, 1);
}
EXPORT_SYMBOL(dquot_reserve_space);
@@ -1455,10 +1518,7 @@
warn_put_all:
spin_unlock(&dq_data_lock);
if (ret == QUOTA_OK)
- /* Dirtify all the dquots - this can block when journalling */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- if (inode->i_dquot[cnt])
- mark_dquot_dirty(inode->i_dquot[cnt]);
+ mark_all_dquot_dirty(inode->i_dquot);
flush_warnings(inode->i_dquot, warntype);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
return ret;
@@ -1471,14 +1531,14 @@
int ret = QUOTA_OK;
if (IS_NOQUOTA(inode)) {
- inode_add_bytes(inode, number);
+ inode_claim_rsv_space(inode, number);
goto out;
}
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
if (IS_NOQUOTA(inode)) {
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- inode_add_bytes(inode, number);
+ inode_claim_rsv_space(inode, number);
goto out;
}
@@ -1490,12 +1550,9 @@
number);
}
/* Update inode bytes */
- inode_add_bytes(inode, number);
+ inode_claim_rsv_space(inode, number);
spin_unlock(&dq_data_lock);
- /* Dirtify all the dquots - this can block when journalling */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- if (inode->i_dquot[cnt])
- mark_dquot_dirty(inode->i_dquot[cnt]);
+ mark_all_dquot_dirty(inode->i_dquot);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
out:
return ret;
@@ -1503,38 +1560,9 @@
EXPORT_SYMBOL(dquot_claim_space);
/*
- * Release reserved quota space
- */
-void dquot_release_reserved_space(struct inode *inode, qsize_t number)
-{
- int cnt;
-
- if (IS_NOQUOTA(inode))
- goto out;
-
- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- if (IS_NOQUOTA(inode))
- goto out_unlock;
-
- spin_lock(&dq_data_lock);
- /* Release reserved dquots */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (inode->i_dquot[cnt])
- dquot_free_reserved_space(inode->i_dquot[cnt], number);
- }
- spin_unlock(&dq_data_lock);
-
-out_unlock:
- up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-out:
- return;
-}
-EXPORT_SYMBOL(dquot_release_reserved_space);
-
-/*
* This operation can block, but only after everything is updated
*/
-int dquot_free_space(struct inode *inode, qsize_t number)
+int __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
{
unsigned int cnt;
char warntype[MAXQUOTAS];
@@ -1543,7 +1571,7 @@
* re-enter the quota code and are already holding the mutex */
if (IS_NOQUOTA(inode)) {
out_sub:
- inode_sub_bytes(inode, number);
+ inode_decr_space(inode, number, reserve);
return QUOTA_OK;
}
@@ -1558,21 +1586,40 @@
if (!inode->i_dquot[cnt])
continue;
warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
- dquot_decr_space(inode->i_dquot[cnt], number);
+ if (reserve)
+ dquot_free_reserved_space(inode->i_dquot[cnt], number);
+ else
+ dquot_decr_space(inode->i_dquot[cnt], number);
}
- inode_sub_bytes(inode, number);
+ inode_decr_space(inode, number, reserve);
spin_unlock(&dq_data_lock);
- /* Dirtify all the dquots - this can block when journalling */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- if (inode->i_dquot[cnt])
- mark_dquot_dirty(inode->i_dquot[cnt]);
+
+ if (reserve)
+ goto out_unlock;
+ mark_all_dquot_dirty(inode->i_dquot);
+out_unlock:
flush_warnings(inode->i_dquot, warntype);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
return QUOTA_OK;
}
+
+int dquot_free_space(struct inode *inode, qsize_t number)
+{
+ return __dquot_free_space(inode, number, 0);
+}
EXPORT_SYMBOL(dquot_free_space);
/*
+ * Release reserved quota space
+ */
+void dquot_release_reserved_space(struct inode *inode, qsize_t number)
+{
+ __dquot_free_space(inode, number, 1);
+
+}
+EXPORT_SYMBOL(dquot_release_reserved_space);
+
+/*
* This operation can block, but only after everything is updated
*/
int dquot_free_inode(const struct inode *inode, qsize_t number)
@@ -1599,10 +1646,7 @@
dquot_decr_inodes(inode->i_dquot[cnt], number);
}
spin_unlock(&dq_data_lock);
- /* Dirtify all the dquots - this can block when journalling */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- if (inode->i_dquot[cnt])
- mark_dquot_dirty(inode->i_dquot[cnt]);
+ mark_all_dquot_dirty(inode->i_dquot);
flush_warnings(inode->i_dquot, warntype);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
return QUOTA_OK;
@@ -1610,19 +1654,6 @@
EXPORT_SYMBOL(dquot_free_inode);
/*
- * call back function, get reserved quota space from underlying fs
- */
-qsize_t dquot_get_reserved_space(struct inode *inode)
-{
- qsize_t reserved_space = 0;
-
- if (sb_any_quota_active(inode->i_sb) &&
- inode->i_sb->dq_op->get_reserved_space)
- reserved_space = inode->i_sb->dq_op->get_reserved_space(inode);
- return reserved_space;
-}
-
-/*
* Transfer the number of inode and blocks from one diskquota to an other.
*
* This operation can block, but only after everything is updated
@@ -1665,7 +1696,7 @@
}
spin_lock(&dq_data_lock);
cur_space = inode_get_bytes(inode);
- rsv_space = dquot_get_reserved_space(inode);
+ rsv_space = inode_get_rsv_space(inode);
space = cur_space + rsv_space;
/* Build the transfer_from list and check the limits */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1709,25 +1740,18 @@
spin_unlock(&dq_data_lock);
up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
- /* Dirtify all the dquots - this can block when journalling */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (transfer_from[cnt])
- mark_dquot_dirty(transfer_from[cnt]);
- if (transfer_to[cnt]) {
- mark_dquot_dirty(transfer_to[cnt]);
- /* The reference we got is transferred to the inode */
- transfer_to[cnt] = NULL;
- }
- }
+ mark_all_dquot_dirty(transfer_from);
+ mark_all_dquot_dirty(transfer_to);
+ /* The reference we got is transferred to the inode */
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ transfer_to[cnt] = NULL;
warn_put_all:
flush_warnings(transfer_to, warntype_to);
flush_warnings(transfer_from, warntype_from_inodes);
flush_warnings(transfer_from, warntype_from_space);
put_all:
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- dqput(transfer_from[cnt]);
- dqput(transfer_to[cnt]);
- }
+ dqput_all(transfer_from);
+ dqput_all(transfer_to);
return ret;
over_quota:
spin_unlock(&dq_data_lock);
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index 3dfc23e..e3da02f 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -97,8 +97,11 @@
unsigned int version;
if (!v2_read_header(sb, type, &dqhead))
- return 0;
+ return -1;
version = le32_to_cpu(dqhead.dqh_version);
+ if ((info->dqi_fmt_id == QFMT_VFS_V0 && version != 0) ||
+ (info->dqi_fmt_id == QFMT_VFS_V1 && version != 1))
+ return -1;
size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
@@ -120,8 +123,8 @@
info->dqi_maxilimit = 0xffffffff;
} else {
/* used space is stored as unsigned 64-bit value */
- info->dqi_maxblimit = 0xffffffffffffffff; /* 2^64-1 */
- info->dqi_maxilimit = 0xffffffffffffffff;
+ info->dqi_maxblimit = 0xffffffffffffffffULL; /* 2^64-1 */
+ info->dqi_maxilimit = 0xffffffffffffffffULL;
}
info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
diff --git a/fs/stat.c b/fs/stat.c
index 075694e..c4ecd52 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -401,9 +401,9 @@
}
#endif /* __ARCH_WANT_STAT64 */
-void inode_add_bytes(struct inode *inode, loff_t bytes)
+/* Caller is here responsible for sufficient locking (ie. inode->i_lock) */
+void __inode_add_bytes(struct inode *inode, loff_t bytes)
{
- spin_lock(&inode->i_lock);
inode->i_blocks += bytes >> 9;
bytes &= 511;
inode->i_bytes += bytes;
@@ -411,6 +411,12 @@
inode->i_blocks++;
inode->i_bytes -= 512;
}
+}
+
+void inode_add_bytes(struct inode *inode, loff_t bytes)
+{
+ spin_lock(&inode->i_lock);
+ __inode_add_bytes(inode, bytes);
spin_unlock(&inode->i_lock);
}
diff --git a/include/linux/ext3_fs_sb.h b/include/linux/ext3_fs_sb.h
index f07f34d..258088a 100644
--- a/include/linux/ext3_fs_sb.h
+++ b/include/linux/ext3_fs_sb.h
@@ -72,6 +72,8 @@
struct inode * s_journal_inode;
struct journal_s * s_journal;
struct list_head s_orphan;
+ struct mutex s_orphan_lock;
+ struct mutex s_resize_lock;
unsigned long s_commit_interval;
struct block_device *journal_bdev;
#ifdef CONFIG_JBD_DEBUG
diff --git a/include/linux/ext3_jbd.h b/include/linux/ext3_jbd.h
index cf82d51..d7b5ddc 100644
--- a/include/linux/ext3_jbd.h
+++ b/include/linux/ext3_jbd.h
@@ -44,13 +44,13 @@
#define EXT3_DATA_TRANS_BLOCKS(sb) (EXT3_SINGLEDATA_TRANS_BLOCKS + \
EXT3_XATTR_TRANS_BLOCKS - 2 + \
- 2*EXT3_QUOTA_TRANS_BLOCKS(sb))
+ EXT3_MAXQUOTAS_TRANS_BLOCKS(sb))
/* Delete operations potentially hit one directory's namespace plus an
* entire inode, plus arbitrary amounts of bitmap/indirection data. Be
* generous. We can grow the delete transaction later if necessary. */
-#define EXT3_DELETE_TRANS_BLOCKS(sb) (2 * EXT3_DATA_TRANS_BLOCKS(sb) + 64)
+#define EXT3_DELETE_TRANS_BLOCKS(sb) (EXT3_MAXQUOTAS_TRANS_BLOCKS(sb) + 64)
/* Define an arbitrary limit for the amount of data we will anticipate
* writing to any given transaction. For unbounded transactions such as
@@ -86,6 +86,9 @@
#define EXT3_QUOTA_INIT_BLOCKS(sb) 0
#define EXT3_QUOTA_DEL_BLOCKS(sb) 0
#endif
+#define EXT3_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_TRANS_BLOCKS(sb))
+#define EXT3_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_INIT_BLOCKS(sb))
+#define EXT3_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_DEL_BLOCKS(sb))
int
ext3_mark_iloc_dirty(handle_t *handle,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7e3012e..9147ca8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2297,6 +2297,7 @@
extern int generic_readlink(struct dentry *, char __user *, int);
extern void generic_fillattr(struct inode *, struct kstat *);
extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+void __inode_add_bytes(struct inode *inode, loff_t bytes);
void inode_add_bytes(struct inode *inode, loff_t bytes);
void inode_sub_bytes(struct inode *inode, loff_t bytes);
loff_t inode_get_bytes(struct inode *inode);
diff --git a/include/linux/quota.h b/include/linux/quota.h
index e70e621..a6861f1 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -315,8 +315,9 @@
int (*claim_space) (struct inode *, qsize_t);
/* release rsved quota for delayed alloc */
void (*release_rsv) (struct inode *, qsize_t);
- /* get reserved quota for delayed alloc */
- qsize_t (*get_reserved_space) (struct inode *);
+ /* get reserved quota for delayed alloc, value returned is managed by
+ * quota code only */
+ qsize_t *(*get_reserved_space) (struct inode *);
};
/* Operations handling requests from userspace */