ext4: add max_dir_size_kb mount option

Very large directories can cause significant performance problems, or
perhaps even invoke the OOM killer, if the process is running in a
highly constrained memory environment (whether it is VM's with a small
amount of memory or in a small memory cgroup).

So it is useful, in cloud server/data center environments, to be able
to set a filesystem-wide cap on the maximum size of a directory, to
ensure that directories never get larger than a sane size.  We do this
via a new mount option, max_dir_size_kb.  If there is an attempt to
grow the directory larger than max_dir_size_kb, the system call will
return ENOSPC instead.

Google-Bug-Id: 6863013

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>




diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index c3411d4..7c0841e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1243,6 +1243,7 @@
 	unsigned int s_mb_order2_reqs;
 	unsigned int s_mb_group_prealloc;
 	unsigned int s_max_writeback_mb_bump;
+	unsigned int s_max_dir_size_kb;
 	/* where last allocation was done - for stream allocation */
 	unsigned long s_mb_last_group;
 	unsigned long s_mb_last_start;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 2a42cc0..7450ff01 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -55,6 +55,13 @@
 {
 	struct buffer_head *bh;
 
+	if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
+		     ((inode->i_size >> 10) >=
+		      EXT4_SB(inode->i_sb)->s_max_dir_size_kb))) {
+		*err = -ENOSPC;
+		return NULL;
+	}
+
 	*block = inode->i_size >> inode->i_sb->s_blocksize_bits;
 
 	bh = ext4_bread(handle, inode, *block, 1, err);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 5984989..5a97e59 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1230,6 +1230,7 @@
 	Opt_inode_readahead_blks, Opt_journal_ioprio,
 	Opt_dioread_nolock, Opt_dioread_lock,
 	Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
+	Opt_max_dir_size_kb,
 };
 
 static const match_table_t tokens = {
@@ -1303,6 +1304,7 @@
 	{Opt_init_itable, "init_itable=%u"},
 	{Opt_init_itable, "init_itable"},
 	{Opt_noinit_itable, "noinit_itable"},
+	{Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
 	{Opt_removed, "check=none"},	/* mount option from ext2/3 */
 	{Opt_removed, "nocheck"},	/* mount option from ext2/3 */
 	{Opt_removed, "reservation"},	/* mount option from ext2/3 */
@@ -1483,6 +1485,7 @@
 	{Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
 	{Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
 	{Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
+	{Opt_max_dir_size_kb, 0, MOPT_GTE0},
 	{Opt_err, 0, 0}
 };
 
@@ -1598,6 +1601,8 @@
 			if (!args->from)
 				arg = EXT4_DEF_LI_WAIT_MULT;
 			sbi->s_li_wait_mult = arg;
+		} else if (token == Opt_max_dir_size_kb) {
+			sbi->s_max_dir_size_kb = arg;
 		} else if (token == Opt_stripe) {
 			sbi->s_stripe = arg;
 		} else if (m->flags & MOPT_DATAJ) {
@@ -1829,6 +1834,8 @@
 	if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
 		       (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
 		SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
+	if (nodefs || sbi->s_max_dir_size_kb)
+		SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
 
 	ext4_show_quota_options(seq, sb);
 	return 0;