[PATCH] BLOCK: Make it possible to disable the block layer [try #6]

Make it possible to disable the block layer.  Not all embedded devices require
it, some can make do with just JFFS2, NFS, ramfs, etc - none of which require
the block layer to be present.

This patch does the following:

 (*) Introduces CONFIG_BLOCK to disable the block layer, buffering and blockdev
     support.

 (*) Adds dependencies on CONFIG_BLOCK to any configuration item that controls
     an item that uses the block layer.  This includes:

     (*) Block I/O tracing.

     (*) Disk partition code.

     (*) All filesystems that are block based, eg: Ext3, ReiserFS, ISOFS.

     (*) The SCSI layer.  As far as I can tell, even SCSI chardevs use the
     	 block layer to do scheduling.  Some drivers that use SCSI facilities -
     	 such as USB storage - end up disabled indirectly from this.

     (*) Various block-based device drivers, such as IDE and the old CDROM
     	 drivers.

     (*) MTD blockdev handling and FTL.

     (*) JFFS - which uses set_bdev_super(), something it could avoid doing by
     	 taking a leaf out of JFFS2's book.

 (*) Makes most of the contents of linux/blkdev.h, linux/buffer_head.h and
     linux/elevator.h contingent on CONFIG_BLOCK being set.  sector_div() is,
     however, still used in places, and so is still available.

 (*) Also made contingent are the contents of linux/mpage.h, linux/genhd.h and
     parts of linux/fs.h.

 (*) Makes a number of files in fs/ contingent on CONFIG_BLOCK.

 (*) Makes mm/bounce.c (bounce buffering) contingent on CONFIG_BLOCK.

 (*) set_page_dirty() doesn't call __set_page_dirty_buffers() if CONFIG_BLOCK
     is not enabled.

 (*) fs/no-block.c is created to hold out-of-line stubs and things that are
     required when CONFIG_BLOCK is not set:

     (*) Default blockdev file operations (to give error ENODEV on opening).

 (*) Makes some /proc changes:

     (*) /proc/devices does not list any blockdevs.

     (*) /proc/diskstats and /proc/partitions are contingent on CONFIG_BLOCK.

 (*) Makes some compat ioctl handling contingent on CONFIG_BLOCK.

 (*) If CONFIG_BLOCK is not defined, makes sys_quotactl() return -ENODEV if
     given command other than Q_SYNC or if a special device is specified.

 (*) In init/do_mounts.c, no reference is made to the blockdev routines if
     CONFIG_BLOCK is not defined.  This does not prohibit NFS roots or JFFS2.

 (*) The bdflush, ioprio_set and ioprio_get syscalls can now be absent (return
     error ENOSYS by way of cond_syscall if so).

 (*) The seclvl_bd_claim() and seclvl_bd_release() security calls do nothing if
     CONFIG_BLOCK is not set, since they can't then happen.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/fs/Kconfig b/fs/Kconfig
index 4fd9efa..1453d2d 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -4,6 +4,8 @@
 
 menu "File systems"
 
+if BLOCK
+
 config EXT2_FS
 	tristate "Second extended fs support"
 	help
@@ -399,6 +401,8 @@
 	  If you don't know whether you need it, then you don't need it:
 	  answer N.
 
+endif
+
 config INOTIFY
 	bool "Inotify file change notification support"
 	default y
@@ -530,6 +534,7 @@
 	  If you want to develop a userspace FS, or if you want to use
 	  a filesystem based on FUSE, answer Y or M.
 
+if BLOCK
 menu "CD-ROM/DVD Filesystems"
 
 config ISO9660_FS
@@ -597,7 +602,9 @@
 	depends on (UDF_FS=m && NLS) || (UDF_FS=y && NLS=y)
 
 endmenu
+endif
 
+if BLOCK
 menu "DOS/FAT/NT Filesystems"
 
 config FAT_FS
@@ -782,6 +789,7 @@
 	  It is perfectly safe to say N here.
 
 endmenu
+endif
 
 menu "Pseudo filesystems"
 
@@ -939,7 +947,7 @@
 
 config ADFS_FS
 	tristate "ADFS file system support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on BLOCK && EXPERIMENTAL
 	help
 	  The Acorn Disc Filing System is the standard file system of the
 	  RiscOS operating system which runs on Acorn's ARM-based Risc PC
@@ -967,7 +975,7 @@
 
 config AFFS_FS
 	tristate "Amiga FFS file system support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on BLOCK && EXPERIMENTAL
 	help
 	  The Fast File System (FFS) is the common file system used on hard
 	  disks by Amiga(tm) systems since AmigaOS Version 1.3 (34.20).  Say Y
@@ -989,7 +997,7 @@
 
 config HFS_FS
 	tristate "Apple Macintosh file system support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on BLOCK && EXPERIMENTAL
 	select NLS
 	help
 	  If you say Y here, you will be able to mount Macintosh-formatted
@@ -1002,6 +1010,7 @@
 
 config HFSPLUS_FS
 	tristate "Apple Extended HFS file system support"
+	depends on BLOCK
 	select NLS
 	select NLS_UTF8
 	help
@@ -1015,7 +1024,7 @@
 
 config BEFS_FS
 	tristate "BeOS file system (BeFS) support (read only) (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on BLOCK && EXPERIMENTAL
 	select NLS
 	help
 	  The BeOS File System (BeFS) is the native file system of Be, Inc's
@@ -1042,7 +1051,7 @@
 
 config BFS_FS
 	tristate "BFS file system support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on BLOCK && EXPERIMENTAL
 	help
 	  Boot File System (BFS) is a file system used under SCO UnixWare to
 	  allow the bootloader access to the kernel image and other important
@@ -1064,7 +1073,7 @@
 
 config EFS_FS
 	tristate "EFS file system support (read only) (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	depends on BLOCK && EXPERIMENTAL
 	help
 	  EFS is an older file system used for non-ISO9660 CD-ROMs and hard
 	  disk partitions by SGI's IRIX operating system (IRIX 6.0 and newer
@@ -1079,7 +1088,7 @@
 
 config JFFS_FS
 	tristate "Journalling Flash File System (JFFS) support"
-	depends on MTD
+	depends on MTD && BLOCK
 	help
 	  JFFS is the Journaling Flash File System developed by Axis
 	  Communications in Sweden, aimed at providing a crash/powerdown-safe
@@ -1264,6 +1273,7 @@
 
 config CRAMFS
 	tristate "Compressed ROM file system support (cramfs)"
+	depends on BLOCK
 	select ZLIB_INFLATE
 	help
 	  Saying Y here includes support for CramFs (Compressed ROM File
@@ -1283,6 +1293,7 @@
 
 config VXFS_FS
 	tristate "FreeVxFS file system support (VERITAS VxFS(TM) compatible)"
+	depends on BLOCK
 	help
 	  FreeVxFS is a file system driver that support the VERITAS VxFS(TM)
 	  file system format.  VERITAS VxFS(TM) is the standard file system
@@ -1300,6 +1311,7 @@
 
 config HPFS_FS
 	tristate "OS/2 HPFS file system support"
+	depends on BLOCK
 	help
 	  OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS
 	  is the file system used for organizing files on OS/2 hard disk
@@ -1316,6 +1328,7 @@
 
 config QNX4FS_FS
 	tristate "QNX4 file system support (read only)"
+	depends on BLOCK
 	help
 	  This is the file system used by the real-time operating systems
 	  QNX 4 and QNX 6 (the latter is also called QNX RTP).
@@ -1343,6 +1356,7 @@
 
 config SYSV_FS
 	tristate "System V/Xenix/V7/Coherent file system support"
+	depends on BLOCK
 	help
 	  SCO, Xenix and Coherent are commercial Unix systems for Intel
 	  machines, and Version 7 was used on the DEC PDP-11. Saying Y
@@ -1381,6 +1395,7 @@
 
 config UFS_FS
 	tristate "UFS file system support (read only)"
+	depends on BLOCK
 	help
 	  BSD and derivate versions of Unix (such as SunOS, FreeBSD, NetBSD,
 	  OpenBSD and NeXTstep) use a file system called UFS. Some System V
@@ -1959,11 +1974,13 @@
 
 endmenu
 
+if BLOCK
 menu "Partition Types"
 
 source "fs/partitions/Kconfig"
 
 endmenu
+endif
 
 source "fs/nls/Kconfig"
 
diff --git a/fs/Makefile b/fs/Makefile
index 46b8cfe..a503e6c 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -5,12 +5,18 @@
 # Rewritten to use lists instead of if-statements.
 # 
 
-obj-y :=	open.o read_write.o file_table.o buffer.o  bio.o super.o \
-		block_dev.o char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
+obj-y :=	open.o read_write.o file_table.o super.o \
+		char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
 		ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
-		seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
-		ioprio.o pnode.o drop_caches.o splice.o sync.o
+		seq_file.o xattr.o libfs.o fs-writeback.o \
+		pnode.o drop_caches.o splice.o sync.o
+
+ifeq ($(CONFIG_BLOCK),y)
+obj-y +=	buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
+else
+obj-y +=	no-block.o
+endif
 
 obj-$(CONFIG_INOTIFY)		+= inotify.o
 obj-$(CONFIG_INOTIFY_USER)	+= inotify_user.o
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index e1a5643..64b3453 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -645,6 +645,7 @@
 }
 #endif
 
+#ifdef CONFIG_BLOCK
 struct hd_geometry32 {
 	unsigned char heads;
 	unsigned char sectors;
@@ -869,6 +870,7 @@
 	}
 	return err;
 }
+#endif /* CONFIG_BLOCK */
 
 struct sock_fprog32 {
 	unsigned short	len;
@@ -992,6 +994,7 @@
 }
 
 
+#ifdef CONFIG_BLOCK
 struct mtget32 {
 	compat_long_t	mt_type;
 	compat_long_t	mt_resid;
@@ -1164,6 +1167,7 @@
 
 	return err;
 }
+#endif /* CONFIG_BLOCK */
 
 #ifdef CONFIG_VT
 
@@ -1491,6 +1495,7 @@
 	return -EINVAL;
 }
 
+#ifdef CONFIG_BLOCK
 static int broken_blkgetsize(unsigned int fd, unsigned int cmd, unsigned long arg)
 {
 	/* The mkswap binary hard codes it to Intel value :-((( */
@@ -1525,12 +1530,14 @@
 
 	return sys_ioctl(fd, cmd, (unsigned long)a);
 }
+#endif
 
 static int ioc_settimeout(unsigned int fd, unsigned int cmd, unsigned long arg)
 {
 	return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, arg);
 }
 
+#ifdef CONFIG_BLOCK
 /* Fix sizeof(sizeof()) breakage */
 #define BLKBSZGET_32   _IOR(0x12,112,int)
 #define BLKBSZSET_32   _IOW(0x12,113,int)
@@ -1551,6 +1558,7 @@
 {
        return sys_ioctl(fd, BLKGETSIZE64, (unsigned long)compat_ptr(arg));
 }
+#endif
 
 /* Bluetooth ioctls */
 #define HCIUARTSETPROTO	_IOW('U', 200, int)
@@ -1571,6 +1579,7 @@
 #define HIDPGETCONNLIST	_IOR('H', 210, int)
 #define HIDPGETCONNINFO	_IOR('H', 211, int)
 
+#ifdef CONFIG_BLOCK
 struct floppy_struct32 {
 	compat_uint_t	size;
 	compat_uint_t	sect;
@@ -1895,6 +1904,7 @@
 	kfree(karg);
 	return err;
 }
+#endif
 
 struct mtd_oob_buf32 {
 	u_int32_t start;
@@ -1936,6 +1946,7 @@
 	return err;
 }	
 
+#ifdef CONFIG_BLOCK
 struct raw32_config_request
 {
         compat_int_t    raw_minor;
@@ -2000,6 +2011,7 @@
         }
         return ret;
 }
+#endif /* CONFIG_BLOCK */
 
 struct serial_struct32 {
         compat_int_t    type;
@@ -2606,6 +2618,7 @@
 HANDLE_IOCTL(SIOCRTMSG, ret_einval)
 HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp)
 #endif
+#ifdef CONFIG_BLOCK
 HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo)
 HANDLE_IOCTL(BLKRAGET, w_long)
 HANDLE_IOCTL(BLKGETSIZE, w_long)
@@ -2631,14 +2644,17 @@
 HANDLE_IOCTL(FDWERRORGET32, fd_ioctl_trans)
 HANDLE_IOCTL(SG_IO,sg_ioctl_trans)
 HANDLE_IOCTL(SG_GET_REQUEST_TABLE, sg_grt_trans)
+#endif
 HANDLE_IOCTL(PPPIOCGIDLE32, ppp_ioctl_trans)
 HANDLE_IOCTL(PPPIOCSCOMPRESS32, ppp_ioctl_trans)
 HANDLE_IOCTL(PPPIOCSPASS32, ppp_sock_fprog_ioctl_trans)
 HANDLE_IOCTL(PPPIOCSACTIVE32, ppp_sock_fprog_ioctl_trans)
+#ifdef CONFIG_BLOCK
 HANDLE_IOCTL(MTIOCGET32, mt_ioctl_trans)
 HANDLE_IOCTL(MTIOCPOS32, mt_ioctl_trans)
 HANDLE_IOCTL(CDROMREADAUDIO, cdrom_ioctl_trans)
 HANDLE_IOCTL(CDROM_SEND_PACKET, cdrom_ioctl_trans)
+#endif
 #define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int)
 HANDLE_IOCTL(AUTOFS_IOC_SETTIMEOUT32, ioc_settimeout)
 #ifdef CONFIG_VT
@@ -2677,12 +2693,14 @@
 HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl)
 HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl)
 /* block stuff */
+#ifdef CONFIG_BLOCK
 HANDLE_IOCTL(BLKBSZGET_32, do_blkbszget)
 HANDLE_IOCTL(BLKBSZSET_32, do_blkbszset)
 HANDLE_IOCTL(BLKGETSIZE64_32, do_blkgetsize64)
 /* Raw devices */
 HANDLE_IOCTL(RAW_SETBIND, raw_ioctl)
 HANDLE_IOCTL(RAW_GETBIND, raw_ioctl)
+#endif
 /* Serial */
 HANDLE_IOCTL(TIOCGSERIAL, serial_struct_ioctl)
 HANDLE_IOCTL(TIOCSSERIAL, serial_struct_ioctl)
diff --git a/fs/internal.h b/fs/internal.h
index f662b70..f07147d 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -14,10 +14,16 @@
 /*
  * block_dev.c
  */
+#ifdef CONFIG_BLOCK
 extern struct super_block *blockdev_superblock;
 extern void __init bdev_cache_init(void);
 
 #define sb_is_blkdev_sb(sb) ((sb) == blockdev_superblock)
+#else
+static inline void bdev_cache_init(void) {}
+
+#define sb_is_blkdev_sb(sb) 0
+#endif
 
 /*
  * char_dev.c
diff --git a/fs/no-block.c b/fs/no-block.c
new file mode 100644
index 0000000..d269a93
--- /dev/null
+++ b/fs/no-block.c
@@ -0,0 +1,22 @@
+/* no-block.c: implementation of routines required for non-BLOCK configuration
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+
+static int no_blkdev_open(struct inode * inode, struct file * filp)
+{
+	return -ENODEV;
+}
+
+const struct file_operations def_blk_fops = {
+	.open		= no_blkdev_open,
+};
diff --git a/fs/partitions/Makefile b/fs/partitions/Makefile
index d713ce6..67e665f 100644
--- a/fs/partitions/Makefile
+++ b/fs/partitions/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-obj-y := check.o
+obj-$(CONFIG_BLOCK) := check.o
 
 obj-$(CONFIG_ACORN_PARTITION) += acorn.o
 obj-$(CONFIG_AMIGA_PARTITION) += amiga.o
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 5bbd608..66bc425 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -277,12 +277,15 @@
 		if (i == 0)
 			seq_printf(f, "Character devices:\n");
 		chrdev_show(f, i);
-	} else {
+	}
+#ifdef CONFIG_BLOCK
+	else {
 		i -= CHRDEV_MAJOR_HASH_SIZE;
 		if (i == 0)
 			seq_printf(f, "\nBlock devices:\n");
 		blkdev_show(f, i);
 	}
+#endif
 	return 0;
 }
 
@@ -355,6 +358,7 @@
 }
 #endif
 
+#ifdef CONFIG_BLOCK
 extern struct seq_operations partitions_op;
 static int partitions_open(struct inode *inode, struct file *file)
 {
@@ -378,6 +382,7 @@
 	.llseek		= seq_lseek,
 	.release	= seq_release,
 };
+#endif
 
 #ifdef CONFIG_MODULES
 extern struct seq_operations modules_op;
@@ -695,7 +700,9 @@
 		entry->proc_fops = &proc_kmsg_operations;
 	create_seq_entry("devices", 0, &proc_devinfo_operations);
 	create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations);
+#ifdef CONFIG_BLOCK
 	create_seq_entry("partitions", 0, &proc_partitions_operations);
+#endif
 	create_seq_entry("stat", 0, &proc_stat_operations);
 	create_seq_entry("interrupts", 0, &proc_interrupts_operations);
 #ifdef CONFIG_SLAB
@@ -707,7 +714,9 @@
 	create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations);
 	create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations);
 	create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations);
+#ifdef CONFIG_BLOCK
 	create_seq_entry("diskstats", 0, &proc_diskstats_operations);
+#endif
 #ifdef CONFIG_MODULES
 	create_seq_entry("modules", 0, &proc_modules_operations);
 #endif
diff --git a/fs/quota.c b/fs/quota.c
index d6a2be8..b9dae76 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -338,6 +338,34 @@
 }
 
 /*
+ * look up a superblock on which quota ops will be performed
+ * - use the name of a block device to find the superblock thereon
+ */
+static inline struct super_block *quotactl_block(const char __user *special)
+{
+#ifdef CONFIG_BLOCK
+	struct block_device *bdev;
+	struct super_block *sb;
+	char *tmp = getname(special);
+
+	if (IS_ERR(tmp))
+		return ERR_PTR(PTR_ERR(tmp));
+	bdev = lookup_bdev(tmp);
+	putname(tmp);
+	if (IS_ERR(bdev))
+		return ERR_PTR(PTR_ERR(bdev));
+	sb = get_super(bdev);
+	bdput(bdev);
+	if (!sb)
+		return ERR_PTR(-ENODEV);
+
+	return sb;
+#else
+	return ERR_PTR(-ENODEV);
+#endif
+}
+
+/*
  * This is the system call interface. This communicates with
  * the user-level programs. Currently this only supports diskquota
  * calls. Maybe we need to add the process quotas etc. in the future,
@@ -347,25 +375,15 @@
 {
 	uint cmds, type;
 	struct super_block *sb = NULL;
-	struct block_device *bdev;
-	char *tmp;
 	int ret;
 
 	cmds = cmd >> SUBCMDSHIFT;
 	type = cmd & SUBCMDMASK;
 
 	if (cmds != Q_SYNC || special) {
-		tmp = getname(special);
-		if (IS_ERR(tmp))
-			return PTR_ERR(tmp);
-		bdev = lookup_bdev(tmp);
-		putname(tmp);
-		if (IS_ERR(bdev))
-			return PTR_ERR(bdev);
-		sb = get_super(bdev);
-		bdput(bdev);
-		if (!sb)
-			return -ENODEV;
+		sb = quotactl_block(special);
+		if (IS_ERR(sb))
+			return PTR_ERR(sb);
 	}
 
 	ret = check_quotactl_valid(sb, type, cmds, id);
diff --git a/fs/super.c b/fs/super.c
index 15671cd..aec99dd 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -571,8 +571,10 @@
 {
 	int retval;
 	
+#ifdef CONFIG_BLOCK
 	if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev))
 		return -EACCES;
+#endif
 	if (flags & MS_RDONLY)
 		acct_auto_close(sb);
 	shrink_dcache_sb(sb);
@@ -692,6 +694,7 @@
 
 EXPORT_SYMBOL(kill_litter_super);
 
+#ifdef CONFIG_BLOCK
 static int set_bdev_super(struct super_block *s, void *data)
 {
 	s->s_bdev = data;
@@ -787,6 +790,7 @@
 }
 
 EXPORT_SYMBOL(kill_block_super);
+#endif
 
 int get_sb_nodev(struct file_system_type *fs_type,
 	int flags, void *data,
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 26b364c..35115bc 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -1,5 +1,6 @@
 config XFS_FS
 	tristate "XFS filesystem support"
+	depends on BLOCK
 	help
 	  XFS is a high performance journaling filesystem which originated
 	  on the SGI IRIX platform.  It is completely multi-threaded, can