bmap support
diff --git a/ChangeLog b/ChangeLog
index a14f301..942c2d9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,12 @@
 2006-09-30  Miklos Szeredi <miklos@szeredi.hu>
 
+	* Add support for block device backed filesystems.  This mode is
+	selected with the 'blkdev' option, which is privileged.
+
+	* Add support for the bmap (FIBMAP ioctl) operation on block
+	device backed filesystems.  This allows swapon and lilo to work on
+	such filesystems.
+
 	* kernel changes:
 
 	* Drop support for kernels earlier than 2.6.9.  Kernel module from
diff --git a/README b/README
index eeb6d96..b97d56b 100644
--- a/README
+++ b/README
@@ -259,3 +259,8 @@
 gid=N
 
   Override the 'st_gid' field set by the filesystem.
+
+blkdev
+
+  Mount a filesystem backed by a block device.  This is a privileged
+  option.  The device must be specified with the 'fsname=NAME' option.
diff --git a/doc/kernel.txt b/doc/kernel.txt
index a584f05..e94e98b 100644
--- a/doc/kernel.txt
+++ b/doc/kernel.txt
@@ -51,6 +51,22 @@
 
   http://fuse.sourceforge.net/
 
+Filesystem type
+~~~~~~~~~~~~~~~
+
+The filesystem type given to mount(2) can be one of the following:
+
+'fuse'
+
+  This is the usual way to mount a FUSE filesystem.  The first
+  argument of the mount system call may contain an arbitrary string,
+  which is not interpreted by the kernel.
+
+'fuseblk'
+
+  The filesystem is block device based.  The first argument of the
+  mount system call is interpreted as the name of the device.
+
 Mount options
 ~~~~~~~~~~~~~
 
@@ -94,6 +110,11 @@
   The default is infinite.  Note that the size of read requests is
   limited anyway to 32 pages (which is 128kbyte on i386).
 
+'blksize=N'
+
+  Set the block size for the filesystem.  The default is 512.  This
+  option is only valid for 'fuseblk' type mounts.
+
 Control filesystem
 ~~~~~~~~~~~~~~~~~~
 
diff --git a/include/fuse.h b/include/fuse.h
index 8237422..797f225 100644
--- a/include/fuse.h
+++ b/include/fuse.h
@@ -408,6 +408,16 @@
      * Introduced in version 2.6
      */
     int (*utimens) (const char *, const struct timespec tv[2]);
+
+    /**
+     * Map block index within file to block index within device.  On
+     * entry *idx is the file block index in units of 'blocksize'; it
+     * must hold the device block index when 0 is returned.  Only makes
+     * sense for block device backed filesystems ('blkdev' option).
+     *
+     * Introduced in version 2.6
+     */
+    int (*bmap) (const char *, size_t blocksize, uint64_t *idx);
 };
 
 /** Extra context that may be needed by some filesystems
diff --git a/include/fuse_lowlevel.h b/include/fuse_lowlevel.h
index 00b22bc..83e9739 100644
--- a/include/fuse_lowlevel.h
+++ b/include/fuse_lowlevel.h
@@ -733,6 +733,8 @@
     /**
      * Test for a POSIX file lock
      *
+     * Introduced in version 2.6
+     *
      * Valid replies:
      *   fuse_reply_lock
      *   fuse_reply_err
@@ -759,6 +761,8 @@
      * will still allow file locking to work locally.  Hence these are
      * only interesting for network filesystems and similar.
      *
+     * Introduced in version 2.6
+     *
      * Valid replies:
      *   fuse_reply_err
      *
@@ -771,6 +775,26 @@
      */
     void (*setlk) (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
                    struct flock *lock, uint64_t owner, int sleep);
+
+    /**
+     * Map block index within file to block index within device
+     *
+     * Note: This makes sense only for block device backed filesystems
+     * mounted with the 'blkdev' option
+     *
+     * Introduced in version 2.6
+     *
+     * Valid replies:
+     *   fuse_reply_bmap
+     *   fuse_reply_err
+     *
+     * @param req request handle
+     * @param ino the inode number
+     * @param blocksize unit of block index
+     * @param idx block index within file
+     */
+    void (*bmap) (fuse_req_t req, fuse_ino_t ino, size_t blocksize,
+                  uint64_t idx);
 };
 
 /**
@@ -929,6 +953,18 @@
  */
 int fuse_reply_lock(fuse_req_t req, struct flock *lock);
 
+/**
+ * Reply with block index
+ *
+ * Possible requests:
+ *   bmap
+ *
+ * @param req request handle
+ * @param idx block index within device
+ * @return zero for success, -errno for failure to send reply
+ */
+int fuse_reply_bmap(fuse_req_t req, uint64_t idx);
+
 /* ----------------------------------------------------------- *
  * Filling a buffer in readdir                                 *
  * ----------------------------------------------------------- */
diff --git a/kernel/file.c b/kernel/file.c
index 9e50109..fb381da 100644
--- a/kernel/file.c
+++ b/kernel/file.c
@@ -805,6 +805,42 @@
 	return err;
 }
 
+static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
+{
+	struct inode *inode = mapping->host;
+	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_req *req;
+	struct fuse_bmap_in inarg;
+	struct fuse_bmap_out outarg;
+	int err;
+
+	if (!inode->i_sb->s_bdev || fc->no_bmap)
+		return 0;	/* no bdev, or BMAP known unsupported */
+
+	req = fuse_get_req(fc);
+	if (IS_ERR(req))
+		return 0;
+
+	memset(&inarg, 0, sizeof(inarg));	/* also clears inarg.padding */
+	inarg.block = block;
+	inarg.blocksize = inode->i_sb->s_blocksize;
+	req->in.h.opcode = FUSE_BMAP;
+	req->in.h.nodeid = get_node_id(inode);
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(outarg);
+	req->out.args[0].value = &outarg;
+	request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+	if (err == -ENOSYS)
+		fc->no_bmap = 1;	/* later calls return early */
+
+	return err ? 0 : outarg.block;	/* any error is reported as 0 */
+}
+
 static struct file_operations fuse_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_file_read,
@@ -836,6 +872,7 @@
 	.commit_write	= fuse_commit_write,
 	.readpages	= fuse_readpages,
 	.set_page_dirty	= fuse_set_page_dirty,
+	.bmap		= fuse_bmap,
 };
 
 void fuse_init_file_inode(struct inode *inode)
diff --git a/kernel/fuse_i.h b/kernel/fuse_i.h
index e44dd9a..0841e44 100644
--- a/kernel/fuse_i.h
+++ b/kernel/fuse_i.h
@@ -398,6 +398,9 @@
 	/** Is interrupt not implemented by fs? */
 	unsigned no_interrupt : 1;
 
+	/** Is bmap not implemented by fs? */
+	unsigned no_bmap : 1;
+
 	/** The number of requests waiting for completion */
 	atomic_t num_waiting;
 
diff --git a/kernel/fuse_kernel.h b/kernel/fuse_kernel.h
index 179c7eb..936ea95 100644
--- a/kernel/fuse_kernel.h
+++ b/kernel/fuse_kernel.h
@@ -162,6 +162,7 @@
 	FUSE_ACCESS        = 34,
 	FUSE_CREATE        = 35,
 	FUSE_INTERRUPT     = 36,
+	FUSE_BMAP          = 37,
 };
 
 /* The read buffer is required to be at least 8k, but may be much larger */
@@ -331,6 +332,16 @@
 	__u64	unique;
 };
 
+struct fuse_bmap_in {
+	__u64	block;		/* block index within the file */
+	__u32	blocksize;	/* unit of the block index */
+	__u32	padding;	/* zeroed by the sender's memset */
+};
+
+struct fuse_bmap_out {
+	__u64	block;		/* block index within the device */
+};
+
 struct fuse_in_header {
 	__u32	len;
 	__u32	opcode;
diff --git a/kernel/inode.c b/kernel/inode.c
index 2d4749e..7800892 100644
--- a/kernel/inode.c
+++ b/kernel/inode.c
@@ -44,6 +44,7 @@
 	unsigned group_id_present : 1;
 	unsigned flags;
 	unsigned max_read;
+	unsigned blksize;
 };
 
 static struct inode *fuse_alloc_inode(struct super_block *sb)
@@ -302,6 +303,7 @@
 	OPT_DEFAULT_PERMISSIONS,
 	OPT_ALLOW_OTHER,
 	OPT_MAX_READ,
+	OPT_BLKSIZE,
 	OPT_ERR
 };
 
@@ -313,14 +315,16 @@
 	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
 	{OPT_ALLOW_OTHER,		"allow_other"},
 	{OPT_MAX_READ,			"max_read=%u"},
+	{OPT_BLKSIZE,			"blksize=%u"},
 	{OPT_ERR,			NULL}
 };
 
-static int parse_fuse_opt(char *opt, struct fuse_mount_data *d)
+static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
 {
 	char *p;
 	memset(d, 0, sizeof(struct fuse_mount_data));
 	d->max_read = ~0;
+	d->blksize = 512;
 
 	while ((p = strsep(&opt, ",")) != NULL) {
 		int token;
@@ -373,6 +377,12 @@
 			d->max_read = value;
 			break;
 
+		case OPT_BLKSIZE:
+			if (!is_bdev || match_int(&args[0], &value))
+				return 0;
+			d->blksize = value;
+			break;
+
 		default:
 			return 0;
 		}
@@ -599,15 +609,21 @@
 	struct dentry *root_dentry;
 	struct fuse_req *init_req;
 	int err;
+	int is_bdev = sb->s_bdev != NULL;
 
 	if (sb->s_flags & MS_MANDLOCK)
 		return -EINVAL;
 
-	if (!parse_fuse_opt((char *) data, &d))
+	if (!parse_fuse_opt((char *) data, &d, is_bdev))
 		return -EINVAL;
 
-	sb->s_blocksize = PAGE_CACHE_SIZE;
-	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	if (is_bdev) {
+		if (!sb_set_blocksize(sb, d.blksize))
+			return -EINVAL;
+	} else {
+		sb->s_blocksize = PAGE_CACHE_SIZE;
+		sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	}
 	sb->s_magic = FUSE_SUPER_MAGIC;
 	sb->s_op = &fuse_super_operations;
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
@@ -693,6 +709,14 @@
 {
 	return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt);
 }
+
+static int fuse_get_sb_blk(struct file_system_type *fs_type,
+			   int flags, const char *dev_name,
+			   void *raw_data, struct vfsmount *mnt)
+{
+	return get_sb_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super,
+			   mnt);	/* dev_name: backing device */
+}
 #else
 static struct super_block *fuse_get_sb(struct file_system_type *fs_type,
 				       int flags, const char *dev_name,
@@ -700,6 +724,14 @@
 {
 	return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super);
 }
+
+static struct super_block *fuse_get_sb_blk(struct file_system_type *fs_type,
+					   int flags, const char *dev_name,
+					   void *raw_data)
+{
+	return get_sb_bdev(fs_type, flags, dev_name, raw_data,
+			   fuse_fill_super);	/* dev_name: backing device */
+}
 #endif
 
 static struct file_system_type fuse_fs_type = {
@@ -709,6 +741,14 @@
 	.kill_sb	= kill_anon_super,
 };
 
+static struct file_system_type fuseblk_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "fuseblk",	/* type name given to mount(2) */
+	.get_sb		= fuse_get_sb_blk,
+	.kill_sb	= kill_block_super,
+	.fs_flags	= FS_REQUIRES_DEV,	/* needs a device */
+};
+
 #ifndef HAVE_FS_SUBSYS
 static decl_subsys(fs, NULL, NULL);
 #endif
@@ -731,24 +771,34 @@
 
 	err = register_filesystem(&fuse_fs_type);
 	if (err)
-		printk("fuse: failed to register filesystem\n");
-	else {
-		fuse_inode_cachep = kmem_cache_create("fuse_inode",
-						      sizeof(struct fuse_inode),
-						      0, SLAB_HWCACHE_ALIGN,
-						      fuse_inode_init_once, NULL);
-		if (!fuse_inode_cachep) {
-			unregister_filesystem(&fuse_fs_type);
-			err = -ENOMEM;
-		}
-	}
+		goto out;
 
+	err = register_filesystem(&fuseblk_fs_type);
+	if (err)
+		goto out_unreg;
+
+	fuse_inode_cachep = kmem_cache_create("fuse_inode",
+					      sizeof(struct fuse_inode),
+					      0, SLAB_HWCACHE_ALIGN,
+					      fuse_inode_init_once, NULL);
+	err = -ENOMEM;
+	if (!fuse_inode_cachep)
+		goto out_unreg2;
+
+	return 0;
+
+ out_unreg2:
+	unregister_filesystem(&fuseblk_fs_type);
+ out_unreg:
+	unregister_filesystem(&fuse_fs_type);
+ out:
 	return err;
 }
 
 static void fuse_fs_cleanup(void)
 {
 	unregister_filesystem(&fuse_fs_type);
+	unregister_filesystem(&fuseblk_fs_type);
 	kmem_cache_destroy(fuse_inode_cachep);
 }
 
diff --git a/lib/fuse.c b/lib/fuse.c
index 14789b7..0e3e1d1 100644
--- a/lib/fuse.c
+++ b/lib/fuse.c
@@ -2371,6 +2371,29 @@
     reply_err(req, err);
 }
 
+static void fuse_bmap(fuse_req_t req, fuse_ino_t ino, size_t blocksize,
+                      uint64_t idx)
+{
+    struct fuse *f = req_fuse_prepare(req);
+    char *path;
+    int err;
+
+    err = -ENOENT;                   /* used when get_path() returns NULL */
+    pthread_rwlock_rdlock(&f->tree_lock);
+    path = get_path(f, ino);
+    if (path != NULL) {
+        err = -ENOSYS;               /* used when op.bmap is not set */
+        if (f->op.bmap)
+            err = f->op.bmap(path, blocksize, &idx); /* may rewrite idx */
+        free(path);
+    }
+    pthread_rwlock_unlock(&f->tree_lock);
+    if (!err)
+        fuse_reply_bmap(req, idx);   /* idx as left by the callback */
+    else
+        reply_err(req, err);
+}
+
 static struct fuse_lowlevel_ops fuse_path_ops = {
     .init = fuse_data_init,
     .destroy = fuse_data_destroy,
@@ -2405,6 +2428,7 @@
     .removexattr = fuse_removexattr,
     .getlk = fuse_getlk,
     .setlk = fuse_setlk,
+    .bmap = fuse_bmap,
 };
 
 static void free_cmd(struct fuse_cmd *cmd)
diff --git a/lib/fuse_lowlevel.c b/lib/fuse_lowlevel.c
index f014e6e..8ea6779 100644
--- a/lib/fuse_lowlevel.c
+++ b/lib/fuse_lowlevel.c
@@ -403,6 +403,16 @@
     return send_reply_ok(req, &arg, sizeof(arg));
 }
 
+int fuse_reply_bmap(fuse_req_t req, uint64_t idx)
+{
+    struct fuse_bmap_out arg;
+
+    memset(&arg, 0, sizeof(arg));
+    arg.block = idx;                 /* block index within device */
+
+    return send_reply_ok(req, &arg, sizeof(arg));
+}
+
 static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
 {
     char *name = (char *) inarg;
@@ -907,6 +917,16 @@
         return NULL;
 }
 
+static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
+{
+    struct fuse_bmap_in *arg = (struct fuse_bmap_in *) inarg;
+
+    if (req->f->op.bmap)
+        req->f->op.bmap(req, nodeid, arg->blocksize, arg->block);
+    else
+        fuse_reply_err(req, ENOSYS); /* filesystem has no bmap handler */
+}
+
 static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
 {
     struct fuse_init_in *arg = (struct fuse_init_in *) inarg;
@@ -1040,6 +1060,7 @@
     [FUSE_ACCESS]      = { do_access,      "ACCESS"      },
     [FUSE_CREATE]      = { do_create,      "CREATE"      },
     [FUSE_INTERRUPT]   = { do_interrupt,   "INTERRUPT"   },
+    [FUSE_BMAP]        = { do_bmap,        "BMAP"        },
 };
 
 #define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0]))
diff --git a/lib/mount.c b/lib/mount.c
index 8ac9787..2ed0381 100644
--- a/lib/mount.c
+++ b/lib/mount.c
@@ -49,6 +49,8 @@
     FUSE_OPT_KEY("allow_other",         KEY_KERN),
     FUSE_OPT_KEY("allow_root",          KEY_ALLOW_ROOT),
     FUSE_OPT_KEY("nonempty",            KEY_KERN),
+    FUSE_OPT_KEY("blkdev",              KEY_KERN),
+    FUSE_OPT_KEY("blksize=",            KEY_KERN),
     FUSE_OPT_KEY("default_permissions", KEY_KERN),
     FUSE_OPT_KEY("fsname=",             KEY_KERN),
     FUSE_OPT_KEY("large_read",          KEY_KERN),
diff --git a/util/fusermount.c b/util/fusermount.c
index 46d0b6b..1843f50 100644
--- a/util/fusermount.c
+++ b/util/fusermount.c
@@ -257,7 +257,8 @@
     while ((entp = getmntent(fp)) != NULL) {
         int removed = 0;
         if (!found && strcmp(entp->mnt_dir, mnt) == 0 &&
-           strcmp(entp->mnt_type, "fuse") == 0) {
+            (strcmp(entp->mnt_type, "fuse") == 0 ||
+             strcmp(entp->mnt_type, "fuseblk") == 0)) {
             if (user == NULL)
                 removed = 1;
             else {
@@ -567,7 +568,7 @@
     return 0;
 }
 
-static int do_mount(const char *mnt, const char *type, mode_t rootmode,
+static int do_mount(const char *mnt, const char **type, mode_t rootmode,
                     int fd, const char *opts, const char *dev, char **fsnamep,
                     char **mnt_optsp, off_t rootsize)
 {
@@ -579,6 +580,7 @@
     char *d;
     char *fsname = NULL;
     int check_empty = 1;
+    int blkdev = 0;
 
     optbuf = (char *) malloc(strlen(opts) + 128);
     if (!optbuf) {
@@ -601,6 +603,12 @@
             }
             memcpy(fsname, s + fsname_str_len, len - fsname_str_len);
             fsname[len - fsname_str_len] = '\0';
+        } else if (opt_eq(s, len, "blkdev")) {
+            if (getuid() != 0) {
+                fprintf(stderr, "%s: option blkdev is privileged\n", progname);
+                goto err;
+            }
+            blkdev = 1;
         } else if (opt_eq(s, len, "nonempty")) {
             check_empty = 0;
         } else if (!begins_with(s, "fd=") &&
@@ -662,11 +670,13 @@
     if (check_empty && check_mountpoint_empty(mnt, rootmode, rootsize) == -1)
         goto err;
 
-    res = mount(fsname, mnt, type, flags, optbuf);
+    if (blkdev)
+        *type = "fuseblk";
+    res = mount(fsname, mnt, *type, flags, optbuf);
     if (res == -1 && errno == EINVAL) {
         /* It could be an old version not supporting group_id */
         sprintf(d, "fd=%i,rootmode=%o,user_id=%i", fd, rootmode, getuid());
-        res = mount(fsname, mnt, type, flags, optbuf);
+        res = mount(fsname, mnt, *type, flags, optbuf);
     }
     if (res == -1) {
         fprintf(stderr, "%s: mount failed: %s\n", progname, strerror(errno));
@@ -906,7 +916,7 @@
         res = check_perm(&real_mnt, &stbuf, &currdir_fd, &mountpoint_fd);
         restore_privs();
         if (res != -1)
-            res = do_mount(real_mnt, type, stbuf.st_mode & S_IFMT, fd, opts,
+            res = do_mount(real_mnt, &type, stbuf.st_mode & S_IFMT, fd, opts,
                            dev, &fsname, &mnt_opts, stbuf.st_size);
     } else
         restore_privs();