Merge branch 'for-3.7/core' of git://git.kernel.dk/linux-block
Pull block IO update from Jens Axboe:
"Core block IO bits for 3.7. Not a huge round this time, it contains:
- First series from Kent cleaning up and generalizing bio allocation
and freeing.
- WRITE_SAME support from Martin.
- Mikulas patches to prevent O_DIRECT crashes when someone changes
the block size of a device.
- Make bio_split() work on data-less bio's (like trim/discards).
- A few other minor fixups."
Fixed up silent semantic mis-merge as per Mikulas Patocka and Andrew
Morton. It is due to the VM no longer using a prio-tree (see commit
6b2dbba8b6ac: "mm: replace vma prio_tree with an interval tree").
So make set_blocksize() use mapping_mapped() instead of open-coding the
internal VM knowledge that has changed.
* 'for-3.7/core' of git://git.kernel.dk/linux-block: (26 commits)
block: makes bio_split support bio without data
scatterlist: refactor the sg_nents
scatterlist: add sg_nents
fs: fix include/percpu-rwsem.h export error
percpu-rw-semaphore: fix documentation typos
fs/block_dev.c:1644:5: sparse: symbol 'blkdev_mmap' was not declared
blockdev: turn a rw semaphore into a percpu rw semaphore
Fix a crash when block device is read and block size is changed at the same time
block: fix request_queue->flags initialization
block: lift the initial queue bypass mode on blk_register_queue() instead of blk_init_allocated_queue()
block: ioctl to zero block ranges
block: Make blkdev_issue_zeroout use WRITE SAME
block: Implement support for WRITE SAME
block: Consolidate command flag and queue limit checks for merges
block: Clean up special command handling logic
block/blk-tag.c: Remove useless kfree
block: remove the duplicated setting for congestion_threshold
block: reject invalid queue attribute values
block: Add bio_clone_bioset(), bio_clone_kmalloc()
block: Consolidate bio_alloc_bioset(), bio_kmalloc()
...
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 38e721b..b3c1d3d 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -116,6 +116,8 @@
int set_blocksize(struct block_device *bdev, int size)
{
+ struct address_space *mapping;
+
/* Size must be a power of two, and between 512 and PAGE_SIZE */
if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
return -EINVAL;
@@ -124,6 +126,19 @@
if (size < bdev_logical_block_size(bdev))
return -EINVAL;
+ /* Prevent starting I/O or mapping the device */
+ percpu_down_write(&bdev->bd_block_size_semaphore);
+
+ /* Check that the block device is not memory mapped */
+ mapping = bdev->bd_inode->i_mapping;
+ mutex_lock(&mapping->i_mmap_mutex);
+ if (mapping_mapped(mapping)) {
+ mutex_unlock(&mapping->i_mmap_mutex);
+ percpu_up_write(&bdev->bd_block_size_semaphore);
+ return -EBUSY;
+ }
+ mutex_unlock(&mapping->i_mmap_mutex);
+
/* Don't change the size if it is same as current */
if (bdev->bd_block_size != size) {
sync_blockdev(bdev);
@@ -131,6 +146,9 @@
bdev->bd_inode->i_blkbits = blksize_bits(size);
kill_bdev(bdev);
}
+
+ percpu_up_write(&bdev->bd_block_size_semaphore);
+
return 0;
}
@@ -441,6 +459,12 @@
struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
if (!ei)
return NULL;
+
+ if (unlikely(percpu_init_rwsem(&ei->bdev.bd_block_size_semaphore))) {
+ kmem_cache_free(bdev_cachep, ei);
+ return NULL;
+ }
+
return &ei->vfs_inode;
}
@@ -449,6 +473,8 @@
struct inode *inode = container_of(head, struct inode, i_rcu);
struct bdev_inode *bdi = BDEV_I(inode);
+ percpu_free_rwsem(&bdi->bdev.bd_block_size_semaphore);
+
kmem_cache_free(bdev_cachep, bdi);
}
@@ -1567,6 +1593,22 @@
return blkdev_ioctl(bdev, mode, cmd, arg);
}
+ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ ssize_t ret;
+ struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
+
+ percpu_down_read(&bdev->bd_block_size_semaphore);
+
+ ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
+
+ percpu_up_read(&bdev->bd_block_size_semaphore);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(blkdev_aio_read);
+
/*
* Write data to the block device. Only intended for the block device itself
* and the raw driver which basically is a fake block device.
@@ -1578,12 +1620,16 @@
unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
+ struct block_device *bdev = I_BDEV(file->f_mapping->host);
struct blk_plug plug;
ssize_t ret;
BUG_ON(iocb->ki_pos != pos);
blk_start_plug(&plug);
+
+ percpu_down_read(&bdev->bd_block_size_semaphore);
+
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
if (ret > 0 || ret == -EIOCBQUEUED) {
ssize_t err;
@@ -1592,11 +1638,29 @@
if (err < 0 && ret > 0)
ret = err;
}
+
+ percpu_up_read(&bdev->bd_block_size_semaphore);
+
blk_finish_plug(&plug);
+
return ret;
}
EXPORT_SYMBOL_GPL(blkdev_aio_write);
+static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ int ret;
+ struct block_device *bdev = I_BDEV(file->f_mapping->host);
+
+ percpu_down_read(&bdev->bd_block_size_semaphore);
+
+ ret = generic_file_mmap(file, vma);
+
+ percpu_up_read(&bdev->bd_block_size_semaphore);
+
+ return ret;
+}
+
/*
* Try to release a page associated with block device when the system
* is under memory pressure.
@@ -1627,9 +1691,9 @@
.llseek = block_llseek,
.read = do_sync_read,
.write = do_sync_write,
- .aio_read = generic_file_aio_read,
+ .aio_read = blkdev_aio_read,
.aio_write = blkdev_aio_write,
- .mmap = generic_file_mmap,
+ .mmap = blkdev_mmap,
.fsync = blkdev_fsync,
.unlocked_ioctl = block_ioctl,
#ifdef CONFIG_COMPAT