block: fix diskstats access

There are two variants of stat functions - ones prefixed with double
underbars, which don't care about preemption, and ones without, which
disable preemption before manipulating per-cpu counters.  It's unclear
whether the underbarred ones assume that preemption is disabled on
entry, as some callers don't do that.
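
For reference, the old pair looked roughly like this (paraphrased
from memory of the pre-patch include/linux/genhd.h, SMP variant only,
not the exact source):

	/* touches the per-cpu counter directly; only safe if the
	 * caller has already disabled preemption */
	#define __disk_stat_add(gendiskp, field, addnd)			\
		(per_cpu_ptr((gendiskp)->dkstats,			\
			     smp_processor_id())->field += (addnd))

	/* wraps the above in preempt_disable()/preempt_enable() */
	#define disk_stat_add(gendiskp, field, addnd)			\
		do {							\
			preempt_disable();				\
			__disk_stat_add(gendiskp, field, addnd);	\
			preempt_enable();				\
		} while (0)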

This patch unifies diskstats access by implementing disk_stat_lock()
and disk_stat_unlock(), which take care of both RCU (for partition
access) and preemption (for per-cpu counter access).  diskstats access
should always be enclosed between the two functions.  As such, there's
no need for the versions which disable preemption; they're removed,
and the double-underbar ones are renamed to drop the underbars.  As an
extra argument is added, there's no danger of using the old versions
unconverted.

disk_stat_lock() uses get_cpu() and returns the cpu index, and all
diskstat functions which access per-cpu counters now take a @cpu
argument to help RT.
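
As a sketch, the new pair boils down to something like the following
(the actual definitions are in the genhd.h hunk of this patch, which
isn't quoted here):

	/* rcu_read_lock() protects the partition lookup; get_cpu()
	 * disables preemption and returns the cpu index */
	#define disk_stat_lock()	({ rcu_read_lock(); get_cpu(); })
	#define disk_stat_unlock()	do { put_cpu(); rcu_read_unlock(); } while (0)

so a typical converted caller looks like:

	cpu = disk_stat_lock();
	disk_stat_inc(cpu, disk, ios[rw]);
	disk_stat_unlock();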

This change adds RCU or preemption operations in some places but also
collapses several preemption ops into one in others.  Overall, the
performance difference should be negligible as all involved ops are
very lightweight per-cpu ones.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 84c03d6..17eed8c 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -756,16 +756,17 @@
 	unsigned long n_sect = bio->bi_size >> 9;
 	const int rw = bio_data_dir(bio);
 	struct hd_struct *part;
+	int cpu;
 
-	rcu_read_lock();
-
+	cpu = disk_stat_lock();
 	part = disk_map_sector_rcu(disk, sector);
-	all_stat_inc(disk, part, ios[rw], sector);
-	all_stat_add(disk, part, ticks[rw], duration, sector);
-	all_stat_add(disk, part, sectors[rw], n_sect, sector);
-	all_stat_add(disk, part, io_ticks, duration, sector);
 
-	rcu_read_unlock();
+	all_stat_inc(cpu, disk, part, ios[rw], sector);
+	all_stat_add(cpu, disk, part, ticks[rw], duration, sector);
+	all_stat_add(cpu, disk, part, sectors[rw], n_sect, sector);
+	all_stat_add(cpu, disk, part, io_ticks, duration, sector);
+
+	disk_stat_unlock();
 }
 
 void
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index a78caad..6536247 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -377,12 +377,13 @@
 static void start_io_acct(struct dm_io *io)
 {
 	struct mapped_device *md = io->md;
+	int cpu;
 
 	io->start_time = jiffies;
 
-	preempt_disable();
-	disk_round_stats(dm_disk(md));
-	preempt_enable();
+	cpu = disk_stat_lock();
+	disk_round_stats(cpu, dm_disk(md));
+	disk_stat_unlock();
 	dm_disk(md)->in_flight = atomic_inc_return(&md->pending);
 }
 
@@ -391,15 +392,15 @@
 	struct mapped_device *md = io->md;
 	struct bio *bio = io->bio;
 	unsigned long duration = jiffies - io->start_time;
-	int pending;
+	int pending, cpu;
 	int rw = bio_data_dir(bio);
 
-	preempt_disable();
-	disk_round_stats(dm_disk(md));
-	preempt_enable();
-	dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending);
+	cpu = disk_stat_lock();
+	disk_round_stats(cpu, dm_disk(md));
+	disk_stat_add(cpu, dm_disk(md), ticks[rw], duration);
+	disk_stat_unlock();
 
-	disk_stat_add(dm_disk(md), ticks[rw], duration);
+	dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending);
 
 	return !pending;
 }
@@ -885,6 +886,7 @@
 	int r = -EIO;
 	int rw = bio_data_dir(bio);
 	struct mapped_device *md = q->queuedata;
+	int cpu;
 
 	/*
 	 * There is no use in forwarding any barrier request since we can't
@@ -897,8 +899,10 @@
 
 	down_read(&md->io_lock);
 
-	disk_stat_inc(dm_disk(md), ios[rw]);
-	disk_stat_add(dm_disk(md), sectors[rw], bio_sectors(bio));
+	cpu = disk_stat_lock();
+	disk_stat_inc(cpu, dm_disk(md), ios[rw]);
+	disk_stat_add(cpu, dm_disk(md), sectors[rw], bio_sectors(bio));
+	disk_stat_unlock();
 
 	/*
 	 * If we're suspended we have to queue
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index b1eebf8..00cbc8e 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -318,14 +318,17 @@
 	mddev_t *mddev = q->queuedata;
 	dev_info_t *tmp_dev;
 	sector_t block;
+	int cpu;
 
 	if (unlikely(bio_barrier(bio))) {
 		bio_endio(bio, -EOPNOTSUPP);
 		return 0;
 	}
 
-	disk_stat_inc(mddev->gendisk, ios[rw]);
-	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
+	cpu = disk_stat_lock();
+	disk_stat_inc(cpu, mddev->gendisk, ios[rw]);
+	disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio));
+	disk_stat_unlock();
 
 	tmp_dev = which_dev(mddev, bio->bi_sector);
 	block = bio->bi_sector >> 1;
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index c4779cc..182f5a9 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -147,6 +147,7 @@
 	struct multipath_bh * mp_bh;
 	struct multipath_info *multipath;
 	const int rw = bio_data_dir(bio);
+	int cpu;
 
 	if (unlikely(bio_barrier(bio))) {
 		bio_endio(bio, -EOPNOTSUPP);
@@ -158,8 +159,10 @@
 	mp_bh->master_bio = bio;
 	mp_bh->mddev = mddev;
 
-	disk_stat_inc(mddev->gendisk, ios[rw]);
-	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
+	cpu = disk_stat_lock();
+	disk_stat_inc(cpu, mddev->gendisk, ios[rw]);
+	disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio));
+	disk_stat_unlock();
 
 	mp_bh->path = multipath_map(conf);
 	if (mp_bh->path < 0) {
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 1836106..e26030f 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -399,14 +399,17 @@
 	sector_t chunk;
 	sector_t block, rsect;
 	const int rw = bio_data_dir(bio);
+	int cpu;
 
 	if (unlikely(bio_barrier(bio))) {
 		bio_endio(bio, -EOPNOTSUPP);
 		return 0;
 	}
 
-	disk_stat_inc(mddev->gendisk, ios[rw]);
-	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
+	cpu = disk_stat_lock();
+	disk_stat_inc(cpu, mddev->gendisk, ios[rw]);
+	disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio));
+	disk_stat_unlock();
 
 	chunk_size = mddev->chunk_size >> 10;
 	chunk_sects = mddev->chunk_size >> 9;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 0b82030..babb130 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -779,7 +779,7 @@
 	struct page **behind_pages = NULL;
 	const int rw = bio_data_dir(bio);
 	const int do_sync = bio_sync(bio);
-	int do_barriers;
+	int cpu, do_barriers;
 	mdk_rdev_t *blocked_rdev;
 
 	/*
@@ -804,8 +804,10 @@
 
 	bitmap = mddev->bitmap;
 
-	disk_stat_inc(mddev->gendisk, ios[rw]);
-	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
+	cpu = disk_stat_lock();
+	disk_stat_inc(cpu, mddev->gendisk, ios[rw]);
+	disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio));
+	disk_stat_unlock();
 
 	/*
 	 * make_request() can abort the operation when READA is being
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index d3b9aa0..5ec80da 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -789,6 +789,7 @@
 	mirror_info_t *mirror;
 	r10bio_t *r10_bio;
 	struct bio *read_bio;
+	int cpu;
 	int i;
 	int chunk_sects = conf->chunk_mask + 1;
 	const int rw = bio_data_dir(bio);
@@ -843,8 +844,10 @@
 	 */
 	wait_barrier(conf);
 
-	disk_stat_inc(mddev->gendisk, ios[rw]);
-	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
+	cpu = disk_stat_lock();
+	disk_stat_inc(cpu, mddev->gendisk, ios[rw]);
+	disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bio));
+	disk_stat_unlock();
 
 	r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
 
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 37e5465..5899f21 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3387,7 +3387,7 @@
 	sector_t logical_sector, last_sector;
 	struct stripe_head *sh;
 	const int rw = bio_data_dir(bi);
-	int remaining;
+	int cpu, remaining;
 
 	if (unlikely(bio_barrier(bi))) {
 		bio_endio(bi, -EOPNOTSUPP);
@@ -3396,8 +3396,10 @@
 
 	md_write_start(mddev, bi);
 
-	disk_stat_inc(mddev->gendisk, ios[rw]);
-	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bi));
+	cpu = disk_stat_lock();
+	disk_stat_inc(cpu, mddev->gendisk, ios[rw]);
+	disk_stat_add(cpu, mddev->gendisk, sectors[rw], bio_sectors(bi));
+	disk_stat_unlock();
 
 	if (rw == READ &&
 	     mddev->reshape_position == MaxSector &&