block: add partition resize function to blkpg ioctl

Add a new operation code (BLKPG_RESIZE_PARTITION) to the BLKPG ioctl that
allows altering the size of an existing partition, even if it is currently
in use.

This patch converts hd_struct->nr_sects into sequence counter because
One might extend a partition while IO is happening to it and update of
nr_sects can be non-atomic on 32bit machines with 64bit sector_t. This
can lead to issues like reading inconsistent size of a partition. Sequence
counter have been used so that readers don't have to take bdev mutex lock
as we call sector_in_part() very frequently.

Now all the access to hd_struct->nr_sects should happen using sequence
counter read/update helper functions part_nr_sects_read/part_nr_sects_write.
There is one exception though, set_capacity()/get_capacity(). I think
theoritically race should exist there too but this patch does not
modify set_capacity()/get_capacity() due to sheer number of call sites
and I am afraid that change might break something. I have left that as a
TODO item. We can handle it later if need be. This patch does not introduce
any new races as such w.r.t set_capacity()/get_capacity().

v2: Add CONFIG_LBDAF test to UP preempt case as suggested by Phillip.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Phillip Susi <psusi@ubuntu.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 017a7fb..b88723b 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -98,7 +98,13 @@
 
 struct hd_struct {
 	sector_t start_sect;
+	/*
+	 * nr_sects is protected by sequence counter. One might extend a
+	 * partition while IO is happening to it and update of nr_sects
+	 * can be non-atomic on 32bit machines with 64bit sector_t.
+	 */
 	sector_t nr_sects;
+	seqcount_t nr_sects_seq;
 	sector_t alignment_offset;
 	unsigned int discard_alignment;
 	struct device __dev;
@@ -648,6 +654,57 @@
 		__delete_partition(part);
 }
 
+/*
+ * Any access of part->nr_sects which is not protected by partition
+ * bd_mutex or gendisk bdev bd_mutex, should be done using this
+ * accessor function.
+ *
+ * Code written along the lines of i_size_read() and i_size_write().
+ * CONFIG_PREEMPT case optimizes the case of UP kernel with preemption
+ * on.
+ */
+static inline sector_t part_nr_sects_read(struct hd_struct *part)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP)
+	sector_t nr_sects;
+	unsigned seq;
+	do {
+		seq = read_seqcount_begin(&part->nr_sects_seq);
+		nr_sects = part->nr_sects;
+	} while (read_seqcount_retry(&part->nr_sects_seq, seq));
+	return nr_sects;
+#elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT)
+	sector_t nr_sects;
+
+	preempt_disable();
+	nr_sects = part->nr_sects;
+	preempt_enable();
+	return nr_sects;
+#else
+	return part->nr_sects;
+#endif
+}
+
+/*
+ * Should be called with mutex lock held (typically bd_mutex) of partition
+ * to provide mutual exlusion among writers otherwise seqcount might be
+ * left in wrong state leaving the readers spinning infinitely.
+ */
+static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP)
+	write_seqcount_begin(&part->nr_sects_seq);
+	part->nr_sects = size;
+	write_seqcount_end(&part->nr_sects_seq);
+#elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT)
+	preempt_disable();
+	part->nr_sects = size;
+	preempt_enable();
+#else
+	part->nr_sects = size;
+#endif
+}
+
 #else /* CONFIG_BLOCK */
 
 static inline void printk_all_partitions(void) { }