Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block
* 'for-linus' of git://git.kernel.dk/linux-2.6-block:
Block: use round_jiffies_up()
Add round_jiffies_up and related routines
block: fix __blkdev_get() for removable devices
generic-ipi: fix the smp_mb() placement
blk: move blk_delete_timer call in end_that_request_last
block: add timer on blkdev_dequeue_request() not elv_next_request()
bio: define __BIOVEC_PHYS_MERGEABLE
block: remove unused ll_new_mergeable()
diff --git a/block/blk-core.c b/block/blk-core.c
index c3df30c..10e8a64 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1770,8 +1770,6 @@
{
struct gendisk *disk = req->rq_disk;
- blk_delete_timer(req);
-
if (blk_rq_tagged(req))
blk_queue_end_tag(req->q, req);
@@ -1781,6 +1779,8 @@
if (unlikely(laptop_mode) && blk_fs_request(req))
laptop_io_completion();
+ blk_delete_timer(req);
+
/*
* Account IO completion. bar_rq isn't accounted as a normal
* IO on queueing nor completion. Accounting the containing
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 8681cd6..b92f5b0 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -222,27 +222,6 @@
}
EXPORT_SYMBOL(blk_rq_map_sg);
-static inline int ll_new_mergeable(struct request_queue *q,
- struct request *req,
- struct bio *bio)
-{
- int nr_phys_segs = bio_phys_segments(q, bio);
-
- if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
- req->cmd_flags |= REQ_NOMERGE;
- if (req == q->last_merge)
- q->last_merge = NULL;
- return 0;
- }
-
- /*
- * A hw segment is just getting larger, bump just the phys
- * counter.
- */
- req->nr_phys_segments += nr_phys_segs;
- return 1;
-}
-
static inline int ll_new_hw_segment(struct request_queue *q,
struct request *req,
struct bio *bio)
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 972a63f..69185ea 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -75,14 +75,7 @@
{
struct request_queue *q = req->q;
- /*
- * Nothing to detach
- */
- if (!q->rq_timed_out_fn || !req->deadline)
- return;
-
list_del_init(&req->timeout_list);
-
if (list_empty(&q->timeout_list))
del_timer(&q->timeout);
}
@@ -142,7 +135,7 @@
}
if (next_set && !list_empty(&q->timeout_list))
- mod_timer(&q->timeout, round_jiffies(next));
+ mod_timer(&q->timeout, round_jiffies_up(next));
spin_unlock_irqrestore(q->queue_lock, flags);
}
@@ -198,17 +191,10 @@
/*
* If the timer isn't already pending or this timeout is earlier
- * than an existing one, modify the timer. Round to next nearest
+ * than an existing one, modify the timer. Round up to next nearest
* second.
*/
- expiry = round_jiffies(req->deadline);
-
- /*
- * We use ->deadline == 0 to detect whether a timer was added or
- * not, so just increase to next jiffy for that specific case
- */
- if (unlikely(!req->deadline))
- req->deadline = 1;
+ expiry = round_jiffies_up(req->deadline);
if (!timer_pending(&q->timeout) ||
time_before(expiry, q->timeout.expires))
diff --git a/block/elevator.c b/block/elevator.c
index 59173a6..9ac82dd 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -773,12 +773,6 @@
*/
rq->cmd_flags |= REQ_STARTED;
blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
-
- /*
- * We are now handing the request to the hardware,
- * add the timeout handler
- */
- blk_add_timer(rq);
}
if (!q->boundary_rq || q->boundary_rq == rq) {
@@ -850,6 +844,12 @@
*/
if (blk_account_rq(rq))
q->in_flight++;
+
+ /*
+ * We are now handing the request to the hardware, add the
+ * timeout handler.
+ */
+ blk_add_timer(rq);
}
EXPORT_SYMBOL(elv_dequeue_request);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 88a776f..db831ef 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -986,7 +986,6 @@
static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
{
struct gendisk *disk;
- struct hd_struct *part = NULL;
int ret;
int partno;
int perm = 0;
@@ -1004,24 +1003,25 @@
return ret;
}
- ret = -ENXIO;
-
lock_kernel();
+ ret = -ENXIO;
disk = get_gendisk(bdev->bd_dev, &partno);
if (!disk)
goto out_unlock_kernel;
- part = disk_get_part(disk, partno);
- if (!part)
- goto out_unlock_kernel;
mutex_lock_nested(&bdev->bd_mutex, for_part);
if (!bdev->bd_openers) {
bdev->bd_disk = disk;
- bdev->bd_part = part;
bdev->bd_contains = bdev;
if (!partno) {
struct backing_dev_info *bdi;
+
+ ret = -ENXIO;
+ bdev->bd_part = disk_get_part(disk, partno);
+ if (!bdev->bd_part)
+ goto out_clear;
+
if (disk->fops->open) {
ret = disk->fops->open(bdev, mode);
if (ret)
@@ -1049,18 +1049,17 @@
bdev->bd_contains = whole;
bdev->bd_inode->i_data.backing_dev_info =
whole->bd_inode->i_data.backing_dev_info;
+ bdev->bd_part = disk_get_part(disk, partno);
if (!(disk->flags & GENHD_FL_UP) ||
- !part || !part->nr_sects) {
+ !bdev->bd_part || !bdev->bd_part->nr_sects) {
ret = -ENXIO;
goto out_clear;
}
- bd_set_size(bdev, (loff_t)part->nr_sects << 9);
+ bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
}
} else {
- disk_put_part(part);
put_disk(disk);
module_put(disk->fops->owner);
- part = NULL;
disk = NULL;
if (bdev->bd_contains == bdev) {
if (bdev->bd_disk->fops->open) {
@@ -1080,6 +1079,7 @@
return 0;
out_clear:
+ disk_put_part(bdev->bd_part);
bdev->bd_disk = NULL;
bdev->bd_part = NULL;
bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
@@ -1091,7 +1091,6 @@
out_unlock_kernel:
unlock_kernel();
- disk_put_part(part);
if (disk)
module_put(disk->fops->owner);
put_disk(disk);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 1c91a17..6a64209 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -236,12 +236,16 @@
#define __BVEC_END(bio) bio_iovec_idx((bio), (bio)->bi_vcnt - 1)
#define __BVEC_START(bio) bio_iovec_idx((bio), (bio)->bi_idx)
+/* Default implementation of BIOVEC_PHYS_MERGEABLE */
+#define __BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
+ ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2)))
+
/*
* allow arch override, for eg virtualized architectures (put in asm/io.h)
*/
#ifndef BIOVEC_PHYS_MERGEABLE
#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
- ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2)))
+ __BIOVEC_PHYS_MERGEABLE(vec1, vec2)
#endif
#define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \
diff --git a/include/linux/timer.h b/include/linux/timer.h
index d4ba792..daf9685 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -186,4 +186,9 @@
unsigned long round_jiffies(unsigned long j);
unsigned long round_jiffies_relative(unsigned long j);
+unsigned long __round_jiffies_up(unsigned long j, int cpu);
+unsigned long __round_jiffies_up_relative(unsigned long j, int cpu);
+unsigned long round_jiffies_up(unsigned long j);
+unsigned long round_jiffies_up_relative(unsigned long j);
+
#endif
diff --git a/kernel/smp.c b/kernel/smp.c
index f362a85..75c8dde 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -51,10 +51,6 @@
{
/* Wait for response */
do {
- /*
- * We need to see the flags store in the IPI handler
- */
- smp_mb();
if (!(data->flags & CSD_FLAG_WAIT))
break;
cpu_relax();
@@ -76,6 +72,11 @@
list_add_tail(&data->list, &dst->list);
spin_unlock_irqrestore(&dst->lock, flags);
+ /*
+ * Make the list addition visible before sending the ipi.
+ */
+ smp_mb();
+
if (ipi)
arch_send_call_function_single_ipi(cpu);
@@ -157,7 +158,7 @@
* Need to see other stores to list head for checking whether
* list is empty without holding q->lock
*/
- smp_mb();
+ smp_read_barrier_depends();
while (!list_empty(&q->list)) {
unsigned int data_flags;
@@ -191,7 +192,7 @@
/*
* See comment on outer loop
*/
- smp_mb();
+ smp_read_barrier_depends();
}
}
@@ -370,6 +371,11 @@
list_add_tail_rcu(&data->csd.list, &call_function_queue);
spin_unlock_irqrestore(&call_function_lock, flags);
+ /*
+ * Make the list addition visible before sending the ipi.
+ */
+ smp_mb();
+
/* Send a message to all CPUs in the map */
arch_send_call_function_ipi(mask);
diff --git a/kernel/timer.c b/kernel/timer.c
index 56becf3..dbd50fa 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -112,6 +112,44 @@
tbase_get_deferrable(timer->base));
}
+static unsigned long round_jiffies_common(unsigned long j, int cpu,
+ bool force_up)
+{
+ int rem;
+ unsigned long original = j;
+
+ /*
+ * We don't want all cpus firing their timers at once hitting the
+ * same lock or cachelines, so we skew each extra cpu with an extra
+ * 3 jiffies. This 3 jiffies came originally from the mm/ code which
+ * already did this.
+ * The skew is done by adding 3*cpunr, then round, then subtract this
+ * extra offset again.
+ */
+ j += cpu * 3;
+
+ rem = j % HZ;
+
+ /*
+ * If the target jiffie is just after a whole second (which can happen
+ * due to delays of the timer irq, long irq off times etc etc) then
+ * we should round down to the whole second, not up. Use 1/4th second
+ * as cutoff for this rounding as an extreme upper bound for this.
+ * But never round down if @force_up is set.
+ */
+ if (rem < HZ/4 && !force_up) /* round down */
+ j = j - rem;
+ else /* round up */
+ j = j - rem + HZ;
+
+ /* now that we have rounded, subtract the extra skew again */
+ j -= cpu * 3;
+
+ if (j <= jiffies) /* rounding ate our timeout entirely; */
+ return original;
+ return j;
+}
+
/**
* __round_jiffies - function to round jiffies to a full second
* @j: the time in (absolute) jiffies that should be rounded
@@ -134,38 +172,7 @@
*/
unsigned long __round_jiffies(unsigned long j, int cpu)
{
- int rem;
- unsigned long original = j;
-
- /*
- * We don't want all cpus firing their timers at once hitting the
- * same lock or cachelines, so we skew each extra cpu with an extra
- * 3 jiffies. This 3 jiffies came originally from the mm/ code which
- * already did this.
- * The skew is done by adding 3*cpunr, then round, then subtract this
- * extra offset again.
- */
- j += cpu * 3;
-
- rem = j % HZ;
-
- /*
- * If the target jiffie is just after a whole second (which can happen
- * due to delays of the timer irq, long irq off times etc etc) then
- * we should round down to the whole second, not up. Use 1/4th second
- * as cutoff for this rounding as an extreme upper bound for this.
- */
- if (rem < HZ/4) /* round down */
- j = j - rem;
- else /* round up */
- j = j - rem + HZ;
-
- /* now that we have rounded, subtract the extra skew again */
- j -= cpu * 3;
-
- if (j <= jiffies) /* rounding ate our timeout entirely; */
- return original;
- return j;
+ return round_jiffies_common(j, cpu, false);
}
EXPORT_SYMBOL_GPL(__round_jiffies);
@@ -191,13 +198,10 @@
*/
unsigned long __round_jiffies_relative(unsigned long j, int cpu)
{
- /*
- * In theory the following code can skip a jiffy in case jiffies
- * increments right between the addition and the later subtraction.
- * However since the entire point of this function is to use approximate
- * timeouts, it's entirely ok to not handle that.
- */
- return __round_jiffies(j + jiffies, cpu) - jiffies;
+ unsigned long j0 = jiffies;
+
+ /* Use j0 because jiffies might change while we run */
+ return round_jiffies_common(j + j0, cpu, false) - j0;
}
EXPORT_SYMBOL_GPL(__round_jiffies_relative);
@@ -218,7 +222,7 @@
*/
unsigned long round_jiffies(unsigned long j)
{
- return __round_jiffies(j, raw_smp_processor_id());
+ return round_jiffies_common(j, raw_smp_processor_id(), false);
}
EXPORT_SYMBOL_GPL(round_jiffies);
@@ -243,6 +247,71 @@
}
EXPORT_SYMBOL_GPL(round_jiffies_relative);
+/**
+ * __round_jiffies_up - function to round jiffies up to a full second
+ * @j: the time in (absolute) jiffies that should be rounded
+ * @cpu: the processor number on which the timeout will happen
+ *
+ * This is the same as __round_jiffies() except that it will never
+ * round down. This is useful for timeouts for which the exact time
+ * of firing does not matter too much, as long as they don't fire too
+ * early.
+ */
+unsigned long __round_jiffies_up(unsigned long j, int cpu)
+{
+ return round_jiffies_common(j, cpu, true);
+}
+EXPORT_SYMBOL_GPL(__round_jiffies_up);
+
+/**
+ * __round_jiffies_up_relative - function to round jiffies up to a full second
+ * @j: the time in (relative) jiffies that should be rounded
+ * @cpu: the processor number on which the timeout will happen
+ *
+ * This is the same as __round_jiffies_relative() except that it will never
+ * round down. This is useful for timeouts for which the exact time
+ * of firing does not matter too much, as long as they don't fire too
+ * early.
+ */
+unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
+{
+ unsigned long j0 = jiffies;
+
+ /* Use j0 because jiffies might change while we run */
+ return round_jiffies_common(j + j0, cpu, true) - j0;
+}
+EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);
+
+/**
+ * round_jiffies_up - function to round jiffies up to a full second
+ * @j: the time in (absolute) jiffies that should be rounded
+ *
+ * This is the same as round_jiffies() except that it will never
+ * round down. This is useful for timeouts for which the exact time
+ * of firing does not matter too much, as long as they don't fire too
+ * early.
+ */
+unsigned long round_jiffies_up(unsigned long j)
+{
+ return round_jiffies_common(j, raw_smp_processor_id(), true);
+}
+EXPORT_SYMBOL_GPL(round_jiffies_up);
+
+/**
+ * round_jiffies_up_relative - function to round jiffies up to a full second
+ * @j: the time in (relative) jiffies that should be rounded
+ *
+ * This is the same as round_jiffies_relative() except that it will never
+ * round down. This is useful for timeouts for which the exact time
+ * of firing does not matter too much, as long as they don't fire too
+ * early.
+ */
+unsigned long round_jiffies_up_relative(unsigned long j)
+{
+ return __round_jiffies_up_relative(j, raw_smp_processor_id());
+}
+EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
+
static inline void set_running_timer(struct tvec_base *base,
struct timer_list *timer)