Merge branch 'blk-softirq' of git://brick.kernel.dk/data/git/linux-2.6-block
Manual merge for trivial #include changes
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index c44d6fe..8e27d0a 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -26,6 +26,8 @@
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/writeback.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
/*
* for max sense size
@@ -61,13 +63,15 @@
/*
* Controlling structure to kblockd
*/
-static struct workqueue_struct *kblockd_workqueue;
+static struct workqueue_struct *kblockd_workqueue;
unsigned long blk_max_low_pfn, blk_max_pfn;
EXPORT_SYMBOL(blk_max_low_pfn);
EXPORT_SYMBOL(blk_max_pfn);
+static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
+
/* Amount of time in which a process may batch requests */
#define BLK_BATCH_TIME (HZ/50UL)
@@ -206,6 +210,13 @@
EXPORT_SYMBOL(blk_queue_merge_bvec);
+void blk_queue_softirq_done(request_queue_t *q, softirq_done_fn *fn)
+{
+ q->softirq_done_fn = fn;
+}
+
+EXPORT_SYMBOL(blk_queue_softirq_done);
+
/**
* blk_queue_make_request - define an alternate make_request function for a device
* @q: the request queue for the device to be affected
@@ -269,6 +280,7 @@
static inline void rq_init(request_queue_t *q, struct request *rq)
{
INIT_LIST_HEAD(&rq->queuelist);
+ INIT_LIST_HEAD(&rq->donelist);
rq->errors = 0;
rq->rq_status = RQ_ACTIVE;
@@ -285,6 +297,7 @@
rq->sense = NULL;
rq->end_io = NULL;
rq->end_io_data = NULL;
+ rq->completion_data = NULL;
}
/**
@@ -3262,6 +3275,87 @@
EXPORT_SYMBOL(end_that_request_chunk);
/*
+ * splice the completion data to a local structure and hand each request
+ * off to its queue's softirq_done_fn callback to complete the requests
+ */
+static void blk_done_softirq(struct softirq_action *h)
+{
+ struct list_head *cpu_list;
+ LIST_HEAD(local_list);
+
+ local_irq_disable();
+ cpu_list = &__get_cpu_var(blk_cpu_done);
+ list_splice_init(cpu_list, &local_list);
+ local_irq_enable();
+
+ while (!list_empty(&local_list)) {
+ struct request *rq = list_entry(local_list.next, struct request, donelist);
+
+ list_del_init(&rq->donelist);
+ rq->q->softirq_done_fn(rq);
+ }
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+static int blk_cpu_notify(struct notifier_block *self, unsigned long action,
+ void *hcpu)
+{
+ /*
+ * If a CPU goes away, splice its entries to the current CPU
+ * and trigger a run of the softirq
+ */
+ if (action == CPU_DEAD) {
+ int cpu = (unsigned long) hcpu;
+
+ local_irq_disable();
+ list_splice_init(&per_cpu(blk_cpu_done, cpu),
+ &__get_cpu_var(blk_cpu_done));
+ raise_softirq_irqoff(BLOCK_SOFTIRQ);
+ local_irq_enable();
+ }
+
+ return NOTIFY_OK;
+}
+
+
+static struct notifier_block __devinitdata blk_cpu_notifier = {
+ .notifier_call = blk_cpu_notify,
+};
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
+/**
+ * blk_complete_request - end I/O on a request
+ * @req: the request being processed
+ *
+ * Description:
+ * Ends all I/O on a request. It does not handle partial completions,
+ * unless the driver actually implements this in its completion callback
+ * through requeueing. The actual completion happens out-of-order,
+ * through a softirq handler. The user must have registered a completion
+ * callback through blk_queue_softirq_done().
+ **/
+
+void blk_complete_request(struct request *req)
+{
+ struct list_head *cpu_list;
+ unsigned long flags;
+
+ BUG_ON(!req->q->softirq_done_fn);
+
+ local_irq_save(flags);
+
+ cpu_list = &__get_cpu_var(blk_cpu_done);
+ list_add_tail(&req->donelist, cpu_list);
+ raise_softirq_irqoff(BLOCK_SOFTIRQ);
+
+ local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL(blk_complete_request);
+
+/*
* queue lock must be held
*/
void end_that_request_last(struct request *req, int uptodate)
@@ -3339,6 +3433,8 @@
int __init blk_dev_init(void)
{
+ int i;
+
kblockd_workqueue = create_workqueue("kblockd");
if (!kblockd_workqueue)
panic("Failed to create kblockd\n");
@@ -3352,6 +3448,14 @@
iocontext_cachep = kmem_cache_create("blkdev_ioc",
sizeof(struct io_context), 0, SLAB_PANIC, NULL, NULL);
+ for (i = 0; i < NR_CPUS; i++)
+ INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
+
+ open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL);
+#ifdef CONFIG_HOTPLUG_CPU
+ register_cpu_notifier(&blk_cpu_notifier);
+#endif
+
blk_max_low_pfn = max_low_pfn;
blk_max_pfn = max_pfn;
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 88452c7..e4e9f25 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -2178,16 +2178,48 @@
start_io(h);
}
+
+static void cciss_softirq_done(struct request *rq)
+{
+ CommandList_struct *cmd = rq->completion_data;
+ ctlr_info_t *h = hba[cmd->ctlr];
+ u64bit temp64;
+ int i, ddir;
+
+ if (cmd->Request.Type.Direction == XFER_READ)
+ ddir = PCI_DMA_FROMDEVICE;
+ else
+ ddir = PCI_DMA_TODEVICE;
+
+ /* command did not need to be retried */
+ /* unmap the DMA mapping for all the scatter gather elements */
+ for(i=0; i<cmd->Header.SGList; i++) {
+ temp64.val32.lower = cmd->SG[i].Addr.lower;
+ temp64.val32.upper = cmd->SG[i].Addr.upper;
+ pci_unmap_page(h->pdev, temp64.val, cmd->SG[i].Len, ddir);
+ }
+
+ complete_buffers(rq->bio, rq->errors);
+
+#ifdef CCISS_DEBUG
+ printk("Done with %p\n", rq);
+#endif /* CCISS_DEBUG */
+
+ spin_lock_irq(&h->lock);
+ end_that_request_last(rq, rq->errors);
+ cmd_free(h, cmd,1);
+ spin_unlock_irq(&h->lock);
+}
+
/* checks the status of the job and calls complete buffers to mark all
- * buffers for the completed job.
+ * buffers for the completed job. Note that this function does not need
+ * to hold the hba/queue lock.
*/
static inline void complete_command( ctlr_info_t *h, CommandList_struct *cmd,
int timeout)
{
int status = 1;
- int i;
int retry_cmd = 0;
- u64bit temp64;
if (timeout)
status = 0;
@@ -2295,24 +2327,10 @@
resend_cciss_cmd(h,cmd);
return;
}
- /* command did not need to be retried */
- /* unmap the DMA mapping for all the scatter gather elements */
- for(i=0; i<cmd->Header.SGList; i++) {
- temp64.val32.lower = cmd->SG[i].Addr.lower;
- temp64.val32.upper = cmd->SG[i].Addr.upper;
- pci_unmap_page(hba[cmd->ctlr]->pdev,
- temp64.val, cmd->SG[i].Len,
- (cmd->Request.Type.Direction == XFER_READ) ?
- PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE);
- }
- complete_buffers(cmd->rq->bio, status);
-#ifdef CCISS_DEBUG
- printk("Done with %p\n", cmd->rq);
-#endif /* CCISS_DEBUG */
-
- end_that_request_last(cmd->rq, status ? 1 : -EIO);
- cmd_free(h,cmd,1);
+ cmd->rq->completion_data = cmd;
+ cmd->rq->errors = status;
+ blk_complete_request(cmd->rq);
}
/*
@@ -3199,15 +3217,17 @@
drv->queue = q;
q->backing_dev_info.ra_pages = READ_AHEAD;
- blk_queue_bounce_limit(q, hba[i]->pdev->dma_mask);
+ blk_queue_bounce_limit(q, hba[i]->pdev->dma_mask);
- /* This is a hardware imposed limit. */
- blk_queue_max_hw_segments(q, MAXSGENTRIES);
+ /* This is a hardware imposed limit. */
+ blk_queue_max_hw_segments(q, MAXSGENTRIES);
- /* This is a limit in the driver and could be eliminated. */
- blk_queue_max_phys_segments(q, MAXSGENTRIES);
+ /* This is a limit in the driver and could be eliminated. */
+ blk_queue_max_phys_segments(q, MAXSGENTRIES);
- blk_queue_max_sectors(q, 512);
+ blk_queue_max_sectors(q, 512);
+
+ blk_queue_softirq_done(q, cciss_softirq_done);
q->queuedata = hba[i];
sprintf(disk->disk_name, "cciss/c%dd%d", i, j);
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index b5dc6df..dea2d4d 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -55,9 +55,22 @@
#include <asm/io.h>
#include <asm/bitops.h>
+void ide_softirq_done(struct request *rq)
+{
+ request_queue_t *q = rq->q;
+
+ add_disk_randomness(rq->rq_disk);
+ end_that_request_chunk(rq, rq->errors, rq->data_len);
+
+ spin_lock_irq(q->queue_lock);
+ end_that_request_last(rq, rq->errors);
+ spin_unlock_irq(q->queue_lock);
+}
+
int __ide_end_request(ide_drive_t *drive, struct request *rq, int uptodate,
int nr_sectors)
{
+ unsigned int nbytes;
int ret = 1;
BUG_ON(!(rq->flags & REQ_STARTED));
@@ -81,17 +94,28 @@
HWGROUP(drive)->hwif->ide_dma_on(drive);
}
- if (!end_that_request_first(rq, uptodate, nr_sectors)) {
- add_disk_randomness(rq->rq_disk);
-
- if (blk_rq_tagged(rq))
- blk_queue_end_tag(drive->queue, rq);
-
+ /*
+ * For partial completions (or non fs/pc requests), use the regular
+ * direct completion path.
+ */
+ nbytes = nr_sectors << 9;
+ if (rq_all_done(rq, nbytes)) {
+ rq->errors = uptodate;
+ rq->data_len = nbytes;
blkdev_dequeue_request(rq);
HWGROUP(drive)->rq = NULL;
- end_that_request_last(rq, uptodate);
+ blk_complete_request(rq);
ret = 0;
+ } else {
+ if (!end_that_request_first(rq, uptodate, nr_sectors)) {
+ add_disk_randomness(rq->rq_disk);
+ blkdev_dequeue_request(rq);
+ HWGROUP(drive)->rq = NULL;
+ end_that_request_last(rq, uptodate);
+ ret = 0;
+ }
}
+
return ret;
}
EXPORT_SYMBOL(__ide_end_request);
@@ -113,6 +137,10 @@
unsigned long flags;
int ret = 1;
+ /*
+ * room for locking improvements here, the calls below don't
+ * need the queue lock held at all
+ */
spin_lock_irqsave(&ide_lock, flags);
rq = HWGROUP(drive)->rq;
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 02167a5..1ddaa71 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1011,6 +1011,8 @@
blk_queue_max_hw_segments(q, max_sg_entries);
blk_queue_max_phys_segments(q, max_sg_entries);
+ blk_queue_softirq_done(q, ide_softirq_done);
+
/* assign drive queue */
drive->queue = q;
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 180676d..ee5f4df 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -69,7 +69,6 @@
#include "scsi_logging.h"
static void scsi_done(struct scsi_cmnd *cmd);
-static int scsi_retry_command(struct scsi_cmnd *cmd);
/*
* Definitions and constants.
@@ -752,7 +751,7 @@
* isn't running --- used by scsi_times_out */
void __scsi_done(struct scsi_cmnd *cmd)
{
- unsigned long flags;
+ struct request *rq = cmd->request;
/*
* Set the serial numbers back to zero
@@ -763,71 +762,14 @@
if (cmd->result)
atomic_inc(&cmd->device->ioerr_cnt);
+ BUG_ON(!rq);
+
/*
- * Next, enqueue the command into the done queue.
- * It is a per-CPU queue, so we just disable local interrupts
- * and need no spinlock.
+ * The uptodate/nbytes values don't matter, as we allow partial
+ * completes and thus will check this in the softirq callback
*/
- local_irq_save(flags);
- list_add_tail(&cmd->eh_entry, &__get_cpu_var(scsi_done_q));
- raise_softirq_irqoff(SCSI_SOFTIRQ);
- local_irq_restore(flags);
-}
-
-/**
- * scsi_softirq - Perform post-interrupt processing of finished SCSI commands.
- *
- * This is the consumer of the done queue.
- *
- * This is called with all interrupts enabled. This should reduce
- * interrupt latency, stack depth, and reentrancy of the low-level
- * drivers.
- */
-static void scsi_softirq(struct softirq_action *h)
-{
- int disposition;
- LIST_HEAD(local_q);
-
- local_irq_disable();
- list_splice_init(&__get_cpu_var(scsi_done_q), &local_q);
- local_irq_enable();
-
- while (!list_empty(&local_q)) {
- struct scsi_cmnd *cmd = list_entry(local_q.next,
- struct scsi_cmnd, eh_entry);
- /* The longest time any command should be outstanding is the
- * per command timeout multiplied by the number of retries.
- *
- * For a typical command, this is 2.5 minutes */
- unsigned long wait_for
- = cmd->allowed * cmd->timeout_per_command;
- list_del_init(&cmd->eh_entry);
-
- disposition = scsi_decide_disposition(cmd);
- if (disposition != SUCCESS &&
- time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
- sdev_printk(KERN_ERR, cmd->device,
- "timing out command, waited %lus\n",
- wait_for/HZ);
- disposition = SUCCESS;
- }
-
- scsi_log_completion(cmd, disposition);
- switch (disposition) {
- case SUCCESS:
- scsi_finish_command(cmd);
- break;
- case NEEDS_RETRY:
- scsi_retry_command(cmd);
- break;
- case ADD_TO_MLQUEUE:
- scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
- break;
- default:
- if (!scsi_eh_scmd_add(cmd, 0))
- scsi_finish_command(cmd);
- }
- }
+ rq->completion_data = cmd;
+ blk_complete_request(rq);
}
/*
@@ -840,7 +782,7 @@
* level drivers should not become re-entrant as a result of
* this.
*/
-static int scsi_retry_command(struct scsi_cmnd *cmd)
+int scsi_retry_command(struct scsi_cmnd *cmd)
{
/*
* Restore the SCSI command state.
@@ -1273,38 +1215,6 @@
}
EXPORT_SYMBOL(scsi_device_cancel);
-#ifdef CONFIG_HOTPLUG_CPU
-static int scsi_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
-{
- int cpu = (unsigned long)hcpu;
-
- switch(action) {
- case CPU_DEAD:
- /* Drain scsi_done_q. */
- local_irq_disable();
- list_splice_init(&per_cpu(scsi_done_q, cpu),
- &__get_cpu_var(scsi_done_q));
- raise_softirq_irqoff(SCSI_SOFTIRQ);
- local_irq_enable();
- break;
- default:
- break;
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block __devinitdata scsi_cpu_nb = {
- .notifier_call = scsi_cpu_notify,
-};
-
-#define register_scsi_cpu() register_cpu_notifier(&scsi_cpu_nb)
-#define unregister_scsi_cpu() unregister_cpu_notifier(&scsi_cpu_nb)
-#else
-#define register_scsi_cpu()
-#define unregister_scsi_cpu()
-#endif /* CONFIG_HOTPLUG_CPU */
-
MODULE_DESCRIPTION("SCSI core");
MODULE_LICENSE("GPL");
@@ -1338,8 +1248,6 @@
INIT_LIST_HEAD(&per_cpu(scsi_done_q, i));
devfs_mk_dir("scsi");
- open_softirq(SCSI_SOFTIRQ, scsi_softirq, NULL);
- register_scsi_cpu();
printk(KERN_NOTICE "SCSI subsystem initialized\n");
return 0;
@@ -1367,7 +1275,6 @@
devfs_remove("scsi");
scsi_exit_procfs();
scsi_exit_queue();
- unregister_scsi_cpu();
}
subsys_initcall(init_scsi);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index ba93d6e..00c9bf3 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1493,6 +1493,41 @@
__scsi_done(cmd);
}
+static void scsi_softirq_done(struct request *rq)
+{
+ struct scsi_cmnd *cmd = rq->completion_data;
+ unsigned long wait_for = cmd->allowed * cmd->timeout_per_command;
+ int disposition;
+
+ INIT_LIST_HEAD(&cmd->eh_entry);
+
+ disposition = scsi_decide_disposition(cmd);
+ if (disposition != SUCCESS &&
+ time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
+ sdev_printk(KERN_ERR, cmd->device,
+ "timing out command, waited %lus\n",
+ wait_for/HZ);
+ disposition = SUCCESS;
+ }
+
+ scsi_log_completion(cmd, disposition);
+
+ switch (disposition) {
+ case SUCCESS:
+ scsi_finish_command(cmd);
+ break;
+ case NEEDS_RETRY:
+ scsi_retry_command(cmd);
+ break;
+ case ADD_TO_MLQUEUE:
+ scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
+ break;
+ default:
+ if (!scsi_eh_scmd_add(cmd, 0))
+ scsi_finish_command(cmd);
+ }
+}
+
/*
* Function: scsi_request_fn()
*
@@ -1667,6 +1702,7 @@
blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
blk_queue_segment_boundary(q, shost->dma_boundary);
blk_queue_issue_flush_fn(q, scsi_issue_flush_fn);
+ blk_queue_softirq_done(q, scsi_softirq_done);
if (!shost->use_clustering)
clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index f04e7e1..14a6198 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -44,6 +44,7 @@
struct scsi_request *sreq);
extern void __scsi_release_request(struct scsi_request *sreq);
extern void __scsi_done(struct scsi_cmnd *cmd);
+extern int scsi_retry_command(struct scsi_cmnd *cmd);
#ifdef CONFIG_SCSI_LOGGING
void scsi_log_send(struct scsi_cmnd *cmd);
void scsi_log_completion(struct scsi_cmnd *cmd, int disposition);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 96b2339..02a585f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -118,9 +118,9 @@
* try to put the fields that are referenced together in the same cacheline
*/
struct request {
- struct list_head queuelist; /* looking for ->queue? you must _not_
- * access it directly, use
- * blkdev_dequeue_request! */
+ struct list_head queuelist;
+ struct list_head donelist;
+
unsigned long flags; /* see REQ_ bits below */
/* Maintain bio traversal state for part by part I/O submission.
@@ -141,6 +141,7 @@
struct bio *biotail;
void *elevator_private;
+ void *completion_data;
unsigned short ioprio;
@@ -291,6 +292,7 @@
typedef void (activity_fn) (void *data, int rw);
typedef int (issue_flush_fn) (request_queue_t *, struct gendisk *, sector_t *);
typedef void (prepare_flush_fn) (request_queue_t *, struct request *);
+typedef void (softirq_done_fn)(struct request *);
enum blk_queue_state {
Queue_down,
@@ -332,6 +334,7 @@
activity_fn *activity_fn;
issue_flush_fn *issue_flush_fn;
prepare_flush_fn *prepare_flush_fn;
+ softirq_done_fn *softirq_done_fn;
/*
* Dispatch queue sorting
@@ -645,6 +648,17 @@
extern int end_that_request_chunk(struct request *, int, int);
extern void end_that_request_last(struct request *, int);
extern void end_request(struct request *req, int uptodate);
+extern void blk_complete_request(struct request *);
+
+static inline int rq_all_done(struct request *rq, unsigned int nr_bytes)
+{
+ if (blk_fs_request(rq))
+ return (nr_bytes >= (rq->hard_nr_sectors << 9));
+ else if (blk_pc_request(rq))
+ return nr_bytes >= rq->data_len;
+
+ return 0;
+}
/*
* end_that_request_first/chunk() takes an uptodate argument. we account
@@ -693,6 +707,7 @@
extern void blk_queue_prep_rq(request_queue_t *, prep_rq_fn *pfn);
extern void blk_queue_merge_bvec(request_queue_t *, merge_bvec_fn *);
extern void blk_queue_dma_alignment(request_queue_t *, int);
+extern void blk_queue_softirq_done(request_queue_t *, softirq_done_fn *);
extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
extern int blk_queue_ordered(request_queue_t *, unsigned, prepare_flush_fn *);
extern void blk_queue_issue_flush_fn(request_queue_t *, issue_flush_fn *);
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 4dd6694..ef8d0cb 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1001,6 +1001,7 @@
extern int ide_end_request (ide_drive_t *drive, int uptodate, int nrsecs);
extern int __ide_end_request (ide_drive_t *drive, struct request *rq, int uptodate, int nrsecs);
+extern void ide_softirq_done(struct request *rq);
/*
* This is used on exit from the driver to designate the next irq handler
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index e50a95f..2c08fdc 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -112,7 +112,7 @@
TIMER_SOFTIRQ,
NET_TX_SOFTIRQ,
NET_RX_SOFTIRQ,
- SCSI_SOFTIRQ,
+ BLOCK_SOFTIRQ,
TASKLET_SOFTIRQ
};