xen: support suspend/resume in pvscsi frontend
Up to now the pvscsi frontend hasn't supported domain suspend and
resume. When a domain with an assigned pvscsi device was suspended
and resumed again, it was not able to use the device any more: trying
to do so resulted in hanging processes.
Support suspend and resume of pvscsi devices.
Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c
index 34199d2..78d9506 100644
--- a/drivers/scsi/xen-scsifront.c
+++ b/drivers/scsi/xen-scsifront.c
@@ -63,6 +63,7 @@
#define VSCSIFRONT_OP_ADD_LUN 1
#define VSCSIFRONT_OP_DEL_LUN 2
+#define VSCSIFRONT_OP_READD_LUN 3
/* Tuning point. */
#define VSCSIIF_DEFAULT_CMD_PER_LUN 10
@@ -113,8 +114,13 @@
DECLARE_BITMAP(shadow_free_bitmap, VSCSIIF_MAX_REQS);
struct vscsifrnt_shadow *shadow[VSCSIIF_MAX_REQS];
+ /* Following items are protected by the host lock. */
wait_queue_head_t wq_sync;
+ wait_queue_head_t wq_pause;
unsigned int wait_ring_available:1;
+ unsigned int waiting_pause:1;
+ unsigned int pause:1;
+ unsigned callers;
char dev_state_path[64];
struct task_struct *curr;
@@ -274,31 +280,31 @@
wake_up(&shadow->wq_reset);
}
-static int scsifront_cmd_done(struct vscsifrnt_info *info)
+static void scsifront_do_response(struct vscsifrnt_info *info,
+ struct vscsiif_response *ring_rsp)
+{
+ if (WARN(ring_rsp->rqid >= VSCSIIF_MAX_REQS ||
+ test_bit(ring_rsp->rqid, info->shadow_free_bitmap),
+ "illegal rqid %u returned by backend!\n", ring_rsp->rqid))
+ return;
+
+ if (info->shadow[ring_rsp->rqid]->act == VSCSIIF_ACT_SCSI_CDB)
+ scsifront_cdb_cmd_done(info, ring_rsp);
+ else
+ scsifront_sync_cmd_done(info, ring_rsp);
+}
+
+static int scsifront_ring_drain(struct vscsifrnt_info *info)
{
struct vscsiif_response *ring_rsp;
RING_IDX i, rp;
int more_to_do = 0;
- unsigned long flags;
-
- spin_lock_irqsave(info->host->host_lock, flags);
rp = info->ring.sring->rsp_prod;
rmb(); /* ordering required respective to dom0 */
for (i = info->ring.rsp_cons; i != rp; i++) {
-
ring_rsp = RING_GET_RESPONSE(&info->ring, i);
-
- if (WARN(ring_rsp->rqid >= VSCSIIF_MAX_REQS ||
- test_bit(ring_rsp->rqid, info->shadow_free_bitmap),
- "illegal rqid %u returned by backend!\n",
- ring_rsp->rqid))
- continue;
-
- if (info->shadow[ring_rsp->rqid]->act == VSCSIIF_ACT_SCSI_CDB)
- scsifront_cdb_cmd_done(info, ring_rsp);
- else
- scsifront_sync_cmd_done(info, ring_rsp);
+ scsifront_do_response(info, ring_rsp);
}
info->ring.rsp_cons = i;
@@ -308,6 +314,18 @@
else
info->ring.sring->rsp_event = i + 1;
+ return more_to_do;
+}
+
+static int scsifront_cmd_done(struct vscsifrnt_info *info)
+{
+ int more_to_do;
+ unsigned long flags;
+
+ spin_lock_irqsave(info->host->host_lock, flags);
+
+ more_to_do = scsifront_ring_drain(info);
+
info->wait_ring_available = 0;
spin_unlock_irqrestore(info->host->host_lock, flags);
@@ -328,6 +346,24 @@
return IRQ_HANDLED;
}
+static void scsifront_finish_all(struct vscsifrnt_info *info)
+{
+ unsigned i;
+ struct vscsiif_response resp;
+
+ scsifront_ring_drain(info);
+
+ for (i = 0; i < VSCSIIF_MAX_REQS; i++) {
+ if (test_bit(i, info->shadow_free_bitmap))
+ continue;
+ resp.rqid = i;
+ resp.sense_len = 0;
+ resp.rslt = DID_RESET << 16;
+ resp.residual_len = 0;
+ scsifront_do_response(info, &resp);
+ }
+}
+
static int map_data_for_request(struct vscsifrnt_info *info,
struct scsi_cmnd *sc,
struct vscsiif_request *ring_req,
@@ -475,6 +511,27 @@
return ring_req;
}
+static int scsifront_enter(struct vscsifrnt_info *info)
+{
+ if (info->pause)
+ return 1;
+ info->callers++;
+ return 0;
+}
+
+static void scsifront_return(struct vscsifrnt_info *info)
+{
+ info->callers--;
+ if (info->callers)
+ return;
+
+ if (!info->waiting_pause)
+ return;
+
+ info->waiting_pause = 0;
+ wake_up(&info->wq_pause);
+}
+
static int scsifront_queuecommand(struct Scsi_Host *shost,
struct scsi_cmnd *sc)
{
@@ -486,6 +543,10 @@
uint16_t rqid;
spin_lock_irqsave(shost->host_lock, flags);
+ if (scsifront_enter(info)) {
+ spin_unlock_irqrestore(shost->host_lock, flags);
+ return SCSI_MLQUEUE_HOST_BUSY;
+ }
if (RING_FULL(&info->ring))
goto busy;
@@ -505,6 +566,7 @@
if (err < 0) {
pr_debug("%s: err %d\n", __func__, err);
scsifront_put_rqid(info, rqid);
+ scsifront_return(info);
spin_unlock_irqrestore(shost->host_lock, flags);
if (err == -ENOMEM)
return SCSI_MLQUEUE_HOST_BUSY;
@@ -514,11 +576,13 @@
}
scsifront_do_request(info);
+ scsifront_return(info);
spin_unlock_irqrestore(shost->host_lock, flags);
return 0;
busy:
+ scsifront_return(info);
spin_unlock_irqrestore(shost->host_lock, flags);
pr_debug("%s: busy\n", __func__);
return SCSI_MLQUEUE_HOST_BUSY;
@@ -549,7 +613,7 @@
if (ring_req)
break;
}
- if (err) {
+ if (err || info->pause) {
spin_unlock_irq(host->host_lock);
kfree(shadow);
return FAILED;
@@ -561,6 +625,11 @@
spin_lock_irq(host->host_lock);
}
+ if (scsifront_enter(info)) {
+ spin_unlock_irq(host->host_lock);
+ return FAILED;
+ }
+
ring_req->act = act;
ring_req->ref_rqid = s->rqid;
@@ -587,6 +656,7 @@
err = FAILED;
}
+ scsifront_return(info);
spin_unlock_irq(host->host_lock);
return err;
}
@@ -698,6 +768,13 @@
return err;
}
+static void scsifront_free_ring(struct vscsifrnt_info *info)
+{
+ unbind_from_irqhandler(info->irq, info);
+ gnttab_end_foreign_access(info->ring_ref, 0,
+ (unsigned long)info->ring.sring);
+}
+
static int scsifront_init_ring(struct vscsifrnt_info *info)
{
struct xenbus_device *dev = info->dev;
@@ -744,9 +821,7 @@
fail:
xenbus_transaction_end(xbt, 1);
free_sring:
- unbind_from_irqhandler(info->irq, info);
- gnttab_end_foreign_access(info->ring_ref, 0,
- (unsigned long)info->ring.sring);
+ scsifront_free_ring(info);
return err;
}
@@ -779,6 +854,7 @@
}
init_waitqueue_head(&info->wq_sync);
+ init_waitqueue_head(&info->wq_pause);
spin_lock_init(&info->shadow_lock);
snprintf(name, TASK_COMM_LEN, "vscsiif.%d", host->host_no);
@@ -802,13 +878,60 @@
return 0;
free_sring:
- unbind_from_irqhandler(info->irq, info);
- gnttab_end_foreign_access(info->ring_ref, 0,
- (unsigned long)info->ring.sring);
+ scsifront_free_ring(info);
scsi_host_put(host);
return err;
}
+static int scsifront_resume(struct xenbus_device *dev)
+{
+ struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
+ struct Scsi_Host *host = info->host;
+ int err;
+
+ spin_lock_irq(host->host_lock);
+
+ /* Finish all still pending commands. */
+ scsifront_finish_all(info);
+
+ spin_unlock_irq(host->host_lock);
+
+ /* Reconnect to dom0. */
+ scsifront_free_ring(info);
+ err = scsifront_init_ring(info);
+ if (err) {
+ dev_err(&dev->dev, "fail to resume %d\n", err);
+ scsi_host_put(host);
+ return err;
+ }
+
+ xenbus_switch_state(dev, XenbusStateInitialised);
+
+ return 0;
+}
+
+static int scsifront_suspend(struct xenbus_device *dev)
+{
+ struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
+ struct Scsi_Host *host = info->host;
+ int err = 0;
+
+ /* No new commands for the backend. */
+ spin_lock_irq(host->host_lock);
+ info->pause = 1;
+ while (info->callers && !err) {
+ info->waiting_pause = 1;
+ info->wait_ring_available = 0;
+ spin_unlock_irq(host->host_lock);
+ wake_up(&info->wq_sync);
+ err = wait_event_interruptible(info->wq_pause,
+ !info->waiting_pause);
+ spin_lock_irq(host->host_lock);
+ }
+ spin_unlock_irq(host->host_lock);
+ return err;
+}
+
static int scsifront_remove(struct xenbus_device *dev)
{
struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
@@ -823,10 +946,7 @@
}
mutex_unlock(&scsifront_mutex);
- gnttab_end_foreign_access(info->ring_ref, 0,
- (unsigned long)info->ring.sring);
- unbind_from_irqhandler(info->irq, info);
-
+ scsifront_free_ring(info);
scsi_host_put(info->host);
return 0;
@@ -919,6 +1039,12 @@
scsi_device_put(sdev);
}
break;
+ case VSCSIFRONT_OP_READD_LUN:
+ if (device_state == XenbusStateConnected)
+ xenbus_printf(XBT_NIL, dev->nodename,
+ info->dev_state_path,
+ "%d", XenbusStateConnected);
+ break;
default:
break;
}
@@ -932,21 +1058,29 @@
static void scsifront_read_backend_params(struct xenbus_device *dev,
struct vscsifrnt_info *info)
{
- unsigned int sg_grant;
+ unsigned int sg_grant, nr_segs;
int ret;
struct Scsi_Host *host = info->host;
ret = xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg-grant", "%u",
&sg_grant);
- if (ret == 1 && sg_grant) {
- sg_grant = min_t(unsigned int, sg_grant, SG_ALL);
- sg_grant = max_t(unsigned int, sg_grant, VSCSIIF_SG_TABLESIZE);
- host->sg_tablesize = min_t(unsigned int, sg_grant,
+ if (ret != 1)
+ sg_grant = 0;
+ nr_segs = min_t(unsigned int, sg_grant, SG_ALL);
+ nr_segs = max_t(unsigned int, nr_segs, VSCSIIF_SG_TABLESIZE);
+ nr_segs = min_t(unsigned int, nr_segs,
VSCSIIF_SG_TABLESIZE * PAGE_SIZE /
sizeof(struct scsiif_request_segment));
- host->max_sectors = (host->sg_tablesize - 1) * PAGE_SIZE / 512;
- }
- dev_info(&dev->dev, "using up to %d SG entries\n", host->sg_tablesize);
+
+ if (!info->pause && sg_grant)
+ dev_info(&dev->dev, "using up to %d SG entries\n", nr_segs);
+ else if (info->pause && nr_segs < host->sg_tablesize)
+ dev_warn(&dev->dev,
+ "SG entries decreased from %d to %u - device may not work properly anymore\n",
+ host->sg_tablesize, nr_segs);
+
+ host->sg_tablesize = nr_segs;
+ host->max_sectors = (nr_segs - 1) * PAGE_SIZE / 512;
}
static void scsifront_backend_changed(struct xenbus_device *dev,
@@ -965,6 +1099,14 @@
case XenbusStateConnected:
scsifront_read_backend_params(dev, info);
+
+ if (info->pause) {
+ scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_READD_LUN);
+ xenbus_switch_state(dev, XenbusStateConnected);
+ info->pause = 0;
+ return;
+ }
+
if (xenbus_read_driver_state(dev->nodename) ==
XenbusStateInitialised)
scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN);
@@ -1002,6 +1144,8 @@
.ids = scsifront_ids,
.probe = scsifront_probe,
.remove = scsifront_remove,
+ .resume = scsifront_resume,
+ .suspend = scsifront_suspend,
.otherend_changed = scsifront_backend_changed,
};