xen: support suspend/resume in pvscsi frontend

Up to now the pvscsi frontend hasn't supported domain suspend and
resume. When a domain with an assigned pvscsi device was suspended
and resumed again, it was not able to use the device any more: trying
to do so resulted in hanging processes.

Support suspend and resume of pvscsi devices.

Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c
index 34199d2..78d9506 100644
--- a/drivers/scsi/xen-scsifront.c
+++ b/drivers/scsi/xen-scsifront.c
@@ -63,6 +63,7 @@
 
 #define VSCSIFRONT_OP_ADD_LUN	1
 #define VSCSIFRONT_OP_DEL_LUN	2
+#define VSCSIFRONT_OP_READD_LUN	3
 
 /* Tuning point. */
 #define VSCSIIF_DEFAULT_CMD_PER_LUN 10
@@ -113,8 +114,13 @@
 	DECLARE_BITMAP(shadow_free_bitmap, VSCSIIF_MAX_REQS);
 	struct vscsifrnt_shadow *shadow[VSCSIIF_MAX_REQS];
 
+	/* Following items are protected by the host lock. */
 	wait_queue_head_t wq_sync;
+	wait_queue_head_t wq_pause;
 	unsigned int wait_ring_available:1;
+	unsigned int waiting_pause:1;
+	unsigned int pause:1;
+	unsigned callers;
 
 	char dev_state_path[64];
 	struct task_struct *curr;
@@ -274,31 +280,31 @@
 	wake_up(&shadow->wq_reset);
 }
 
-static int scsifront_cmd_done(struct vscsifrnt_info *info)
+static void scsifront_do_response(struct vscsifrnt_info *info,
+				  struct vscsiif_response *ring_rsp)
+{
+	if (WARN(ring_rsp->rqid >= VSCSIIF_MAX_REQS ||
+		 test_bit(ring_rsp->rqid, info->shadow_free_bitmap),
+		 "illegal rqid %u returned by backend!\n", ring_rsp->rqid))
+		return;
+
+	if (info->shadow[ring_rsp->rqid]->act == VSCSIIF_ACT_SCSI_CDB)
+		scsifront_cdb_cmd_done(info, ring_rsp);
+	else
+		scsifront_sync_cmd_done(info, ring_rsp);
+}
+
+static int scsifront_ring_drain(struct vscsifrnt_info *info)
 {
 	struct vscsiif_response *ring_rsp;
 	RING_IDX i, rp;
 	int more_to_do = 0;
-	unsigned long flags;
-
-	spin_lock_irqsave(info->host->host_lock, flags);
 
 	rp = info->ring.sring->rsp_prod;
 	rmb();	/* ordering required respective to dom0 */
 	for (i = info->ring.rsp_cons; i != rp; i++) {
-
 		ring_rsp = RING_GET_RESPONSE(&info->ring, i);
-
-		if (WARN(ring_rsp->rqid >= VSCSIIF_MAX_REQS ||
-			 test_bit(ring_rsp->rqid, info->shadow_free_bitmap),
-			 "illegal rqid %u returned by backend!\n",
-			 ring_rsp->rqid))
-			continue;
-
-		if (info->shadow[ring_rsp->rqid]->act == VSCSIIF_ACT_SCSI_CDB)
-			scsifront_cdb_cmd_done(info, ring_rsp);
-		else
-			scsifront_sync_cmd_done(info, ring_rsp);
+		scsifront_do_response(info, ring_rsp);
 	}
 
 	info->ring.rsp_cons = i;
@@ -308,6 +314,18 @@
 	else
 		info->ring.sring->rsp_event = i + 1;
 
+	return more_to_do;
+}
+
+static int scsifront_cmd_done(struct vscsifrnt_info *info)
+{
+	int more_to_do;
+	unsigned long flags;
+
+	spin_lock_irqsave(info->host->host_lock, flags);
+
+	more_to_do = scsifront_ring_drain(info);
+
 	info->wait_ring_available = 0;
 
 	spin_unlock_irqrestore(info->host->host_lock, flags);
@@ -328,6 +346,24 @@
 	return IRQ_HANDLED;
 }
 
+static void scsifront_finish_all(struct vscsifrnt_info *info)
+{
+	unsigned i;
+	struct vscsiif_response resp;
+
+	scsifront_ring_drain(info);
+
+	for (i = 0; i < VSCSIIF_MAX_REQS; i++) {
+		if (test_bit(i, info->shadow_free_bitmap))
+			continue;
+		resp.rqid = i;
+		resp.sense_len = 0;
+		resp.rslt = DID_RESET << 16;
+		resp.residual_len = 0;
+		scsifront_do_response(info, &resp);
+	}
+}
+
 static int map_data_for_request(struct vscsifrnt_info *info,
 				struct scsi_cmnd *sc,
 				struct vscsiif_request *ring_req,
@@ -475,6 +511,27 @@
 	return ring_req;
 }
 
+static int scsifront_enter(struct vscsifrnt_info *info)
+{
+	if (info->pause)
+		return 1;
+	info->callers++;
+	return 0;
+}
+
+static void scsifront_return(struct vscsifrnt_info *info)
+{
+	info->callers--;
+	if (info->callers)
+		return;
+
+	if (!info->waiting_pause)
+		return;
+
+	info->waiting_pause = 0;
+	wake_up(&info->wq_pause);
+}
+
 static int scsifront_queuecommand(struct Scsi_Host *shost,
 				  struct scsi_cmnd *sc)
 {
@@ -486,6 +543,10 @@
 	uint16_t rqid;
 
 	spin_lock_irqsave(shost->host_lock, flags);
+	if (scsifront_enter(info)) {
+		spin_unlock_irqrestore(shost->host_lock, flags);
+		return SCSI_MLQUEUE_HOST_BUSY;
+	}
 	if (RING_FULL(&info->ring))
 		goto busy;
 
@@ -505,6 +566,7 @@
 	if (err < 0) {
 		pr_debug("%s: err %d\n", __func__, err);
 		scsifront_put_rqid(info, rqid);
+		scsifront_return(info);
 		spin_unlock_irqrestore(shost->host_lock, flags);
 		if (err == -ENOMEM)
 			return SCSI_MLQUEUE_HOST_BUSY;
@@ -514,11 +576,13 @@
 	}
 
 	scsifront_do_request(info);
+	scsifront_return(info);
 	spin_unlock_irqrestore(shost->host_lock, flags);
 
 	return 0;
 
 busy:
+	scsifront_return(info);
 	spin_unlock_irqrestore(shost->host_lock, flags);
 	pr_debug("%s: busy\n", __func__);
 	return SCSI_MLQUEUE_HOST_BUSY;
@@ -549,7 +613,7 @@
 			if (ring_req)
 				break;
 		}
-		if (err) {
+		if (err || info->pause) {
 			spin_unlock_irq(host->host_lock);
 			kfree(shadow);
 			return FAILED;
@@ -561,6 +625,11 @@
 		spin_lock_irq(host->host_lock);
 	}
 
+	if (scsifront_enter(info)) {
+		spin_unlock_irq(host->host_lock);
+		return FAILED;
+	}
+
 	ring_req->act = act;
 	ring_req->ref_rqid = s->rqid;
 
@@ -587,6 +656,7 @@
 		err = FAILED;
 	}
 
+	scsifront_return(info);
 	spin_unlock_irq(host->host_lock);
 	return err;
 }
@@ -698,6 +768,13 @@
 	return err;
 }
 
+static void scsifront_free_ring(struct vscsifrnt_info *info)
+{
+	unbind_from_irqhandler(info->irq, info);
+	gnttab_end_foreign_access(info->ring_ref, 0,
+				  (unsigned long)info->ring.sring);
+}
+
 static int scsifront_init_ring(struct vscsifrnt_info *info)
 {
 	struct xenbus_device *dev = info->dev;
@@ -744,9 +821,7 @@
 fail:
 	xenbus_transaction_end(xbt, 1);
 free_sring:
-	unbind_from_irqhandler(info->irq, info);
-	gnttab_end_foreign_access(info->ring_ref, 0,
-				  (unsigned long)info->ring.sring);
+	scsifront_free_ring(info);
 
 	return err;
 }
@@ -779,6 +854,7 @@
 	}
 
 	init_waitqueue_head(&info->wq_sync);
+	init_waitqueue_head(&info->wq_pause);
 	spin_lock_init(&info->shadow_lock);
 
 	snprintf(name, TASK_COMM_LEN, "vscsiif.%d", host->host_no);
@@ -802,13 +878,60 @@
 	return 0;
 
 free_sring:
-	unbind_from_irqhandler(info->irq, info);
-	gnttab_end_foreign_access(info->ring_ref, 0,
-				  (unsigned long)info->ring.sring);
+	scsifront_free_ring(info);
 	scsi_host_put(host);
 	return err;
 }
 
+static int scsifront_resume(struct xenbus_device *dev)
+{
+	struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
+	struct Scsi_Host *host = info->host;
+	int err;
+
+	spin_lock_irq(host->host_lock);
+
+	/* Finish all still pending commands. */
+	scsifront_finish_all(info);
+
+	spin_unlock_irq(host->host_lock);
+
+	/* Reconnect to dom0. */
+	scsifront_free_ring(info);
+	err = scsifront_init_ring(info);
+	if (err) {
+		dev_err(&dev->dev, "fail to resume %d\n", err);
+		scsi_host_put(host);
+		return err;
+	}
+
+	xenbus_switch_state(dev, XenbusStateInitialised);
+
+	return 0;
+}
+
+static int scsifront_suspend(struct xenbus_device *dev)
+{
+	struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
+	struct Scsi_Host *host = info->host;
+	int err = 0;
+
+	/* No new commands for the backend. */
+	spin_lock_irq(host->host_lock);
+	info->pause = 1;
+	while (info->callers && !err) {
+		info->waiting_pause = 1;
+		info->wait_ring_available = 0;
+		spin_unlock_irq(host->host_lock);
+		wake_up(&info->wq_sync);
+		err = wait_event_interruptible(info->wq_pause,
+					       !info->waiting_pause);
+		spin_lock_irq(host->host_lock);
+	}
+	spin_unlock_irq(host->host_lock);
+	return err;
+}
+
 static int scsifront_remove(struct xenbus_device *dev)
 {
 	struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
@@ -823,10 +946,7 @@
 	}
 	mutex_unlock(&scsifront_mutex);
 
-	gnttab_end_foreign_access(info->ring_ref, 0,
-				  (unsigned long)info->ring.sring);
-	unbind_from_irqhandler(info->irq, info);
-
+	scsifront_free_ring(info);
 	scsi_host_put(info->host);
 
 	return 0;
@@ -919,6 +1039,12 @@
 				scsi_device_put(sdev);
 			}
 			break;
+		case VSCSIFRONT_OP_READD_LUN:
+			if (device_state == XenbusStateConnected)
+				xenbus_printf(XBT_NIL, dev->nodename,
+					      info->dev_state_path,
+					      "%d", XenbusStateConnected);
+			break;
 		default:
 			break;
 		}
@@ -932,21 +1058,29 @@
 static void scsifront_read_backend_params(struct xenbus_device *dev,
 					  struct vscsifrnt_info *info)
 {
-	unsigned int sg_grant;
+	unsigned int sg_grant, nr_segs;
 	int ret;
 	struct Scsi_Host *host = info->host;
 
 	ret = xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg-grant", "%u",
 			   &sg_grant);
-	if (ret == 1 && sg_grant) {
-		sg_grant = min_t(unsigned int, sg_grant, SG_ALL);
-		sg_grant = max_t(unsigned int, sg_grant, VSCSIIF_SG_TABLESIZE);
-		host->sg_tablesize = min_t(unsigned int, sg_grant,
+	if (ret != 1)
+		sg_grant = 0;
+	nr_segs = min_t(unsigned int, sg_grant, SG_ALL);
+	nr_segs = max_t(unsigned int, nr_segs, VSCSIIF_SG_TABLESIZE);
+	nr_segs = min_t(unsigned int, nr_segs,
 			VSCSIIF_SG_TABLESIZE * PAGE_SIZE /
 			sizeof(struct scsiif_request_segment));
-		host->max_sectors = (host->sg_tablesize - 1) * PAGE_SIZE / 512;
-	}
-	dev_info(&dev->dev, "using up to %d SG entries\n", host->sg_tablesize);
+
+	if (!info->pause && sg_grant)
+		dev_info(&dev->dev, "using up to %d SG entries\n", nr_segs);
+	else if (info->pause && nr_segs < host->sg_tablesize)
+		dev_warn(&dev->dev,
+			 "SG entries decreased from %d to %u - device may not work properly anymore\n",
+			 host->sg_tablesize, nr_segs);
+
+	host->sg_tablesize = nr_segs;
+	host->max_sectors = (nr_segs - 1) * PAGE_SIZE / 512;
 }
 
 static void scsifront_backend_changed(struct xenbus_device *dev,
@@ -965,6 +1099,14 @@
 
 	case XenbusStateConnected:
 		scsifront_read_backend_params(dev, info);
+
+		if (info->pause) {
+			scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_READD_LUN);
+			xenbus_switch_state(dev, XenbusStateConnected);
+			info->pause = 0;
+			return;
+		}
+
 		if (xenbus_read_driver_state(dev->nodename) ==
 		    XenbusStateInitialised)
 			scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN);
@@ -1002,6 +1144,8 @@
 	.ids			= scsifront_ids,
 	.probe			= scsifront_probe,
 	.remove			= scsifront_remove,
+	.resume			= scsifront_resume,
+	.suspend		= scsifront_suspend,
 	.otherend_changed	= scsifront_backend_changed,
 };