virtio: reset function

A reset function solves three problems:

1) It allows us to renegotiate features, eg. if we want to upgrade a
   guest driver without rebooting the guest.

2) It gives us a clean way of shutting down virtqueues: after a reset,
   we know that the buffers won't be used by the host, and

3) It helps the guest recover from messed-up drivers.

So we remove the ->shutdown hook, and the only way we now remove
feature bits is via reset.

We leave it to the driver to do the reset before it deletes queues:
the balloon driver, for example, needs to chat to the host in its
remove function.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 8ff2d8b..0f23d67 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -193,6 +193,13 @@
 #define le32_to_cpu(v32) (v32)
 #define le64_to_cpu(v64) (v64)
 
+/* The device virtqueue descriptors are followed by feature bitmasks. */
+static u8 *get_feature_bits(struct device *dev)
+{
+	return (u8 *)(dev->desc + 1)
+		+ dev->desc->num_vq * sizeof(struct lguest_vqconfig);
+}
+
 /*L:100 The Launcher code itself takes us out into userspace, that scary place
  * where pointers run wild and free!  Unfortunately, like most userspace
  * programs, it's quite boring (which is why everyone likes to hack on the
@@ -914,21 +921,58 @@
 	write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd));
 }
 
+/* Resetting a device is fairly easy. */
+static void reset_device(struct device *dev)
+{
+	struct virtqueue *vq;
+
+	verbose("Resetting device %s\n", dev->name);
+	/* Clear the status. */
+	dev->desc->status = 0;
+
+	/* Clear any features they've acked. */
+	memset(get_feature_bits(dev) + dev->desc->feature_len, 0,
+	       dev->desc->feature_len);
+
+	/* Zero out the virtqueues. */
+	for (vq = dev->vq; vq; vq = vq->next) {
+		memset(vq->vring.desc, 0,
+		       vring_size(vq->config.num, getpagesize()));
+		vq->last_avail_idx = 0;
+	}
+}
+
 /* This is the generic routine we call when the Guest uses LHCALL_NOTIFY. */
 static void handle_output(int fd, unsigned long addr)
 {
 	struct device *i;
 	struct virtqueue *vq;
 
-	/* Check each virtqueue. */
+	/* Check each device and virtqueue. */
 	for (i = devices.dev; i; i = i->next) {
+		/* Notifications to device descriptors reset the device. */
+		if (from_guest_phys(addr) == i->desc) {
+			reset_device(i);
+			return;
+		}
+
+		/* Notifications to virtqueues mean output has occurred. */
 		for (vq = i->vq; vq; vq = vq->next) {
-			if (vq->config.pfn == addr/getpagesize()) {
-				verbose("Output to %s\n", vq->dev->name);
-				if (vq->handle_output)
-					vq->handle_output(fd, vq);
+			if (vq->config.pfn != addr/getpagesize())
+				continue;
+
+			/* Guest should acknowledge (and set features!)  before
+			 * using the device. */
+			if (i->desc->status == 0) {
+				warnx("%s gave early output", i->name);
 				return;
 			}
+
+			if (strcmp(vq->dev->name, "console") != 0)
+				verbose("Output to %s\n", vq->dev->name);
+			if (vq->handle_output)
+				vq->handle_output(fd, vq);
+			return;
 		}
 	}
 
@@ -1074,10 +1118,11 @@
 		vq->vring.used->flags = VRING_USED_F_NO_NOTIFY;
 }
 
-/* The virtqueue descriptors are followed by feature bytes. */
+/* The first half of the feature bitmask is for us to advertise features.  The
+ * second half if for the Guest to accept features. */
 static void add_feature(struct device *dev, unsigned bit)
 {
-	u8 *features;
+	u8 *features = get_feature_bits(dev);
 
 	/* We can't extend the feature bits once we've added config bytes */
 	if (dev->desc->feature_len <= bit / CHAR_BIT) {
@@ -1085,9 +1130,6 @@
 		dev->desc->feature_len = (bit / CHAR_BIT) + 1;
 	}
 
-	features = (u8 *)(dev->desc + 1)
-		+ dev->desc->num_vq * sizeof(struct lguest_vqconfig);
-
 	features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT));
 }
 
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 54a8017..6143337 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -264,12 +264,16 @@
 	struct virtio_blk *vblk = vdev->priv;
 	int major = vblk->disk->major;
 
+	/* Nothing should be pending. */
 	BUG_ON(!list_empty(&vblk->reqs));
+
+	/* Stop all the virtqueues. */
+	vdev->config->reset(vdev);
+
 	blk_cleanup_queue(vblk->disk->queue);
 	put_disk(vblk->disk);
 	unregister_blkdev(major, "virtblk");
 	mempool_destroy(vblk->pool);
-	/* There should be nothing in the queue now, so no need to shutdown */
 	vdev->config->del_vq(vblk->vq);
 	kfree(vblk);
 }
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index ced5b44..84f85e2 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -54,7 +54,7 @@
  *
  * The configuration information for a device consists of one or more
  * virtqueues, a feature bitmaks, and some configuration bytes.  The
- * configuration bytes don't really matter to us: the Launcher set them up, and
+ * configuration bytes don't really matter to us: the Launcher sets them up, and
  * the driver will look at them during setup.
  *
  * A convenient routine to return the device's virtqueue config array:
@@ -139,9 +139,20 @@
 
 static void lg_set_status(struct virtio_device *vdev, u8 status)
 {
+	BUG_ON(!status);
 	to_lgdev(vdev)->desc->status = status;
 }
 
+/* To reset the device, we (ab)use the NOTIFY hypercall, with the descriptor
+ * address of the device.  The Host will zero the status and all the
+ * features. */
+static void lg_reset(struct virtio_device *vdev)
+{
+	unsigned long offset = (void *)to_lgdev(vdev)->desc - lguest_devices;
+
+	hcall(LHCALL_NOTIFY, (max_pfn<<PAGE_SHIFT) + offset, 0, 0);
+}
+
 /*
  * Virtqueues
  *
@@ -279,6 +290,7 @@
 	.set = lg_set,
 	.get_status = lg_get_status,
 	.set_status = lg_set_status,
+	.reset = lg_reset,
 	.find_vq = lg_find_vq,
 	.del_vq = lg_del_vq,
 };
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index ec43284..6e0a9fe 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -390,13 +390,14 @@
 	struct virtnet_info *vi = vdev->priv;
 	struct sk_buff *skb;
 
+	/* Stop all the virtqueues. */
+	vdev->config->reset(vdev);
+
 	/* Free our skbs in send and recv queues, if any. */
-	vi->rvq->vq_ops->shutdown(vi->rvq);
 	while ((skb = __skb_dequeue(&vi->recv)) != NULL) {
 		kfree_skb(skb);
 		vi->num--;
 	}
-	vi->svq->vq_ops->shutdown(vi->svq);
 	while ((skb = __skb_dequeue(&vi->send)) != NULL)
 		kfree_skb(skb);
 
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 303cb6f..7dddb18 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -102,9 +102,13 @@
 	struct virtio_driver *drv = container_of(dev->dev.driver,
 						 struct virtio_driver, driver);
 
-	dev->config->set_status(dev, dev->config->get_status(dev)
-				& ~VIRTIO_CONFIG_S_DRIVER);
 	drv->remove(dev);
+
+	/* Driver should have reset device. */
+	BUG_ON(dev->config->get_status(dev));
+
+	/* Acknowledge the device's existence again. */
+	add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
 	return 0;
 }
 
@@ -130,6 +134,10 @@
 	dev->dev.bus = &virtio_bus;
 	sprintf(dev->dev.bus_id, "%u", dev->index);
 
+	/* We always start by resetting the device, in case a previous
+	 * driver messed it up.  This also tests that code path a little. */
+	dev->config->reset(dev);
+
 	/* Acknowledge that we've seen the device. */
 	add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
 
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index dbe1d35..9849bab 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -173,16 +173,6 @@
 	vq->num_free++;
 }
 
-/* FIXME: We need to tell other side about removal, to synchronize. */
-static void vring_shutdown(struct virtqueue *_vq)
-{
-	struct vring_virtqueue *vq = to_vvq(_vq);
-	unsigned int i;
-
-	for (i = 0; i < vq->vring.num; i++)
-		detach_buf(vq, i);
-}
-
 static inline bool more_used(const struct vring_virtqueue *vq)
 {
 	return vq->last_used_idx != vq->vring.used->idx;
@@ -278,7 +268,6 @@
 	.kick = vring_kick,
 	.disable_cb = vring_disable_cb,
 	.enable_cb = vring_enable_cb,
-	.shutdown = vring_shutdown,
 };
 
 struct virtqueue *vring_new_virtqueue(unsigned int num,
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 78408d5..260d1fc 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -45,9 +45,6 @@
  *	vq: the struct virtqueue we're talking about.
  *	This returns "false" (and doesn't re-enable) if there are pending
  *	buffers in the queue, to avoid a race.
- * @shutdown: "unadd" all buffers.
- *	vq: the struct virtqueue we're talking about.
- *	Remove everything from the queue.
  *
  * Locking rules are straightforward: the driver is responsible for
  * locking.  No two operations may be invoked simultaneously.
@@ -67,8 +64,6 @@
 
 	void (*disable_cb)(struct virtqueue *vq);
 	bool (*enable_cb)(struct virtqueue *vq);
-
-	void (*shutdown)(struct virtqueue *vq);
 };
 
 /**
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 81f828a..d581b29 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -43,6 +43,9 @@
  * @set_status: write the status byte
  *	vdev: the virtio_device
  *	status: the new status byte
+ * @reset: reset the device
+ *	vdev: the virtio device
+ *	After this, status and feature negotiation must be done again
  * @find_vq: find a virtqueue and instantiate it.
  *	vdev: the virtio_device
  *	index: the 0-based virtqueue number in case there's more than one.
@@ -59,6 +62,7 @@
 		    const void *buf, unsigned len);
 	u8 (*get_status)(struct virtio_device *vdev);
 	void (*set_status)(struct virtio_device *vdev, u8 status);
+	void (*reset)(struct virtio_device *vdev);
 	struct virtqueue *(*find_vq)(struct virtio_device *vdev,
 				     unsigned index,
 				     void (*callback)(struct virtqueue *));