virtio: explicit advertisement of driver features

A recent proposed feature addition to the virtio block driver revealed
some flaws in the API: in particular, we assume that feature
negotiation is complete once a driver's probe function returns.

There is nothing in the API to require this, however, and even I
didn't notice when it was violated.

So instead, we require the driver to specify what features it supports
in a table, we can then move the feature negotiation into the virtio
core.  The intersection of device and driver features are presented in
a new 'features' bitmap in the struct virtio_device.

Note that this highlights the difference between Linux unsigned-long
bitmaps where each unsigned long is in native endian, and a
straight-forward little-endian array of bytes.

Drivers can still remove feature bits in their probe routine if they
really have to.

API changes:
- dev->config->feature() no longer gets and acks a feature.
- drivers should advertise their features in the 'feature_table' field
- use virtio_has_feature() for extra sanity when checking feature bits

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index cc6d393..78be6b8 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -242,7 +242,7 @@
 	index++;
 
 	/* If barriers are supported, tell block layer that queue is ordered */
-	if (vdev->config->feature(vdev, VIRTIO_BLK_F_BARRIER))
+	if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER))
 		blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL);
 
 	/* Host must always specify the capacity. */
@@ -308,7 +308,13 @@
 	{ 0 },
 };
 
+static unsigned int features[] = {
+	VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX,
+};
+
 static struct virtio_driver virtio_blk = {
+	.feature_table = features,
+	.feature_table_size = ARRAY_SIZE(features),
 	.driver.name =	KBUILD_MODNAME,
 	.driver.owner =	THIS_MODULE,
 	.id_table =	id_table,
diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index 2bc9bf7..7a643a6 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -85,27 +85,34 @@
 		+ desc->config_len;
 }
 
-/* This tests (and acknowleges) a feature bit. */
-static bool lg_feature(struct virtio_device *vdev, unsigned fbit)
+/* This gets the device's feature bits. */
+static u32 lg_get_features(struct virtio_device *vdev)
 {
+	unsigned int i;
+	u32 features = 0;
 	struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
-	u8 *features;
+	u8 *in_features = lg_features(desc);
 
-	/* Obviously if they ask for a feature off the end of our feature
-	 * bitmap, it's not set. */
-	if (fbit / 8 > desc->feature_len)
-		return false;
+	/* We do this the slow but generic way. */
+	for (i = 0; i < min(desc->feature_len * 8, 32); i++)
+		if (in_features[i / 8] & (1 << (i % 8)))
+			features |= (1 << i);
 
-	/* The feature bitmap comes after the virtqueues. */
-	features = lg_features(desc);
-	if (!(features[fbit / 8] & (1 << (fbit % 8))))
-		return false;
+	return features;
+}
 
-	/* We set the matching bit in the other half of the bitmap to tell the
-	 * Host we want to use this feature.  We don't use this yet, but we
-	 * could in future. */
-	features[desc->feature_len + fbit / 8] |= (1 << (fbit % 8));
-	return true;
+static void lg_set_features(struct virtio_device *vdev, u32 features)
+{
+	unsigned int i;
+	struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
+	/* Second half of bitmap is features we accept. */
+	u8 *out_features = lg_features(desc) + desc->feature_len;
+
+	memset(out_features, 0, desc->feature_len);
+	for (i = 0; i < min(desc->feature_len * 8, 32); i++) {
+		if (features & (1 << i))
+			out_features[i / 8] |= (1 << (i % 8));
+	}
 }
 
 /* Once they've found a field, getting a copy of it is easy. */
@@ -286,7 +293,8 @@
 
 /* The ops structure which hooks everything together. */
 static struct virtio_config_ops lguest_config_ops = {
-	.feature = lg_feature,
+	.get_features = lg_get_features,
+	.set_features = lg_set_features,
 	.get = lg_get,
 	.set = lg_set,
 	.get_status = lg_get_status,
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index ad43421..f926b5a 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -378,26 +378,26 @@
 	SET_NETDEV_DEV(dev, &vdev->dev);
 
 	/* Do we support "hardware" checksums? */
-	if (csum && vdev->config->feature(vdev, VIRTIO_NET_F_CSUM)) {
+	if (csum && virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
 		/* This opens up the world of extra features. */
 		dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
-		if (gso && vdev->config->feature(vdev, VIRTIO_NET_F_GSO)) {
+		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
 			dev->features |= NETIF_F_TSO | NETIF_F_UFO
 				| NETIF_F_TSO_ECN | NETIF_F_TSO6;
 		}
 		/* Individual feature bits: what can host handle? */
-		if (gso && vdev->config->feature(vdev, VIRTIO_NET_F_HOST_TSO4))
+		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
 			dev->features |= NETIF_F_TSO;
-		if (gso && vdev->config->feature(vdev, VIRTIO_NET_F_HOST_TSO6))
+		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
 			dev->features |= NETIF_F_TSO6;
-		if (gso && vdev->config->feature(vdev, VIRTIO_NET_F_HOST_ECN))
+		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
 			dev->features |= NETIF_F_TSO_ECN;
-		if (gso && vdev->config->feature(vdev, VIRTIO_NET_F_HOST_UFO))
+		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO))
 			dev->features |= NETIF_F_UFO;
 	}
 
 	/* Configuration may specify what MAC to use.  Otherwise random. */
-	if (vdev->config->feature(vdev, VIRTIO_NET_F_MAC)) {
+	if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
 		vdev->config->get(vdev,
 				  offsetof(struct virtio_net_config, mac),
 				  dev->dev_addr, dev->addr_len);
@@ -486,7 +486,15 @@
 	{ 0 },
 };
 
+static unsigned int features[] = {
+	VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC,
+	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
+	VIRTIO_NET_F_HOST_ECN,
+};
+
 static struct virtio_driver virtio_net = {
+	.feature_table = features,
+	.feature_table_size = ARRAY_SIZE(features),
 	.driver.name =	KBUILD_MODNAME,
 	.driver.owner =	THIS_MODULE,
 	.id_table =	id_table,
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index b535483..1386678 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -80,19 +80,51 @@
 	dev->config->set_status(dev, dev->config->get_status(dev) | status);
 }
 
+void virtio_check_driver_offered_feature(const struct virtio_device *vdev,
+					 unsigned int fbit)
+{
+	unsigned int i;
+	struct virtio_driver *drv = container_of(vdev->dev.driver,
+						 struct virtio_driver, driver);
+
+	for (i = 0; i < drv->feature_table_size; i++)
+		if (drv->feature_table[i] == fbit)
+			return;
+	BUG();
+}
+EXPORT_SYMBOL_GPL(virtio_check_driver_offered_feature);
+
 static int virtio_dev_probe(struct device *_d)
 {
-	int err;
+	int err, i;
 	struct virtio_device *dev = container_of(_d,struct virtio_device,dev);
 	struct virtio_driver *drv = container_of(dev->dev.driver,
 						 struct virtio_driver, driver);
+	u32 device_features;
 
+	/* We have a driver! */
 	add_status(dev, VIRTIO_CONFIG_S_DRIVER);
+
+	/* Figure out what features the device supports. */
+	device_features = dev->config->get_features(dev);
+
+	/* Features supported by both device and driver into dev->features. */
+	memset(dev->features, 0, sizeof(dev->features));
+	for (i = 0; i < drv->feature_table_size; i++) {
+		unsigned int f = drv->feature_table[i];
+		BUG_ON(f >= 32);
+		if (device_features & (1 << f))
+			set_bit(f, dev->features);
+	}
+
 	err = drv->probe(dev);
 	if (err)
 		add_status(dev, VIRTIO_CONFIG_S_FAILED);
-	else
+	else {
 		add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
+		/* They should never have set feature bits beyond 32 */
+		dev->config->set_features(dev, dev->features[0]);
+	}
 	return err;
 }
 
@@ -114,6 +146,8 @@
 
 int register_virtio_driver(struct virtio_driver *driver)
 {
+	/* Catch this early. */
+	BUG_ON(driver->feature_table_size && !driver->feature_table);
 	driver->driver.bus = &virtio_bus;
 	driver->driver.probe = virtio_dev_probe;
 	driver->driver.remove = virtio_dev_remove;
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index fef88d8..bfef604 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -227,7 +227,7 @@
 	}
 
 	vb->tell_host_first
-		= vdev->config->feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
+		= virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
 
 	return 0;
 
@@ -259,7 +259,11 @@
 	kfree(vb);
 }
 
+static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST };
+
 static struct virtio_driver virtio_balloon = {
+	.feature_table = features,
+	.feature_table_size = ARRAY_SIZE(features),
 	.driver.name =	KBUILD_MODNAME,
 	.driver.owner =	THIS_MODULE,
 	.id_table =	id_table,
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index de102a6..27e9fc9 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -87,23 +87,22 @@
 	return container_of(vdev, struct virtio_pci_device, vdev);
 }
 
-/* virtio config->feature() implementation */
-static bool vp_feature(struct virtio_device *vdev, unsigned bit)
+/* virtio config->get_features() implementation */
+static u32 vp_get_features(struct virtio_device *vdev)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	u32 mask;
 
-	/* Since this function is supposed to have the side effect of
-	 * enabling a queried feature, we simulate that by doing a read
-	 * from the host feature bitmask and then writing to the guest
-	 * feature bitmask */
-	mask = ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
-	if (mask & (1 << bit)) {
-		mask |= (1 << bit);
-		iowrite32(mask, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
-	}
+	/* When someone needs more than 32 feature bits, we'll need to
+	 * steal a bit to indicate that the rest are somewhere else. */
+	return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
+}
 
-	return !!(mask & (1 << bit));
+/* virtio config->set_features() implementation */
+static void vp_set_features(struct virtio_device *vdev, u32 features)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+
+	iowrite32(features, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
 }
 
 /* virtio config->get() implementation */
@@ -293,7 +292,6 @@
 }
 
 static struct virtio_config_ops virtio_pci_config_ops = {
-	.feature	= vp_feature,
 	.get		= vp_get,
 	.set		= vp_set,
 	.get_status	= vp_get_status,
@@ -301,6 +299,8 @@
 	.reset		= vp_reset,
 	.find_vq	= vp_find_vq,
 	.del_vq		= vp_del_vq,
+	.get_features	= vp_get_features,
+	.set_features	= vp_set_features,
 };
 
 /* the PCI probing function */
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index e7d1084..06005fa 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -76,6 +76,7 @@
  * @dev: underlying device.
  * @id: the device type identification (used to match it with a driver).
  * @config: the configuration ops for this device.
+ * @features: the features supported by both driver and device.
  * @priv: private pointer for the driver's use.
  */
 struct virtio_device
@@ -84,6 +85,8 @@
 	struct device dev;
 	struct virtio_device_id id;
 	struct virtio_config_ops *config;
+	/* Note that this is a Linux set_bit-style bitmap. */
+	unsigned long features[1];
 	void *priv;
 };
 
@@ -94,6 +97,8 @@
  * virtio_driver - operations for a virtio I/O driver
  * @driver: underlying device driver (populate name and owner).
  * @id_table: the ids serviced by this driver.
+ * @feature_table: an array of feature numbers supported by this device.
+ * @feature_table_size: number of entries in the feature table array.
  * @probe: the function to call when a device is found.  Returns a token for
  *    remove, or PTR_ERR().
  * @remove: the function when a device is removed.
@@ -103,6 +108,8 @@
 struct virtio_driver {
 	struct device_driver driver;
 	const struct virtio_device_id *id_table;
+	const unsigned int *feature_table;
+	unsigned int feature_table_size;
 	int (*probe)(struct virtio_device *dev);
 	void (*remove)(struct virtio_device *dev);
 	void (*config_changed)(struct virtio_device *dev);
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 475572e..50db245 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -20,11 +20,6 @@
 
 /**
  * virtio_config_ops - operations for configuring a virtio device
- * @feature: search for a feature in this config
- *	vdev: the virtio_device
- *	bit: the feature bit
- *	Returns true if the feature is supported.  Acknowledges the feature
- *	so the host can see it.
  * @get: read the value of a configuration field
  *	vdev: the virtio_device
  *	offset: the offset of the configuration field
@@ -50,10 +45,15 @@
  *	callback: the virqtueue callback
  *	Returns the new virtqueue or ERR_PTR() (eg. -ENOENT).
  * @del_vq: free a virtqueue found by find_vq().
+ * @get_features: get the array of feature bits for this device.
+ *	vdev: the virtio_device
+ *	Returns the first 32 feature bits (all we currently need).
+ * @set_features: confirm what device features we'll be using.
+ *	vdev: the virtio_device
+ *	feature: the first 32 feature bits
  */
 struct virtio_config_ops
 {
-	bool (*feature)(struct virtio_device *vdev, unsigned bit);
 	void (*get)(struct virtio_device *vdev, unsigned offset,
 		    void *buf, unsigned len);
 	void (*set)(struct virtio_device *vdev, unsigned offset,
@@ -65,8 +65,30 @@
 				     unsigned index,
 				     void (*callback)(struct virtqueue *));
 	void (*del_vq)(struct virtqueue *vq);
+	u32 (*get_features)(struct virtio_device *vdev);
+	void (*set_features)(struct virtio_device *vdev, u32 features);
 };
 
+/* If driver didn't advertise the feature, it will never appear. */
+void virtio_check_driver_offered_feature(const struct virtio_device *vdev,
+					 unsigned int fbit);
+
+/**
+ * virtio_has_feature - helper to determine if this device has this feature.
+ * @vdev: the device
+ * @fbit: the feature bit
+ */
+static inline bool virtio_has_feature(const struct virtio_device *vdev,
+				      unsigned int fbit)
+{
+	/* Did you forget to fix assumptions on max features? */
+	if (__builtin_constant_p(fbit))
+		BUILD_BUG_ON(fbit >= 32);
+
+	virtio_check_driver_offered_feature(vdev, fbit);
+	return test_bit(fbit, vdev->features);
+}
+
 /**
  * virtio_config_val - look for a feature and get a virtio config entry.
  * @vdev: the virtio device
@@ -84,7 +106,7 @@
 				    unsigned int offset,
 				    void *buf, unsigned len)
 {
-	if (!vdev->config->feature(vdev, fbit))
+	if (!virtio_has_feature(vdev, fbit))
 		return -ENOENT;
 
 	vdev->config->get(vdev, offset, buf, len);