xen-netback: Introduce TX grant mapping

This patch introduces grant mapping on the netback TX path. It replaces the
grant copy operations, ditching grant copy coalescing along the way (a sketch
of the mapping step follows below). Another solution for copy coalescing is
introduced in "xen-netback: Handle guests with too many frags"; older guests
and Windows could break before that patch is applied.
There is a callback (xenvif_zerocopy_callback) from the core stack to release
the slots back to the guests when kfree_skb() or skb_orphan_frags() is called.
It feeds a separate dealloc thread: scheduling the NAPI instance from the
callback would be inefficient, so the dealloc cannot be done from the NAPI
instance itself.
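For reference, the mapping step replaces a GNTTABOP_copy with a
GNTTABOP_map_grant_ref into one of the backend's ballooned pages. Below is a
minimal sketch of mapping a single slot with the generic grant-table helpers;
the function and its error handling are illustrative only, the real, batched
version lives in netback.c, which this patch excerpt does not show:

	/* Sketch: map one frontend TX grant instead of grant-copying it.
	 * pending_idx and txreq are assumed to come from the pending ring
	 * machinery set up in xenvif_alloc() below.
	 */
	static int map_one_tx_grant(struct xenvif *vif, u16 pending_idx,
				    struct xen_netif_tx_request *txreq)
	{
		struct gnttab_map_grant_ref op;
		struct page *page = vif->mmap_pages[pending_idx];

		gnttab_set_map_op(&op,
				  (unsigned long)page_address(page),
				  GNTMAP_host_map | GNTMAP_readonly,
				  txreq->gref, vif->domid);

		if (gnttab_map_refs(&op, NULL, &page, 1))
			return -EFAULT;	/* the hypercall itself failed */
		if (op.status != GNTST_okay)
			return -EINVAL;	/* frontend handed us a bad gref */

		vif->grant_tx_handle[pending_idx] = op.handle;
		return 0;
	}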
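The zerocopy callback itself must not unmap anything, since it can run in any
context; it only pushes the freed pending slots onto a dealloc ring and wakes
the dealloc thread, which does the actual unmapping. A sketch of that shape,
assuming dealloc_ring/dealloc_prod fields and pending_index()/ubuf_to_vif()
helpers in the style of the existing pending ring (the real
xenvif_zerocopy_callback is likewise in netback.c):

	/* Sketch: called by the core stack from kfree_skb()/
	 * skb_orphan_frags(); only queues work for the dealloc kthread.
	 */
	static void zerocopy_callback_sketch(struct ubuf_info *ubuf,
					     bool zerocopy_success)
	{
		struct xenvif *vif = ubuf_to_vif(ubuf); /* container_of() */
		unsigned long flags;

		spin_lock_irqsave(&vif->callback_lock, flags);
		do {
			u16 pending_idx = ubuf->desc;

			vif->dealloc_ring[pending_index(vif->dealloc_prod)] =
				pending_idx;
			smp_wmb(); /* publish slot before producer bump */
			vif->dealloc_prod++;
			ubuf = (struct ubuf_info *)ubuf->ctx; /* next frag */
		} while (ubuf);
		spin_unlock_irqrestore(&vif->callback_lock, flags);

		wake_up(&vif->dealloc_wq); /* kick the "%s-dealloc" kthread */
	}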

Signed-off-by: Zoltan Kiss <zoltan.kiss@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index bc32627..1fe9fe5 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -38,6 +38,7 @@
 
 #include <xen/events.h>
 #include <asm/xen/hypercall.h>
+#include <xen/balloon.h>
 
 #define XENVIF_QUEUE_LENGTH 32
 #define XENVIF_NAPI_WEIGHT  64
@@ -87,7 +88,8 @@
 		local_irq_save(flags);
 
 		RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
-		if (!more_to_do)
+		if (!(more_to_do &&
+		      xenvif_tx_pending_slots_available(vif)))
 			__napi_complete(napi);
 
 		local_irq_restore(flags);
@@ -121,7 +123,9 @@
 	BUG_ON(skb->dev != dev);
 
 	/* Drop the packet if vif is not ready */
-	if (vif->task == NULL || !xenvif_schedulable(vif))
+	if (vif->task == NULL ||
+	    vif->dealloc_task == NULL ||
+	    !xenvif_schedulable(vif))
 		goto drop;
 
 	/* At best we'll need one slot for the header and one for each
@@ -343,8 +347,26 @@
 	vif->pending_prod = MAX_PENDING_REQS;
 	for (i = 0; i < MAX_PENDING_REQS; i++)
 		vif->pending_ring[i] = i;
-	for (i = 0; i < MAX_PENDING_REQS; i++)
-		vif->mmap_pages[i] = NULL;
+	spin_lock_init(&vif->callback_lock);
+	spin_lock_init(&vif->response_lock);
+	/* If ballooning is disabled, this will consume real memory, so you
+	 * had better enable it. The long-term solution would be to use just
+	 * a bunch of valid page descriptors, without depending on ballooning.
+	 */
+	err = alloc_xenballooned_pages(MAX_PENDING_REQS,
+				       vif->mmap_pages,
+				       false);
+	if (err) {
+		netdev_err(dev, "Could not reserve mmap_pages\n");
+		return ERR_PTR(-ENOMEM);
+	}
+	for (i = 0; i < MAX_PENDING_REQS; i++) {
+		vif->pending_tx_info[i].callback_struct = (struct ubuf_info)
+			{ .callback = xenvif_zerocopy_callback,
+			  .ctx = NULL,
+			  .desc = i };
+		vif->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
+	}
 
 	/*
 	 * Initialise a dummy MAC address. We choose the numerically
@@ -382,12 +404,14 @@
 
 	BUG_ON(vif->tx_irq);
 	BUG_ON(vif->task);
+	BUG_ON(vif->dealloc_task);
 
 	err = xenvif_map_frontend_rings(vif, tx_ring_ref, rx_ring_ref);
 	if (err < 0)
 		goto err;
 
 	init_waitqueue_head(&vif->wq);
+	init_waitqueue_head(&vif->dealloc_wq);
 
 	if (tx_evtchn == rx_evtchn) {
 		/* feature-split-event-channels == 0 */
@@ -431,6 +455,16 @@
 
 	vif->task = task;
 
+	task = kthread_create(xenvif_dealloc_kthread,
+			      (void *)vif, "%s-dealloc", vif->dev->name);
+	if (IS_ERR(task)) {
+		pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
+		err = PTR_ERR(task);
+		goto err_rx_unbind;
+	}
+
+	vif->dealloc_task = task;
+
 	rtnl_lock();
 	if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
 		dev_set_mtu(vif->dev, ETH_DATA_LEN);
@@ -441,6 +475,7 @@
 	rtnl_unlock();
 
 	wake_up_process(vif->task);
+	wake_up_process(vif->dealloc_task);
 
 	return 0;
 
@@ -478,6 +513,11 @@
 		vif->task = NULL;
 	}
 
+	if (vif->dealloc_task) {
+		kthread_stop(vif->dealloc_task);
+		vif->dealloc_task = NULL;
+	}
+
 	if (vif->tx_irq) {
 		if (vif->tx_irq == vif->rx_irq)
 			unbind_from_irqhandler(vif->tx_irq, vif);
@@ -493,6 +533,24 @@
 
 void xenvif_free(struct xenvif *vif)
 {
+	int i, unmap_timeout = 0;
+
+	for (i = 0; i < MAX_PENDING_REQS; ++i) {
+		if (vif->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
+			unmap_timeout++;
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			schedule_timeout(msecs_to_jiffies(1000));
+			if (unmap_timeout > 9 &&
+			    net_ratelimit())
+				netdev_err(vif->dev,
+					   "Page still granted! Index: %x\n",
+					   i);
+			i = -1;
+		}
+	}
+
+	free_xenballooned_pages(MAX_PENDING_REQS, vif->mmap_pages);
+
 	netif_napi_del(&vif->napi);
 
 	unregister_netdev(vif->dev);