USB: gadget: u_ether: Optimize TX interrupt on completion

Even though DL aggregation is enabled and set to more than 3
packets, most of the time single packets are still queued to the
DCD. Sending a single packet adds an extra penalty, because after
every transfer completion the controller may do some cleanup
before fetching a new request to the HW.

In the current TX aggregation logic, on receiving a packet from
the network layer and after preparing the usb_req, the driver
checks whether the number of aggregated packets has reached the
maximum aggregation value or whether the number of requests
queued to the DCD is no more than TX_REQ_THRESHOLD. If so, it
immediately queues the packet to the DCD; otherwise, it parks the
packet for further aggregation. Every TX completion handler then
checks whether there is a parked TX packet and, if so, queues it
to the DCD from the completion handler. In high-throughput test
cases, completion handlers may be called back to back due to RX
or TX endpoint completions. The TX completion handler is then
called so frequently that it may queue the parked packet to the
DCD before it has been aggregated with further packets.

With the new TX aggregation logic, instead of queuing the parked
aggregated packet on every TX completion, the handler checks the
no_interrupt flag of the completed request and queues the parked
packet to the DCD only if that flag is zero. This gives
sufficient time for more aggregation. For this logic to work, the
point at which aggregation starts must be synchronized with the
number of requests that can be queued sequentially with the
no_interrupt flag set. Hence, add a new constant,
MAX_TX_REQ_WITH_NO_INT (replacing TX_REQ_THRESHOLD), that is used
by both the aggregation logic and the no_interrupt logic.

Change-Id: I84fb4a4d3d20bb3ddb2f33c64cb4b65373b363ea
Signed-off-by: Sujeet Kumar <ksujeet@codeaurora.org>
Signed-off-by: Azhar Shaikh <azhars@codeaurora.org>
Signed-off-by: Ajay Agarwal <ajaya@codeaurora.org>
diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c
index 0305a6b..b1a7f73 100644
--- a/drivers/usb/gadget/function/u_ether.c
+++ b/drivers/usb/gadget/function/u_ether.c
@@ -72,7 +72,7 @@
 	struct list_head	tx_reqs, rx_reqs;
 	unsigned		tx_qlen;
 /* Minimum number of TX USB request queued to UDC */
-#define TX_REQ_THRESHOLD	5
+#define MAX_TX_REQ_WITH_NO_INT	5
 	int			no_tx_req_used;
 	int			tx_skb_hold_count;
 	u32			tx_req_bufsize;
@@ -571,14 +571,14 @@
 	dev->net->stats.tx_packets++;
 
 	spin_lock(&dev->req_lock);
-	list_add_tail(&req->list, &dev->tx_reqs);
 
 	if (dev->port_usb->multi_pkt_xfer && !req->context) {
 		dev->no_tx_req_used--;
 		req->length = 0;
 		in = dev->port_usb->in_ep;
 
-		if (!list_empty(&dev->tx_reqs)) {
+		/* Do not process further if no_interrupt is set */
+		if (!req->no_interrupt && !list_empty(&dev->tx_reqs)) {
 			new_req = container_of(dev->tx_reqs.next,
 					struct usb_request, list);
 			list_del(&new_req->list);
@@ -606,6 +606,16 @@
 					length++;
 				}
 
+				/* set when tx completion interrupt needed */
+				spin_lock(&dev->req_lock);
+				dev->tx_qlen++;
+				if (dev->tx_qlen == MAX_TX_REQ_WITH_NO_INT) {
+					new_req->no_interrupt = 0;
+					dev->tx_qlen = 0;
+				} else {
+					new_req->no_interrupt = 1;
+				}
+				spin_unlock(&dev->req_lock);
 				new_req->length = length;
 				retval = usb_ep_queue(in, new_req, GFP_ATOMIC);
 				switch (retval) {
@@ -650,6 +660,11 @@
 		dev_kfree_skb_any(skb);
 	}
 
+	/* put the completed req back to tx_reqs tail pool */
+	spin_lock(&dev->req_lock);
+	list_add_tail(&req->list, &dev->tx_reqs);
+	spin_unlock(&dev->req_lock);
+
 	if (netif_carrier_ok(dev->net))
 		netif_wake_queue(dev->net);
 }
@@ -813,7 +828,13 @@
 
 		spin_lock_irqsave(&dev->req_lock, flags);
 		if (dev->tx_skb_hold_count < dev->dl_max_pkts_per_xfer) {
-			if (dev->no_tx_req_used > TX_REQ_THRESHOLD) {
+			/*
+			 * should allow aggregation only, if the number of
+			 * requests queued more than the tx requests that can
+			 * be queued with no interrupt flag set sequentially.
+			 * Otherwise, packets may be blocked forever.
+			 */
+			if (dev->no_tx_req_used > MAX_TX_REQ_WITH_NO_INT) {
 				list_add(&req->list, &dev->tx_reqs);
 				spin_unlock_irqrestore(&dev->req_lock, flags);
 				goto success;
@@ -857,13 +878,15 @@
 	/* throttle highspeed IRQ rate back slightly */
 	if (gadget_is_dualspeed(dev->gadget) &&
 			 (dev->gadget->speed == USB_SPEED_HIGH)) {
+		spin_lock_irqsave(&dev->req_lock, flags);
 		dev->tx_qlen++;
-		if (dev->tx_qlen == (dev->qmult/2)) {
+		if (dev->tx_qlen == MAX_TX_REQ_WITH_NO_INT) {
 			req->no_interrupt = 0;
 			dev->tx_qlen = 0;
 		} else {
 			req->no_interrupt = 1;
 		}
+		spin_unlock_irqrestore(&dev->req_lock, flags);
 	} else {
 		req->no_interrupt = 0;
 	}