USB: gadget: rndis: Optimize tx path for better performance

Current rndis header addition and aggregation steps:
1. Increase skb headroom by size of rndis header.
2. Copy skb data with newly created skb having more headroom.
3. Create and add rndis header with it.
4. Again copy skb data with usb request buffer for aggregation purpose.

New optimized steps :
1. Create and copy rndis header directly with usb request buffer
2. Copy skb data with usb request buffer

Above new steps avoids cpu intensive skb operations which reduces CPU
usage and provide better throughput.

This change also assigns completion handle with usb request at the time
of usb request creation instead of assigning everytime before queueing
usb request. Also create debugfs for ether stats.

Change-Id: Ifc64d6a8573f6a24e28c73ef9a19b62649e171cd
Signed-off-by: Mayank Rana <mrana@codeaurora.org>
Signed-off-by: Ajay Agarwal <ajaya@codeaurora.org>
diff --git a/drivers/usb/gadget/function/f_rndis.c b/drivers/usb/gadget/function/f_rndis.c
index a75e5d3..24bc2d4 100644
--- a/drivers/usb/gadget/function/f_rndis.c
+++ b/drivers/usb/gadget/function/f_rndis.c
@@ -386,10 +386,31 @@
 					struct sk_buff *skb)
 {
 	struct sk_buff *skb2;
+	struct rndis_packet_msg_type *header = NULL;
+	struct f_rndis *rndis = func_to_rndis(&port->func);
 
 	if (!skb)
 		return NULL;
 
+	if (rndis->port.multi_pkt_xfer) {
+		if (port->header) {
+			header = port->header;
+			header->MessageType = cpu_to_le32(RNDIS_MSG_PACKET);
+			header->MessageLength = cpu_to_le32(skb->len +
+							sizeof(*header));
+			header->DataOffset = cpu_to_le32(36);
+			header->DataLength = cpu_to_le32(skb->len);
+			pr_debug("MessageLength:%d DataLength:%d\n",
+						header->MessageLength,
+						header->DataLength);
+			return skb;
+		}
+
+		dev_kfree_skb(skb);
+		pr_err("RNDIS header is NULL.\n");
+		return NULL;
+	}
+
 	skb2 = skb_realloc_headroom(skb, sizeof(struct rndis_packet_msg_type));
 	rndis_add_hdr(skb2);
 
diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c
index 9e38809..fbf90c4 100644
--- a/drivers/usb/gadget/function/u_ether.c
+++ b/drivers/usb/gadget/function/u_ether.c
@@ -246,6 +246,7 @@
 }
 
 static void rx_complete(struct usb_ep *ep, struct usb_request *req);
+static void tx_complete(struct usb_ep *ep, struct usb_request *req);
 
 static int
 rx_submit(struct eth_dev *dev, struct usb_request *req, gfp_t gfp_flags)
@@ -307,7 +308,6 @@
 
 	req->buf = skb->data;
 	req->length = size;
-	req->complete = rx_complete;
 	req->context = skb;
 
 	retval = usb_ep_queue(out, req, gfp_flags);
@@ -410,6 +410,7 @@
 {
 	unsigned		i;
 	struct usb_request	*req;
+	bool			usb_in;
 
 	if (!n)
 		return -ENOMEM;
@@ -420,10 +421,22 @@
 		if (i-- == 0)
 			goto extra;
 	}
+
+	if (ep->desc->bEndpointAddress & USB_DIR_IN)
+		usb_in = true;
+	else
+		usb_in = false;
+
 	while (i--) {
 		req = usb_ep_alloc_request(ep, GFP_ATOMIC);
 		if (!req)
 			return list_empty(list) ? -ENOMEM : 0;
+		/* update completion handler */
+		if (usb_in)
+			req->complete = tx_complete;
+		else
+			req->complete = rx_complete;
+
 		list_add(&req->list, list);
 	}
 	return 0;
@@ -575,7 +588,7 @@
 
 static void tx_complete(struct usb_ep *ep, struct usb_request *req)
 {
-	struct sk_buff	*skb = req->context;
+	struct sk_buff	*skb;
 	struct eth_dev	*dev = ep->driver_data;
 	struct net_device *net = dev->net;
 	struct usb_request *new_req;
@@ -683,6 +696,7 @@
 			spin_unlock(&dev->req_lock);
 		}
 	} else {
+		skb = req->context;
 		/* Is aggregation already enabled and buffers allocated ? */
 		if (dev->port_usb->multi_pkt_xfer && dev->tx_req_bufsize) {
 			req->buf = kzalloc(dev->tx_req_bufsize
@@ -725,7 +739,7 @@
 	list_for_each(act, &dev->tx_reqs) {
 		req = container_of(act, struct usb_request, list);
 		if (!req->buf)
-			req->buf = kmalloc(dev->tx_req_bufsize
+			req->buf = kzalloc(dev->tx_req_bufsize
 				+ dev->gadget->extra_buf_alloc, GFP_ATOMIC);
 
 		if (!req->buf)
@@ -866,8 +880,19 @@
 	spin_unlock_irqrestore(&dev->req_lock, flags);
 
 	if (multi_pkt_xfer) {
+		pr_debug("req->length:%d header_len:%u\n"
+				"skb->len:%d skb->data_len:%d\n",
+				req->length, dev->header_len,
+				skb->len, skb->data_len);
+		/* Add RNDIS Header */
+		memcpy(req->buf + req->length, dev->port_usb->header,
+						dev->header_len);
+		/* Increment req length by header size */
+		req->length += dev->header_len;
+		/* Copy received IP data from SKB */
 		memcpy(req->buf + req->length, skb->data, skb->len);
-		req->length = req->length + skb->len;
+		/* Increment req length by skb data length */
+		req->length += skb->len;
 		length = req->length;
 		dev_kfree_skb_any(skb);
 
@@ -927,8 +952,6 @@
 		req->context = skb;
 	}
 
-	req->complete = tx_complete;
-
 	/* NCM requires no zlp if transfer is dwNtbInMaxSize */
 	if (is_fixed && length == fixed_in_len &&
 	    (length % in->maxpacket) == 0)
@@ -981,7 +1004,7 @@
 		spin_lock_irqsave(&dev->req_lock, flags);
 		if (list_empty(&dev->tx_reqs))
 			netif_start_queue(net);
-		list_add(&req->list, &dev->tx_reqs);
+		list_add_tail(&req->list, &dev->tx_reqs);
 		spin_unlock_irqrestore(&dev->req_lock, flags);
 	}
 success:
@@ -1443,6 +1466,8 @@
 	else
 		INFO(dev, "MAC %pM\n", dev->dev_mac);
 
+	uether_debugfs_init(dev);
+
 	return status;
 }
 EXPORT_SYMBOL_GPL(gether_register_netdev);
@@ -1594,6 +1619,11 @@
 	if (!dev)
 		return ERR_PTR(-EINVAL);
 
+	/* size of rndis_packet_msg_type is 44 */
+	link->header = kzalloc(44, GFP_ATOMIC);
+	if (!link->header)
+		return ERR_PTR(-ENOMEM);
+
 	if (link->in_ep) {
 		link->in_ep->driver_data = dev;
 		result = usb_ep_enable(link->in_ep);
@@ -1657,8 +1687,12 @@
 	}
 fail0:
 	/* caller is responsible for cleanup on error */
-	if (result < 0)
+	if (result < 0) {
+		kfree(link->header);
+
 		return ERR_PTR(result);
+	}
+
 	return dev->net;
 }
 EXPORT_SYMBOL_GPL(gether_connect);
@@ -1711,6 +1745,9 @@
 			spin_lock(&dev->req_lock);
 		}
 		spin_unlock(&dev->req_lock);
+		/* Free rndis header buffer memory */
+		kfree(link->header);
+		link->header = NULL;
 		link->in_ep->desc = NULL;
 	}
 
diff --git a/drivers/usb/gadget/function/u_ether.h b/drivers/usb/gadget/function/u_ether.h
index 3ff09d1..a10503a 100644
--- a/drivers/usb/gadget/function/u_ether.h
+++ b/drivers/usb/gadget/function/u_ether.h
@@ -87,6 +87,7 @@
 	/* called on network open/close */
 	void				(*open)(struct gether *);
 	void				(*close)(struct gether *);
+	struct rndis_packet_msg_type	*header;
 };
 
 #define	DEFAULT_FILTER	(USB_CDC_PACKET_TYPE_BROADCAST \