net: bcmgenet: fix dev->stats.tx_bytes accounting

1. Add bytes_compl local variable to __bcmgenet_tx_reclaim() to collect
   transmitted bytes. dev->stats updates can then be moved outside the
   while-loop. bytes_compl is also needed for future BQL support.
2. When bcmgenet device uses Tx checksum offload, each transmitted skb
   gets an extra 64-byte header prepended to it. Before this header is
   prepended to the skb, we need to save the skb "wire" length in
   GENET_CB(skb)->bytes_sent, so that proper Tx bytes accounting can
   be done in __bcmgenet_tx_reclaim().
3. skb->len covers the entire length of skb, whether it is linear or
   fragmented. Thus, when we clean the fragments, do not increase
   transmitted bytes.

Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file")
Signed-off-by: Petri Gynther <pgynther@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 6746fd0..c1c7c0e 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -1171,6 +1171,7 @@
 	struct enet_cb *tx_cb_ptr;
 	struct netdev_queue *txq;
 	unsigned int pkts_compl = 0;
+	unsigned int bytes_compl = 0;
 	unsigned int c_index;
 	unsigned int txbds_ready;
 	unsigned int txbds_processed = 0;
@@ -1193,16 +1194,13 @@
 		tx_cb_ptr = &priv->tx_cbs[ring->clean_ptr];
 		if (tx_cb_ptr->skb) {
 			pkts_compl++;
-			dev->stats.tx_packets++;
-			dev->stats.tx_bytes += tx_cb_ptr->skb->len;
+			bytes_compl += GENET_CB(tx_cb_ptr->skb)->bytes_sent;
 			dma_unmap_single(&dev->dev,
 					 dma_unmap_addr(tx_cb_ptr, dma_addr),
 					 dma_unmap_len(tx_cb_ptr, dma_len),
 					 DMA_TO_DEVICE);
 			bcmgenet_free_cb(tx_cb_ptr);
 		} else if (dma_unmap_addr(tx_cb_ptr, dma_addr)) {
-			dev->stats.tx_bytes +=
-				dma_unmap_len(tx_cb_ptr, dma_len);
 			dma_unmap_page(&dev->dev,
 				       dma_unmap_addr(tx_cb_ptr, dma_addr),
 				       dma_unmap_len(tx_cb_ptr, dma_len),
@@ -1220,6 +1218,9 @@
 	ring->free_bds += txbds_processed;
 	ring->c_index = (ring->c_index + txbds_processed) & DMA_C_INDEX_MASK;
 
+	dev->stats.tx_packets += pkts_compl;
+	dev->stats.tx_bytes += bytes_compl;
+
 	if (ring->free_bds > (MAX_SKB_FRAGS + 1)) {
 		txq = netdev_get_tx_queue(dev, ring->queue);
 		if (netif_tx_queue_stopped(txq))
@@ -1464,6 +1465,11 @@
 		goto out;
 	}
 
+	/* Retain how many bytes will be sent on the wire, without TSB inserted
+	 * by transmit checksum offload
+	 */
+	GENET_CB(skb)->bytes_sent = skb->len;
+
 	/* set the SKB transmit checksum */
 	if (priv->desc_64b_en) {
 		skb = bcmgenet_put_tx_csum(dev, skb);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 9673675..1e2dc34 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -531,6 +531,12 @@
 	u32		flags;
 };
 
+struct bcmgenet_skb_cb {
+	unsigned int bytes_sent;	/* bytes on the wire (no TSB) */
+};
+
+#define GENET_CB(skb)	((struct bcmgenet_skb_cb *)((skb)->cb))
+
 struct bcmgenet_tx_ring {
 	spinlock_t	lock;		/* ring lock */
 	struct napi_struct napi;	/* NAPI per tx queue */