i40e: Split bytes and packets from Rx/Tx stats

This makes it so that the Tx and Rx byte and packet counts are
separated from the rest of the statistics.  This allows for better
isolation of these stats when we move them into the 64 bit statistics.

Simplify things by re-ordering how the stats display in ethtool.
Instead of displaying all of the Tx queues as a block, followed by all
the Rx queues, the new order is Tx[0], Rx[0], Tx[1], Rx[1], ..., Tx[n],
Rx[n].  This reduces the loops and cleans up the display for testing
purposes since it is very easy to verify if flow director is doing the
right thing as the Tx and Rx queue pair are shown in pairs.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Kavindya Deegala <kavindya.s.deegala@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 0b01c61..2b6655b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -512,8 +512,8 @@
 				 vsi->rx_rings[i].ring_active);
 			dev_info(&pf->pdev->dev,
 				 "    rx_rings[%i]: rx_stats: packets = %lld, bytes = %lld, non_eop_descs = %lld\n",
-				 i, vsi->rx_rings[i].rx_stats.packets,
-				 vsi->rx_rings[i].rx_stats.bytes,
+				 i, vsi->rx_rings[i].stats.packets,
+				 vsi->rx_rings[i].stats.bytes,
 				 vsi->rx_rings[i].rx_stats.non_eop_descs);
 			dev_info(&pf->pdev->dev,
 				 "    rx_rings[%i]: rx_stats: alloc_rx_page_failed = %lld, alloc_rx_buff_failed = %lld\n",
@@ -556,8 +556,8 @@
 				 vsi->tx_rings[i].ring_active);
 			dev_info(&pf->pdev->dev,
 				 "    tx_rings[%i]: tx_stats: packets = %lld, bytes = %lld, restart_queue = %lld\n",
-				 i, vsi->tx_rings[i].tx_stats.packets,
-				 vsi->tx_rings[i].tx_stats.bytes,
+				 i, vsi->tx_rings[i].stats.packets,
+				 vsi->tx_rings[i].stats.bytes,
 				 vsi->tx_rings[i].tx_stats.restart_queue);
 			dev_info(&pf->pdev->dev,
 				 "    tx_rings[%i]: tx_stats: tx_busy = %lld, tx_done_old = %lld\n",
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 9a76b8c..8754c6fa 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -587,13 +587,11 @@
 		data[i++] = (i40e_gstrings_net_stats[j].sizeof_stat ==
 			sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
 	}
-	for (j = 0; j < vsi->num_queue_pairs; j++) {
-		data[i++] = vsi->tx_rings[j].tx_stats.packets;
-		data[i++] = vsi->tx_rings[j].tx_stats.bytes;
-	}
-	for (j = 0; j < vsi->num_queue_pairs; j++) {
-		data[i++] = vsi->rx_rings[j].rx_stats.packets;
-		data[i++] = vsi->rx_rings[j].rx_stats.bytes;
+	for (j = 0; j < vsi->num_queue_pairs; j++, i += 4) {
+		data[i] = vsi->tx_rings[j].stats.packets;
+		data[i + 1] = vsi->tx_rings[j].stats.bytes;
+		data[i + 2] = vsi->rx_rings[j].stats.packets;
+		data[i + 3] = vsi->rx_rings[j].stats.bytes;
 	}
 	if (vsi == pf->vsi[pf->lan_vsi]) {
 		for (j = 0; j < I40E_GLOBAL_STATS_LEN; j++) {
@@ -641,8 +639,6 @@
 			p += ETH_GSTRING_LEN;
 			snprintf(p, ETH_GSTRING_LEN, "tx-%u.tx_bytes", i);
 			p += ETH_GSTRING_LEN;
-		}
-		for (i = 0; i < vsi->num_queue_pairs; i++) {
 			snprintf(p, ETH_GSTRING_LEN, "rx-%u.rx_packets", i);
 			p += ETH_GSTRING_LEN;
 			snprintf(p, ETH_GSTRING_LEN, "rx-%u.rx_bytes", i);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 657babe..d1b5bae 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -376,8 +376,12 @@
 	memset(&vsi->eth_stats_offsets, 0, sizeof(vsi->eth_stats_offsets));
 	if (vsi->rx_rings)
 		for (i = 0; i < vsi->num_queue_pairs; i++) {
+			memset(&vsi->rx_rings[i].stats, 0 ,
+			       sizeof(vsi->rx_rings[i].stats));
 			memset(&vsi->rx_rings[i].rx_stats, 0 ,
 			       sizeof(vsi->rx_rings[i].rx_stats));
+			memset(&vsi->tx_rings[i].stats, 0 ,
+			       sizeof(vsi->tx_rings[i].stats));
 			memset(&vsi->tx_rings[i].tx_stats, 0,
 			       sizeof(vsi->tx_rings[i].tx_stats));
 		}
@@ -708,14 +712,14 @@
 		struct i40e_ring *p;
 
 		p = &vsi->rx_rings[q];
-		rx_b += p->rx_stats.bytes;
-		rx_p += p->rx_stats.packets;
+		rx_b += p->stats.bytes;
+		rx_p += p->stats.packets;
 		rx_buf += p->rx_stats.alloc_rx_buff_failed;
 		rx_page += p->rx_stats.alloc_rx_page_failed;
 
 		p = &vsi->tx_rings[q];
-		tx_b += p->tx_stats.bytes;
-		tx_p += p->tx_stats.packets;
+		tx_b += p->stats.bytes;
+		tx_p += p->stats.packets;
 		tx_restart += p->tx_stats.restart_queue;
 		tx_busy += p->tx_stats.tx_busy;
 	}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index ad2818f..3e73bc0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -305,14 +305,14 @@
 	 * run the check_tx_hang logic with a transmit completion
 	 * pending but without time to complete it yet.
 	 */
-	if ((tx_ring->tx_stats.tx_done_old == tx_ring->tx_stats.packets) &&
+	if ((tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) &&
 	    tx_pending) {
 		/* make sure it is true for two checks in a row */
 		ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED,
 				       &tx_ring->state);
 	} else {
 		/* update completed stats and disarm the hang check */
-		tx_ring->tx_stats.tx_done_old = tx_ring->tx_stats.packets;
+		tx_ring->tx_stats.tx_done_old = tx_ring->stats.packets;
 		clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state);
 	}
 
@@ -411,8 +411,8 @@
 
 	i += tx_ring->count;
 	tx_ring->next_to_clean = i;
-	tx_ring->tx_stats.bytes += total_bytes;
-	tx_ring->tx_stats.packets += total_packets;
+	tx_ring->stats.bytes += total_bytes;
+	tx_ring->stats.packets += total_packets;
 	tx_ring->q_vector->tx.total_bytes += total_bytes;
 	tx_ring->q_vector->tx.total_packets += total_packets;
 
@@ -1075,8 +1075,8 @@
 	}
 
 	rx_ring->next_to_clean = i;
-	rx_ring->rx_stats.packets += total_rx_packets;
-	rx_ring->rx_stats.bytes += total_rx_bytes;
+	rx_ring->stats.packets += total_rx_packets;
+	rx_ring->stats.bytes += total_rx_bytes;
 	rx_ring->q_vector->rx.total_packets += total_rx_packets;
 	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index e514247..7f3f7e3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -126,17 +126,18 @@
 	unsigned int page_offset;
 };
 
-struct i40e_tx_queue_stats {
+struct i40e_queue_stats {
 	u64 packets;
 	u64 bytes;
+};
+
+struct i40e_tx_queue_stats {
 	u64 restart_queue;
 	u64 tx_busy;
 	u64 tx_done_old;
 };
 
 struct i40e_rx_queue_stats {
-	u64 packets;
-	u64 bytes;
 	u64 non_eop_descs;
 	u64 alloc_rx_page_failed;
 	u64 alloc_rx_buff_failed;
@@ -215,6 +216,7 @@
 	bool ring_active;		/* is ring online or not */
 
 	/* stats structs */
+	struct i40e_queue_stats	stats;
 	union {
 		struct i40e_tx_queue_stats tx_stats;
 		struct i40e_rx_queue_stats rx_stats;