net/mlx4_en: Refactor the XDP forwarding rings scheme

Separately manage the two types of TX rings: regular ones and XDP ones.
When an XDP program is set, do not borrow regular TX rings and convert
them into XDP ones; allocate new XDP rings instead, unless the maximum
number of rings is reached.
This means that on systems with fewer cores, XDP no longer consumes the
existing TX rings, as long as we stay within the TX rings limit.
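
For reference, a minimal sketch of the ring layout assumed by the
en_rx.c changes below; the actual enum/struct definitions live in
mlx4_en.h and are not part of the hunks shown here, so treat the names
other than TX_XDP, tx_ring_num[] and tx_ring[][] as illustrative:

	/* Illustrative only: TX rings grouped per type, so regular and
	 * XDP rings are allocated and indexed independently.
	 */
	enum {
		TX,			/* regular TX rings, used by the stack */
		TX_XDP,			/* rings used only for XDP_TX forwarding */
		MLX4_EN_NUM_TX_TYPES,	/* assumed name for the type count */
	};

	/* Assumed fields inside struct mlx4_en_priv:
	 *	int			tx_ring_num[MLX4_EN_NUM_TX_TYPES];
	 *	struct mlx4_en_tx_ring	**tx_ring[MLX4_EN_NUM_TX_TYPES];
	 *
	 * An RX ring's XDP_TX traffic then targets the XDP ring with the
	 * same index, i.e. priv->tx_ring[TX_XDP][cq->ring], instead of a
	 * regular TX ring at offset (tx_ring_num - xdp_ring_num) + cq->ring.
	 */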

XDP TX ring counters are not shown in ethtool statistics.
Instead, XDP counters will be added to the respective RX rings
in a downstream patch.

This has no performance implications.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index f2e8bed..71196f6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -788,7 +788,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 	struct bpf_prog *xdp_prog;
 	int doorbell_pending;
 	struct sk_buff *skb;
-	int tx_index;
 	int index;
 	int nr;
 	unsigned int length;
@@ -808,7 +807,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 	rcu_read_lock();
 	xdp_prog = rcu_dereference(ring->xdp_prog);
 	doorbell_pending = 0;
-	tx_index = (priv->tx_ring_num - priv->xdp_ring_num) + cq->ring;
 
 	/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
 	 * descriptor offset can be deduced from the CQE index instead of
@@ -905,7 +903,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 				break;
 			case XDP_TX:
 				if (likely(!mlx4_en_xmit_frame(frags, dev,
-							length, tx_index,
+							length, cq->ring,
 							&doorbell_pending)))
 					goto consumed;
 				goto xdp_drop; /* Drop on xmit failure */
@@ -1082,7 +1080,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 out:
 	rcu_read_unlock();
 	if (doorbell_pending)
-		mlx4_en_xmit_doorbell(priv->tx_ring[tx_index]);
+		mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq->ring]);
 
 	AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
 	mlx4_cq_set_ci(&cq->mcq);
@@ -1162,7 +1160,7 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
 	/* bpf requires buffers to be set up as 1 packet per page.
 	 * This only works when num_frags == 1.
 	 */
-	if (priv->xdp_ring_num) {
+	if (priv->tx_ring_num[TX_XDP]) {
 		dma_dir = PCI_DMA_BIDIRECTIONAL;
 		/* This will gain efficient xdp frame recycling at the expense
 		 * of more costly truesize accounting