be2net: event queue re-design

v2: Fixed up the bad typecasting pointed out by David...

In the current design 8 TXQs are serviced by 1 EQ, while each RSS queue
is serviced by a separate EQ. This is being changed as follows:

- Upto 8 EQs will be used (based on the availabilty of msix vectors).
Each EQ will handle 1 RSS and 1 TX ring. The default non-RSS RX queue and
MCC queue are handled by the last EQ.

- On cards which provide support, upto 8 RSS rings will be used, instead
of the current limit of 4.

The new design allows spreading the TX multi-queue completion processing
across multiple CPUs unlike the previous design.

Signed-off-by: Sathya Perla <sathya.perla@emulex.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 74aa148..86f51de 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -81,7 +81,7 @@
 #define BE_MIN_MTU		256
 
 #define BE_NUM_VLANS_SUPPORTED	64
-#define BE_MAX_EQD		96
+#define BE_MAX_EQD		96u
 #define	BE_MAX_TX_FRAG_COUNT	30
 
 #define EVNT_Q_LEN		1024
@@ -92,12 +92,16 @@
 #define MCC_Q_LEN		128	/* total size not to exceed 8 pages */
 #define MCC_CQ_LEN		256
 
-#define MAX_RSS_QS		4	/* BE limit is 4 queues/port */
+#define BE3_MAX_RSS_QS		8
+#define BE2_MAX_RSS_QS		4
+#define MAX_RSS_QS		BE3_MAX_RSS_QS
 #define MAX_RX_QS		(MAX_RSS_QS + 1) /* RSS qs + 1 def Rx */
+
 #define MAX_TX_QS		8
-#define BE_MAX_MSIX_VECTORS	(MAX_RX_QS + 1)/* RX + TX */
+#define MAX_MSIX_VECTORS	MAX_RSS_QS
+#define BE_TX_BUDGET		256
 #define BE_NAPI_WEIGHT		64
-#define MAX_RX_POST 		BE_NAPI_WEIGHT /* Frags posted at a time */
+#define MAX_RX_POST		BE_NAPI_WEIGHT /* Frags posted at a time */
 #define RX_FRAGS_REFILL_WM	(RX_Q_LEN - MAX_RX_POST)
 
 #define FW_VER_LEN		32
@@ -165,13 +169,16 @@
 
 	/* Adaptive interrupt coalescing (AIC) info */
 	bool enable_aic;
-	u16 min_eqd;		/* in usecs */
-	u16 max_eqd;		/* in usecs */
-	u16 cur_eqd;		/* in usecs */
-	u8  eq_idx;
+	u32 min_eqd;		/* in usecs */
+	u32 max_eqd;		/* in usecs */
+	u32 eqd;		/* configured val when aic is off */
+	u32 cur_eqd;		/* in usecs */
 
+	u8 idx;			/* array index */
+	u16 tx_budget;
 	struct napi_struct napi;
-};
+	struct be_adapter *adapter;
+} ____cacheline_aligned_in_smp;
 
 struct be_mcc_obj {
 	struct be_queue_info q;
@@ -197,7 +204,7 @@
 	/* Remember the skbs that were transmitted */
 	struct sk_buff *sent_skb_list[TX_Q_LEN];
 	struct be_tx_stats stats;
-};
+} ____cacheline_aligned_in_smp;
 
 /* Struct to remember the pages posted for rx frags */
 struct be_rx_page_info {
@@ -215,8 +222,6 @@
 	u32 rx_drops_no_skbs;	/* skb allocation errors */
 	u32 rx_drops_no_frags;	/* HW has no fetched frags */
 	u32 rx_post_fail;	/* page post alloc failures */
-	u32 rx_polls;		/* NAPI calls */
-	u32 rx_events;
 	u32 rx_compl;
 	u32 rx_mcast_pkts;
 	u32 rx_compl_err;	/* completions with err set */
@@ -249,16 +254,13 @@
 	struct be_queue_info cq;
 	struct be_rx_compl_info rxcp;
 	struct be_rx_page_info page_info_tbl[RX_Q_LEN];
-	struct be_eq_obj rx_eq;
 	struct be_rx_stats stats;
 	u8 rss_id;
 	bool rx_post_starved;	/* Zero rx frags have been posted to BE */
-	u32 cache_line_barrier[16];
-};
+} ____cacheline_aligned_in_smp;
 
 struct be_drv_stats {
 	u32 be_on_die_temperature;
-	u32 tx_events;
 	u32 eth_red_drops;
 	u32 rx_drops_no_pbuf;
 	u32 rx_drops_no_txpb;
@@ -320,20 +322,19 @@
 	spinlock_t mcc_lock;	/* For serializing mcc cmds to BE card */
 	spinlock_t mcc_cq_lock;
 
-	struct msix_entry msix_entries[BE_MAX_MSIX_VECTORS];
 	u32 num_msix_vec;
+	u32 num_evt_qs;
+	struct be_eq_obj eq_obj[MAX_MSIX_VECTORS];
+	struct msix_entry msix_entries[MAX_MSIX_VECTORS];
 	bool isr_registered;
 
 	/* TX Rings */
-	struct be_eq_obj tx_eq;
+	u32 num_tx_qs;
 	struct be_tx_obj tx_obj[MAX_TX_QS];
-	u8 num_tx_qs;
-
-	u32 cache_line_break[8];
 
 	/* Rx rings */
-	struct be_rx_obj rx_obj[MAX_RX_QS];
 	u32 num_rx_qs;
+	struct be_rx_obj rx_obj[MAX_RX_QS];
 	u32 big_page_size;	/* Compounded page size shared by rx wrbs */
 
 	u8 eq_next_idx;
@@ -404,24 +405,34 @@
 extern const struct ethtool_ops be_ethtool_ops;
 
 #define msix_enabled(adapter)		(adapter->num_msix_vec > 0)
-#define tx_stats(txo)			(&txo->stats)
-#define rx_stats(rxo)			(&rxo->stats)
+#define num_irqs(adapter)		(msix_enabled(adapter) ?	\
+						adapter->num_msix_vec : 1)
+#define tx_stats(txo)			(&(txo)->stats)
+#define rx_stats(rxo)			(&(rxo)->stats)
 
-#define BE_SET_NETDEV_OPS(netdev, ops)	(netdev->netdev_ops = ops)
+/* The default RXQ is the last RXQ */
+#define default_rxo(adpt)		(&adpt->rx_obj[adpt->num_rx_qs - 1])
 
 #define for_all_rx_queues(adapter, rxo, i)				\
 	for (i = 0, rxo = &adapter->rx_obj[i]; i < adapter->num_rx_qs;	\
 		i++, rxo++)
 
-/* Just skip the first default non-rss queue */
+/* Skip the default non-rss queue (last one)*/
 #define for_all_rss_queues(adapter, rxo, i)				\
-	for (i = 0, rxo = &adapter->rx_obj[i+1]; i < (adapter->num_rx_qs - 1);\
+	for (i = 0, rxo = &adapter->rx_obj[i]; i < (adapter->num_rx_qs - 1);\
 		i++, rxo++)
 
 #define for_all_tx_queues(adapter, txo, i)				\
 	for (i = 0, txo = &adapter->tx_obj[i]; i < adapter->num_tx_qs;	\
 		i++, txo++)
 
+#define for_all_evt_queues(adapter, eqo, i)				\
+	for (i = 0, eqo = &adapter->eq_obj[i]; i < adapter->num_evt_qs; \
+		i++, eqo++)
+
+#define is_mcc_eqo(eqo)			(eqo->idx == 0)
+#define mcc_eqo(adapter)		(&adapter->eq_obj[0])
+
 #define PAGE_SHIFT_4K		12
 #define PAGE_SIZE_4K		(1 << PAGE_SHIFT_4K)