igb: when number of CPUs > 4 combine tx/rx queues to allow more queues

This patch makes it so that nics such as 82576 and newer can support more
hardware queues when there are more than 4 cpus by combining a tx/rx queue
pair onto one interrupt so that 8 queue pairs can be supported and thus
allow for more queues.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/igb/igb.h b/drivers/net/igb/igb.h
index 3298f5a..2bb9549 100644
--- a/drivers/net/igb/igb.h
+++ b/drivers/net/igb/igb.h
@@ -59,10 +59,10 @@
 #define MAX_Q_VECTORS                      8
 
 /* Transmit and receive queues */
-#define IGB_MAX_RX_QUEUES     (adapter->vfs_allocated_count ? \
-                               (adapter->vfs_allocated_count > 6 ? 1 : 2) : 4)
-#define IGB_MAX_TX_QUEUES     IGB_MAX_RX_QUEUES
-#define IGB_ABS_MAX_TX_QUEUES     4
+#define IGB_MAX_RX_QUEUES                  (adapter->vfs_allocated_count ? 2 : \
+                                           (hw->mac.type > e1000_82575 ? 8 : 4))
+#define IGB_ABS_MAX_TX_QUEUES              8
+#define IGB_MAX_TX_QUEUES                  IGB_MAX_RX_QUEUES
 
 #define IGB_MAX_VF_MC_ENTRIES              30
 #define IGB_MAX_VF_FUNCTIONS               8
@@ -315,6 +315,7 @@
 	u16 rx_ring_count;
 	unsigned int vfs_allocated_count;
 	struct vf_data_storage *vf_data;
+	u32 rss_queues;
 };
 
 #define IGB_FLAG_HAS_MSI           (1 << 0)
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index d72d484..0235220 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -296,10 +296,10 @@
 		 * and continue consuming queues in the same sequence
 		 */
 		if (adapter->vfs_allocated_count) {
-			for (; i < adapter->num_rx_queues; i++)
+			for (; i < adapter->rss_queues; i++)
 				adapter->rx_ring[i].reg_idx = rbase_offset +
 				                              Q_IDX_82576(i);
-			for (; j < adapter->num_tx_queues; j++)
+			for (; j < adapter->rss_queues; j++)
 				adapter->tx_ring[j].reg_idx = rbase_offset +
 				                              Q_IDX_82576(j);
 		}
@@ -618,14 +618,15 @@
 	int numvecs, i;
 
 	/* Number of supported queues. */
-	adapter->num_rx_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
-	adapter->num_tx_queues = min_t(u32, IGB_MAX_TX_QUEUES, num_online_cpus());
+	adapter->num_rx_queues = adapter->rss_queues;
+	adapter->num_tx_queues = adapter->rss_queues;
 
 	/* start with one vector for every rx queue */
 	numvecs = adapter->num_rx_queues;
 
 	/* if tx handler is seperate add 1 for every tx queue */
-	numvecs += adapter->num_tx_queues;
+	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
+		numvecs += adapter->num_tx_queues;
 
 	/* store the number of vectors reserved for queues */
 	adapter->num_q_vectors = numvecs;
@@ -666,6 +667,7 @@
 	}
 #endif
 	adapter->vfs_allocated_count = 0;
+	adapter->rss_queues = 1;
 	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
 	adapter->num_rx_queues = 1;
 	adapter->num_tx_queues = 1;
@@ -1824,6 +1826,17 @@
 		adapter->vfs_allocated_count = max_vfs;
 
 #endif /* CONFIG_PCI_IOV */
+	adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
+
+	/*
+	 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
+	 * then we should combine the queues into a queue pair in order to
+	 * conserve interrupts due to limited supply
+	 */
+	if ((adapter->rss_queues > 4) ||
+	    ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
+		adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
+
 	/* This call may decrease the number of queues */
 	if (igb_init_interrupt_scheme(adapter)) {
 		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
@@ -2015,7 +2028,7 @@
 		}
 	}
 
-	for (i = 0; i < IGB_MAX_TX_QUEUES; i++) {
+	for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
 		int r_idx = i % adapter->num_tx_queues;
 		adapter->multi_tx_table[i] = &adapter->tx_ring[r_idx];
 	}
@@ -2199,7 +2212,7 @@
 		array_wr32(E1000_RSSRK(0), j, rsskey);
 	}
 
-	num_rx_queues = adapter->num_rx_queues;
+	num_rx_queues = adapter->rss_queues;
 
 	if (adapter->vfs_allocated_count) {
 		/* 82575 and 82576 supports 2 RSS queues for VMDq */
@@ -2255,7 +2268,7 @@
 				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
 			wr32(E1000_VT_CTL, vtctl);
 		}
-		if (adapter->num_rx_queues > 1)
+		if (adapter->rss_queues > 1)
 			mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
 		else
 			mrqc = E1000_MRQC_ENABLE_VMDQ;
@@ -2385,7 +2398,7 @@
 	/* clear all bits that might not be set */
 	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
 
-	if (adapter->num_rx_queues > 1 && vfn == adapter->vfs_allocated_count)
+	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
 		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
 	/*
 	 * for VMDq only allow the VFs and pool 0 to accept broadcast and