sfc: Create multiple TX queues

Create a core TX queue and 2 hardware TX queues for each channel.
If separate_tx_channels is set, create equal numbers of RX and TX
channels instead.

Rewrite the channel and queue iteration macros accordingly.
Eliminate efx_channel::used_flags as redundant.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/sfc/net_driver.h b/drivers/net/sfc/net_driver.h
index d68331c..2e6fd89 100644
--- a/drivers/net/sfc/net_driver.h
+++ b/drivers/net/sfc/net_driver.h
@@ -85,9 +85,13 @@
 #define EFX_MAX_CHANNELS 32
 #define EFX_MAX_RX_QUEUES EFX_MAX_CHANNELS
 
-#define EFX_TX_QUEUE_OFFLOAD_CSUM	0
-#define EFX_TX_QUEUE_NO_CSUM		1
-#define EFX_TX_QUEUE_COUNT		2
+/* Checksum generation is a per-queue option in hardware, so each
+ * queue visible to the networking core is backed by two hardware TX
+ * queues. */
+#define EFX_MAX_CORE_TX_QUEUES	EFX_MAX_CHANNELS
+#define EFX_TXQ_TYPE_OFFLOAD	1
+#define EFX_TXQ_TYPES		2
+#define EFX_MAX_TX_QUEUES	(EFX_TXQ_TYPES * EFX_MAX_CORE_TX_QUEUES)
 
 /**
  * struct efx_special_buffer - An Efx special buffer
@@ -187,7 +191,7 @@
 struct efx_tx_queue {
 	/* Members which don't change on the fast path */
 	struct efx_nic *efx ____cacheline_aligned_in_smp;
-	int queue;
+	unsigned queue;
 	struct efx_channel *channel;
 	struct efx_nic *nic;
 	struct efx_tx_buffer *buffer;
@@ -306,11 +310,6 @@
 };
 
 
-/* Flags for channel->used_flags */
-#define EFX_USED_BY_RX 1
-#define EFX_USED_BY_TX 2
-#define EFX_USED_BY_RX_TX (EFX_USED_BY_RX | EFX_USED_BY_TX)
-
 enum efx_rx_alloc_method {
 	RX_ALLOC_METHOD_AUTO = 0,
 	RX_ALLOC_METHOD_SKB = 1,
@@ -327,7 +326,6 @@
  * @efx: Associated Efx NIC
  * @channel: Channel instance number
  * @name: Name for channel and IRQ
- * @used_flags: Channel is used by net driver
  * @enabled: Channel enabled indicator
  * @irq: IRQ number (MSI and MSI-X only)
  * @irq_moderation: IRQ moderation value (in hardware ticks)
@@ -352,12 +350,14 @@
  * @n_rx_frm_trunc: Count of RX_FRM_TRUNC errors
  * @n_rx_overlength: Count of RX_OVERLENGTH errors
  * @n_skbuff_leaks: Count of skbuffs leaked due to RX overrun
+ * @tx_queue: Pointer to first TX queue, or %NULL if not used for TX
+ * @tx_stop_count: Core TX queue stop count
+ * @tx_stop_lock: Core TX queue stop lock
  */
 struct efx_channel {
 	struct efx_nic *efx;
 	int channel;
 	char name[IFNAMSIZ + 6];
-	int used_flags;
 	bool enabled;
 	int irq;
 	unsigned int irq_moderation;
@@ -389,6 +389,9 @@
 	struct efx_rx_buffer *rx_pkt;
 	bool rx_pkt_csummed;
 
+	struct efx_tx_queue *tx_queue;
+	atomic_t tx_stop_count;
+	spinlock_t tx_stop_lock;
 };
 
 enum efx_led_mode {
@@ -661,8 +664,9 @@
  * @rx_queue: RX DMA queues
  * @channel: Channels
  * @next_buffer_table: First available buffer table id
- * @n_rx_queues: Number of RX queues
  * @n_channels: Number of channels in use
+ * @n_rx_channels: Number of channels used for RX (= number of RX queues)
+ * @n_tx_channels: Number of channels used for TX
  * @rx_buffer_len: RX buffer length
  * @rx_buffer_order: Order (log2) of number of pages for each RX buffer
  * @int_error_count: Number of internal errors seen recently
@@ -693,8 +697,6 @@
  * @port_initialized: Port initialized?
  * @net_dev: Operating system network device. Consider holding the rtnl lock
  * @rx_checksum_enabled: RX checksumming enabled
- * @netif_stop_count: Port stop count
- * @netif_stop_lock: Port stop lock
  * @mac_stats: MAC statistics. These include all statistics the MACs
  *	can provide.  Generic code converts these into a standard
  *	&struct net_device_stats.
@@ -742,13 +744,14 @@
 	enum nic_state state;
 	enum reset_type reset_pending;
 
-	struct efx_tx_queue tx_queue[EFX_TX_QUEUE_COUNT];
+	struct efx_tx_queue tx_queue[EFX_MAX_TX_QUEUES];
 	struct efx_rx_queue rx_queue[EFX_MAX_RX_QUEUES];
 	struct efx_channel channel[EFX_MAX_CHANNELS];
 
 	unsigned next_buffer_table;
-	int n_rx_queues;
-	int n_channels;
+	unsigned n_channels;
+	unsigned n_rx_channels;
+	unsigned n_tx_channels;
 	unsigned int rx_buffer_len;
 	unsigned int rx_buffer_order;
 
@@ -780,9 +783,6 @@
 	struct net_device *net_dev;
 	bool rx_checksum_enabled;
 
-	atomic_t netif_stop_count;
-	spinlock_t netif_stop_lock;
-
 	struct efx_mac_stats mac_stats;
 	struct efx_buffer stats_buffer;
 	spinlock_t stats_lock;
@@ -928,31 +928,26 @@
 /* Iterate over all used channels */
 #define efx_for_each_channel(_channel, _efx)				\
 	for (_channel = &((_efx)->channel[0]);				\
-	     _channel < &((_efx)->channel[EFX_MAX_CHANNELS]);		\
-	     _channel++)						\
-		if (!_channel->used_flags)				\
-			continue;					\
-		else
+	     _channel < &((_efx)->channel[(_efx)->n_channels]);		\
+	     _channel++)
 
 /* Iterate over all used TX queues */
 #define efx_for_each_tx_queue(_tx_queue, _efx)				\
 	for (_tx_queue = &((_efx)->tx_queue[0]);			\
-	     _tx_queue < &((_efx)->tx_queue[EFX_TX_QUEUE_COUNT]);	\
+	     _tx_queue < &((_efx)->tx_queue[EFX_TXQ_TYPES *		\
+					    (_efx)->n_tx_channels]);	\
 	     _tx_queue++)
 
 /* Iterate over all TX queues belonging to a channel */
 #define efx_for_each_channel_tx_queue(_tx_queue, _channel)		\
-	for (_tx_queue = &((_channel)->efx->tx_queue[0]);		\
-	     _tx_queue < &((_channel)->efx->tx_queue[EFX_TX_QUEUE_COUNT]); \
-	     _tx_queue++)						\
-		if (_tx_queue->channel != (_channel))			\
-			continue;					\
-		else
+	for (_tx_queue = (_channel)->tx_queue;				\
+	     _tx_queue && _tx_queue < (_channel)->tx_queue + EFX_TXQ_TYPES; \
+	     _tx_queue++)
 
 /* Iterate over all used RX queues */
 #define efx_for_each_rx_queue(_rx_queue, _efx)				\
 	for (_rx_queue = &((_efx)->rx_queue[0]);			\
-	     _rx_queue < &((_efx)->rx_queue[(_efx)->n_rx_queues]);	\
+	     _rx_queue < &((_efx)->rx_queue[(_efx)->n_rx_channels]);	\
 	     _rx_queue++)
 
 /* Iterate over all RX queues belonging to a channel */