sfc: Make the dmaq size a run-time setting (rather than compile-time)

- Allow the ring size to be specified in non
   power-of-two sizes (for instance to limit
   the amount of receive buffers).
 - Automatically size the event queue.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/sfc/tx.c b/drivers/net/sfc/tx.c
index 6a6acc4..1172698 100644
--- a/drivers/net/sfc/tx.c
+++ b/drivers/net/sfc/tx.c
@@ -28,7 +28,7 @@
  * The tx_queue descriptor ring fill-level must fall below this value
  * before we restart the netif queue
  */
-#define EFX_TXQ_THRESHOLD (EFX_TXQ_MASK / 2u)
+#define EFX_TXQ_THRESHOLD(_efx) ((_efx)->txq_entries / 2u)
 
 /* We need to be able to nest calls to netif_tx_stop_queue(), partly
  * because of the 2 hardware queues associated with each core queue,
@@ -207,7 +207,7 @@
 	}
 
 	fill_level = tx_queue->insert_count - tx_queue->old_read_count;
-	q_space = EFX_TXQ_MASK - 1 - fill_level;
+	q_space = efx->txq_entries - 1 - fill_level;
 
 	/* Map for DMA.  Use pci_map_single rather than pci_map_page
 	 * since this is more efficient on machines with sparse
@@ -244,14 +244,14 @@
 					&tx_queue->read_count;
 				fill_level = (tx_queue->insert_count
 					      - tx_queue->old_read_count);
-				q_space = EFX_TXQ_MASK - 1 - fill_level;
+				q_space = efx->txq_entries - 1 - fill_level;
 				if (unlikely(q_space-- <= 0))
 					goto stop;
 				smp_mb();
 				--tx_queue->stopped;
 			}
 
-			insert_ptr = tx_queue->insert_count & EFX_TXQ_MASK;
+			insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
 			buffer = &tx_queue->buffer[insert_ptr];
 			efx_tsoh_free(tx_queue, buffer);
 			EFX_BUG_ON_PARANOID(buffer->tsoh);
@@ -320,7 +320,7 @@
 	/* Work backwards until we hit the original insert pointer value */
 	while (tx_queue->insert_count != tx_queue->write_count) {
 		--tx_queue->insert_count;
-		insert_ptr = tx_queue->insert_count & EFX_TXQ_MASK;
+		insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
 		buffer = &tx_queue->buffer[insert_ptr];
 		efx_dequeue_buffer(tx_queue, buffer);
 		buffer->len = 0;
@@ -350,8 +350,8 @@
 	struct efx_nic *efx = tx_queue->efx;
 	unsigned int stop_index, read_ptr;
 
-	stop_index = (index + 1) & EFX_TXQ_MASK;
-	read_ptr = tx_queue->read_count & EFX_TXQ_MASK;
+	stop_index = (index + 1) & tx_queue->ptr_mask;
+	read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
 
 	while (read_ptr != stop_index) {
 		struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
@@ -368,7 +368,7 @@
 		buffer->len = 0;
 
 		++tx_queue->read_count;
-		read_ptr = tx_queue->read_count & EFX_TXQ_MASK;
+		read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
 	}
 }
 
@@ -402,7 +402,7 @@
 	unsigned fill_level;
 	struct efx_nic *efx = tx_queue->efx;
 
-	EFX_BUG_ON_PARANOID(index > EFX_TXQ_MASK);
+	EFX_BUG_ON_PARANOID(index > tx_queue->ptr_mask);
 
 	efx_dequeue_buffers(tx_queue, index);
 
@@ -412,7 +412,7 @@
 	smp_mb();
 	if (unlikely(tx_queue->stopped) && likely(efx->port_enabled)) {
 		fill_level = tx_queue->insert_count - tx_queue->read_count;
-		if (fill_level < EFX_TXQ_THRESHOLD) {
+		if (fill_level < EFX_TXQ_THRESHOLD(efx)) {
 			EFX_BUG_ON_PARANOID(!efx_dev_registered(efx));
 
 			/* Do this under netif_tx_lock(), to avoid racing
@@ -430,18 +430,24 @@
 int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
 {
 	struct efx_nic *efx = tx_queue->efx;
-	unsigned int txq_size;
+	unsigned int entries;
 	int i, rc;
 
-	netif_dbg(efx, probe, efx->net_dev, "creating TX queue %d\n",
-		  tx_queue->queue);
+	/* Create the smallest power-of-two aligned ring */
+	entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE);
+	EFX_BUG_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
+	tx_queue->ptr_mask = entries - 1;
+
+	netif_dbg(efx, probe, efx->net_dev,
+		  "creating TX queue %d size %#x mask %#x\n",
+		  tx_queue->queue, efx->txq_entries, tx_queue->ptr_mask);
 
 	/* Allocate software ring */
-	txq_size = EFX_TXQ_SIZE * sizeof(*tx_queue->buffer);
-	tx_queue->buffer = kzalloc(txq_size, GFP_KERNEL);
+	tx_queue->buffer = kzalloc(entries * sizeof(*tx_queue->buffer),
+				   GFP_KERNEL);
 	if (!tx_queue->buffer)
 		return -ENOMEM;
-	for (i = 0; i <= EFX_TXQ_MASK; ++i)
+	for (i = 0; i <= tx_queue->ptr_mask; ++i)
 		tx_queue->buffer[i].continuation = true;
 
 	/* Allocate hardware ring */
@@ -481,7 +487,7 @@
 
 	/* Free any buffers left in the ring */
 	while (tx_queue->read_count != tx_queue->write_count) {
-		buffer = &tx_queue->buffer[tx_queue->read_count & EFX_TXQ_MASK];
+		buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask];
 		efx_dequeue_buffer(tx_queue, buffer);
 		buffer->continuation = true;
 		buffer->len = 0;
@@ -741,7 +747,7 @@
 
 	fill_level = tx_queue->insert_count - tx_queue->old_read_count;
 	/* -1 as there is no way to represent all descriptors used */
-	q_space = EFX_TXQ_MASK - 1 - fill_level;
+	q_space = efx->txq_entries - 1 - fill_level;
 
 	while (1) {
 		if (unlikely(q_space-- <= 0)) {
@@ -757,7 +763,7 @@
 				*(volatile unsigned *)&tx_queue->read_count;
 			fill_level = (tx_queue->insert_count
 				      - tx_queue->old_read_count);
-			q_space = EFX_TXQ_MASK - 1 - fill_level;
+			q_space = efx->txq_entries - 1 - fill_level;
 			if (unlikely(q_space-- <= 0)) {
 				*final_buffer = NULL;
 				return 1;
@@ -766,13 +772,13 @@
 			--tx_queue->stopped;
 		}
 
-		insert_ptr = tx_queue->insert_count & EFX_TXQ_MASK;
+		insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
 		buffer = &tx_queue->buffer[insert_ptr];
 		++tx_queue->insert_count;
 
 		EFX_BUG_ON_PARANOID(tx_queue->insert_count -
-				    tx_queue->read_count >
-				    EFX_TXQ_MASK);
+				    tx_queue->read_count >=
+				    efx->txq_entries);
 
 		efx_tsoh_free(tx_queue, buffer);
 		EFX_BUG_ON_PARANOID(buffer->len);
@@ -813,7 +819,7 @@
 {
 	struct efx_tx_buffer *buffer;
 
-	buffer = &tx_queue->buffer[tx_queue->insert_count & EFX_TXQ_MASK];
+	buffer = &tx_queue->buffer[tx_queue->insert_count & tx_queue->ptr_mask];
 	efx_tsoh_free(tx_queue, buffer);
 	EFX_BUG_ON_PARANOID(buffer->len);
 	EFX_BUG_ON_PARANOID(buffer->unmap_len);
@@ -838,7 +844,7 @@
 	while (tx_queue->insert_count != tx_queue->write_count) {
 		--tx_queue->insert_count;
 		buffer = &tx_queue->buffer[tx_queue->insert_count &
-					   EFX_TXQ_MASK];
+					   tx_queue->ptr_mask];
 		efx_tsoh_free(tx_queue, buffer);
 		EFX_BUG_ON_PARANOID(buffer->skb);
 		if (buffer->unmap_len) {
@@ -1168,7 +1174,7 @@
 	unsigned i;
 
 	if (tx_queue->buffer) {
-		for (i = 0; i <= EFX_TXQ_MASK; ++i)
+		for (i = 0; i <= tx_queue->ptr_mask; ++i)
 			efx_tsoh_free(tx_queue, &tx_queue->buffer[i]);
 	}