sfc: Rework IRQ enable/disable

There are many problems with the current efx_stop_interrupts() and
efx_start_interrupts():

1. On Siena, it is unsafe to disable the master IRQ enable bit
(DRV_INT_EN_KER) while any IRQ sources are enabled.

2. On EF10 there is no master IRQ enable bit, so we cannot expect to
defer IRQs without tearing down event queues.  (Though I don't think
we will need to keep any event queues around while the device is down,
as we do for VFDI on Siena.)

3. synchronize_irq() only waits for a running IRQ handler to finish,
not for any propagation through IRQ controllers.  Therefore an IRQ may
still be received and handled after efx_stop_interrupts() returns.
IRQ handlers can then race with channel reallocation.

To fix this:

a. Introduce a software IRQ enable flag.  So long as this is clear,
IRQ handlers will only acknowledge IRQs and not touch the channel
structures.

b. Define a new struct efx_msi_context as the context for MSIs.  This
is never reallocated and is sufficient to find the software enable
flag and the channel structure.  It also includes the channel/IRQ
name, which was previously separated out as it must also not be
reallocated.

c. Split efx_{start,stop}_interrupts() into
efx_{,soft_}_{enable,disable}_interrupts().  The 'soft' functions
don't touch the hardware master enable flag (if it exists) and don't
reinitialise or tear down channels with the keep_eventq flag set.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 2b2dba1..a7818d1 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -191,8 +191,8 @@
  *
  *************************************************************************/
 
-static void efx_start_interrupts(struct efx_nic *efx, bool may_keep_eventq);
-static void efx_stop_interrupts(struct efx_nic *efx, bool may_keep_eventq);
+static void efx_soft_enable_interrupts(struct efx_nic *efx);
+static void efx_soft_disable_interrupts(struct efx_nic *efx);
 static void efx_remove_channel(struct efx_channel *channel);
 static void efx_remove_channels(struct efx_nic *efx);
 static const struct efx_channel_type efx_default_channel_type;
@@ -520,8 +520,8 @@
 
 	efx_for_each_channel(channel, efx)
 		channel->type->get_name(channel,
-					efx->channel_name[channel->channel],
-					sizeof(efx->channel_name[0]));
+					efx->msi_context[channel->channel].name,
+					sizeof(efx->msi_context[0].name));
 }
 
 static int efx_probe_channels(struct efx_nic *efx)
@@ -746,7 +746,7 @@
 
 	efx_device_detach_sync(efx);
 	efx_stop_all(efx);
-	efx_stop_interrupts(efx, true);
+	efx_soft_disable_interrupts(efx);
 
 	/* Clone channels (where possible) */
 	memset(other_channel, 0, sizeof(other_channel));
@@ -796,7 +796,7 @@
 		}
 	}
 
-	efx_start_interrupts(efx, true);
+	efx_soft_enable_interrupts(efx);
 	efx_start_all(efx);
 	netif_device_attach(efx->net_dev);
 	return rc;
@@ -1329,8 +1329,50 @@
 	return 0;
 }
 
-/* Enable interrupts, then probe and start the event queues */
-static void efx_start_interrupts(struct efx_nic *efx, bool may_keep_eventq)
+static void efx_soft_enable_interrupts(struct efx_nic *efx)
+{
+	struct efx_channel *channel;
+
+	BUG_ON(efx->state == STATE_DISABLED);
+
+	efx->irq_soft_enabled = true;
+	smp_wmb();
+
+	efx_for_each_channel(channel, efx) {
+		if (!channel->type->keep_eventq)
+			efx_init_eventq(channel);
+		efx_start_eventq(channel);
+	}
+
+	efx_mcdi_mode_event(efx);
+}
+
+static void efx_soft_disable_interrupts(struct efx_nic *efx)
+{
+	struct efx_channel *channel;
+
+	if (efx->state == STATE_DISABLED)
+		return;
+
+	efx_mcdi_mode_poll(efx);
+
+	efx->irq_soft_enabled = false;
+	smp_wmb();
+
+	if (efx->legacy_irq)
+		synchronize_irq(efx->legacy_irq);
+
+	efx_for_each_channel(channel, efx) {
+		if (channel->irq)
+			synchronize_irq(channel->irq);
+
+		efx_stop_eventq(channel);
+		if (!channel->type->keep_eventq)
+			efx_fini_eventq(channel);
+	}
+}
+
+static void efx_enable_interrupts(struct efx_nic *efx)
 {
 	struct efx_channel *channel;
 
@@ -1340,42 +1382,29 @@
 		enable_irq(efx->legacy_irq);
 		efx->eeh_disabled_legacy_irq = false;
 	}
-	if (efx->legacy_irq)
-		efx->legacy_irq_enabled = true;
+
 	efx_nic_enable_interrupts(efx);
 
 	efx_for_each_channel(channel, efx) {
-		if (!channel->type->keep_eventq || !may_keep_eventq)
+		if (channel->type->keep_eventq)
 			efx_init_eventq(channel);
-		efx_start_eventq(channel);
 	}
 
-	efx_mcdi_mode_event(efx);
+	efx_soft_enable_interrupts(efx);
 }
 
-static void efx_stop_interrupts(struct efx_nic *efx, bool may_keep_eventq)
+static void efx_disable_interrupts(struct efx_nic *efx)
 {
 	struct efx_channel *channel;
 
-	if (efx->state == STATE_DISABLED)
-		return;
-
-	efx_mcdi_mode_poll(efx);
-
-	efx_nic_disable_interrupts(efx);
-	if (efx->legacy_irq) {
-		synchronize_irq(efx->legacy_irq);
-		efx->legacy_irq_enabled = false;
-	}
+	efx_soft_disable_interrupts(efx);
 
 	efx_for_each_channel(channel, efx) {
-		if (channel->irq)
-			synchronize_irq(channel->irq);
-
-		efx_stop_eventq(channel);
-		if (!channel->type->keep_eventq || !may_keep_eventq)
+		if (channel->type->keep_eventq)
 			efx_fini_eventq(channel);
 	}
+
+	efx_nic_disable_interrupts(efx);
 }
 
 static void efx_remove_interrupts(struct efx_nic *efx)
@@ -2160,7 +2189,7 @@
 	EFX_ASSERT_RESET_SERIALISED(efx);
 
 	efx_stop_all(efx);
-	efx_stop_interrupts(efx, false);
+	efx_disable_interrupts(efx);
 
 	mutex_lock(&efx->mac_lock);
 	if (efx->port_initialized && method != RESET_TYPE_INVISIBLE)
@@ -2199,7 +2228,7 @@
 
 	efx->type->reconfigure_mac(efx);
 
-	efx_start_interrupts(efx, false);
+	efx_enable_interrupts(efx);
 	efx_restore_filters(efx);
 	efx_sriov_reset(efx);
 
@@ -2464,6 +2493,8 @@
 		efx->channel[i] = efx_alloc_channel(efx, i, NULL);
 		if (!efx->channel[i])
 			goto fail;
+		efx->msi_context[i].efx = efx;
+		efx->msi_context[i].index = i;
 	}
 
 	EFX_BUG_ON_PARANOID(efx->type->phys_addr_channels > EFX_MAX_CHANNELS);
@@ -2516,7 +2547,7 @@
 	BUG_ON(efx->state == STATE_READY);
 	cancel_work_sync(&efx->reset_work);
 
-	efx_stop_interrupts(efx, false);
+	efx_disable_interrupts(efx);
 	efx_nic_fini_interrupt(efx);
 	efx_fini_port(efx);
 	efx->type->fini(efx);
@@ -2538,7 +2569,7 @@
 	/* Mark the NIC as fini, then stop the interface */
 	rtnl_lock();
 	dev_close(efx->net_dev);
-	efx_stop_interrupts(efx, false);
+	efx_disable_interrupts(efx);
 	rtnl_unlock();
 
 	efx_sriov_fini(efx);
@@ -2640,7 +2671,7 @@
 	rc = efx_nic_init_interrupt(efx);
 	if (rc)
 		goto fail5;
-	efx_start_interrupts(efx, false);
+	efx_enable_interrupts(efx);
 
 	return 0;
 
@@ -2761,7 +2792,7 @@
 		efx_device_detach_sync(efx);
 
 		efx_stop_all(efx);
-		efx_stop_interrupts(efx, false);
+		efx_disable_interrupts(efx);
 	}
 
 	rtnl_unlock();
@@ -2776,7 +2807,7 @@
 	rtnl_lock();
 
 	if (efx->state != STATE_DISABLED) {
-		efx_start_interrupts(efx, false);
+		efx_enable_interrupts(efx);
 
 		mutex_lock(&efx->mac_lock);
 		efx->phy_op->reconfigure(efx);
@@ -2879,7 +2910,7 @@
 		efx_device_detach_sync(efx);
 
 		efx_stop_all(efx);
-		efx_stop_interrupts(efx, false);
+		efx_disable_interrupts(efx);
 
 		status = PCI_ERS_RESULT_NEED_RESET;
 	} else {