sfc: Add AER and EEH support for Siena
The Linux side of EEH is triggered by MMIO reads, but this
driver's data path does not issue any MMIO reads (except in
legacy interrupt mode). Therefore add a monitor function
to poll EEH periodically.
When preparing to reset the device based on our own error
detection, also poll EEH and defer to its recovery mechanism
if appropriate.
[bwh: Use a separate condition for the initial link poll; fix some
style errors]
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 11a8108..5e1ddc5 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -21,7 +21,9 @@
#include <linux/ethtool.h>
#include <linux/topology.h>
#include <linux/gfp.h>
+#include <linux/pci.h>
#include <linux/cpu_rmap.h>
+#include <linux/aer.h>
#include "net_driver.h"
#include "efx.h"
#include "nic.h"
@@ -71,17 +73,19 @@
const unsigned int efx_reset_type_max = RESET_TYPE_MAX;
const char *const efx_reset_type_names[] = {
- [RESET_TYPE_INVISIBLE] = "INVISIBLE",
- [RESET_TYPE_ALL] = "ALL",
- [RESET_TYPE_WORLD] = "WORLD",
- [RESET_TYPE_DISABLE] = "DISABLE",
- [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG",
- [RESET_TYPE_INT_ERROR] = "INT_ERROR",
- [RESET_TYPE_RX_RECOVERY] = "RX_RECOVERY",
- [RESET_TYPE_RX_DESC_FETCH] = "RX_DESC_FETCH",
- [RESET_TYPE_TX_DESC_FETCH] = "TX_DESC_FETCH",
- [RESET_TYPE_TX_SKIP] = "TX_SKIP",
- [RESET_TYPE_MC_FAILURE] = "MC_FAILURE",
+ [RESET_TYPE_INVISIBLE] = "INVISIBLE",
+ [RESET_TYPE_ALL] = "ALL",
+ [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL",
+ [RESET_TYPE_WORLD] = "WORLD",
+ [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE",
+ [RESET_TYPE_DISABLE] = "DISABLE",
+ [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG",
+ [RESET_TYPE_INT_ERROR] = "INT_ERROR",
+ [RESET_TYPE_RX_RECOVERY] = "RX_RECOVERY",
+ [RESET_TYPE_RX_DESC_FETCH] = "RX_DESC_FETCH",
+ [RESET_TYPE_TX_DESC_FETCH] = "TX_DESC_FETCH",
+ [RESET_TYPE_TX_SKIP] = "TX_SKIP",
+ [RESET_TYPE_MC_FAILURE] = "MC_FAILURE",
};
#define EFX_MAX_MTU (9 * 1024)
@@ -117,9 +121,12 @@
static int napi_weight = 64;
/* This is the time (in jiffies) between invocations of the hardware
- * monitor. On Falcon-based NICs, this will:
+ * monitor.
+ * On Falcon-based NICs, this will:
* - Check the on-board hardware monitor;
* - Poll the link state and reconfigure the hardware as necessary.
+ * On Siena-based NICs for power systems with EEH support, this will give EEH a
+ * chance to start.
*/
static unsigned int efx_monitor_interval = 1 * HZ;
@@ -203,13 +210,14 @@
#define EFX_ASSERT_RESET_SERIALISED(efx) \
do { \
if ((efx->state == STATE_READY) || \
+ (efx->state == STATE_RECOVERY) || \
(efx->state == STATE_DISABLED)) \
ASSERT_RTNL(); \
} while (0)
static int efx_check_disabled(struct efx_nic *efx)
{
- if (efx->state == STATE_DISABLED) {
+ if (efx->state == STATE_DISABLED || efx->state == STATE_RECOVERY) {
netif_err(efx, drv, efx->net_dev,
"device is disabled due to earlier errors\n");
return -EIO;
@@ -677,7 +685,7 @@
BUG_ON(efx->port_enabled);
/* Only perform flush if dma is enabled */
- if (dev->is_busmaster) {
+ if (dev->is_busmaster && efx->state != STATE_RECOVERY) {
rc = efx_nic_flush_queues(efx);
if (rc && EFX_WORKAROUND_7803(efx)) {
@@ -1590,13 +1598,15 @@
efx_start_port(efx);
efx_start_datapath(efx);
- /* Start the hardware monitor if there is one. Otherwise (we're link
- * event driven), we have to poll the PHY because after an event queue
- * flush, we could have a missed a link state change */
- if (efx->type->monitor != NULL) {
+ /* Start the hardware monitor if there is one */
+ if (efx->type->monitor != NULL)
queue_delayed_work(efx->workqueue, &efx->monitor_work,
efx_monitor_interval);
- } else {
+
+ /* If link state detection is normally event-driven, we have
+ * to poll now because we could have missed a change
+ */
+ if (efx_nic_rev(efx) >= EFX_REV_SIENA_A0) {
mutex_lock(&efx->mac_lock);
if (efx->phy_op->poll(efx))
efx_link_status_changed(efx);
@@ -2303,7 +2313,9 @@
out:
/* Leave device stopped if necessary */
- disabled = rc || method == RESET_TYPE_DISABLE;
+ disabled = rc ||
+ method == RESET_TYPE_DISABLE ||
+ method == RESET_TYPE_RECOVER_OR_DISABLE;
rc2 = efx_reset_up(efx, method, !disabled);
if (rc2) {
disabled = true;
@@ -2322,13 +2334,48 @@
return rc;
}
+/* Try recovery mechanisms.
+ * For now only EEH is supported.
+ * Returns 0 if the recovery mechanisms are unsuccessful.
+ * Returns a non-zero value otherwise.
+ */
+static int efx_try_recovery(struct efx_nic *efx)
+{
+#ifdef CONFIG_EEH
+ /* A PCI error can occur and not be seen by EEH because nothing
+ * happens on the PCI bus. In this case the driver may fail and
+ * schedule a 'recover or reset', leading to this recovery handler.
+ * Manually call the eeh failure check function.
+ */
+ struct eeh_dev *eehdev =
+ of_node_to_eeh_dev(pci_device_to_OF_node(efx->pci_dev));
+
+ if (eeh_dev_check_failure(eehdev)) {
+ /* The EEH mechanisms will handle the error and reset the
+ * device if necessary.
+ */
+ return 1;
+ }
+#endif
+ return 0;
+}
+
/* The worker thread exists so that code that cannot sleep can
* schedule a reset for later.
*/
static void efx_reset_work(struct work_struct *data)
{
struct efx_nic *efx = container_of(data, struct efx_nic, reset_work);
- unsigned long pending = ACCESS_ONCE(efx->reset_pending);
+ unsigned long pending;
+ enum reset_type method;
+
+ pending = ACCESS_ONCE(efx->reset_pending);
+ method = fls(pending) - 1;
+
+ if ((method == RESET_TYPE_RECOVER_OR_DISABLE ||
+ method == RESET_TYPE_RECOVER_OR_ALL) &&
+ efx_try_recovery(efx))
+ return;
if (!pending)
return;
@@ -2340,7 +2387,7 @@
* it cannot change again.
*/
if (efx->state == STATE_READY)
- (void)efx_reset(efx, fls(pending) - 1);
+ (void)efx_reset(efx, method);
rtnl_unlock();
}
@@ -2349,11 +2396,20 @@
{
enum reset_type method;
+ if (efx->state == STATE_RECOVERY) {
+ netif_dbg(efx, drv, efx->net_dev,
+ "recovering: skip scheduling %s reset\n",
+ RESET_TYPE(type));
+ return;
+ }
+
switch (type) {
case RESET_TYPE_INVISIBLE:
case RESET_TYPE_ALL:
+ case RESET_TYPE_RECOVER_OR_ALL:
case RESET_TYPE_WORLD:
case RESET_TYPE_DISABLE:
+ case RESET_TYPE_RECOVER_OR_DISABLE:
method = type;
netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n",
RESET_TYPE(method));
@@ -2563,6 +2619,8 @@
efx_fini_struct(efx);
pci_set_drvdata(pci_dev, NULL);
free_netdev(efx->net_dev);
+
+ pci_disable_pcie_error_reporting(pci_dev);
};
/* NIC VPD information
@@ -2735,6 +2793,11 @@
netif_warn(efx, probe, efx->net_dev,
"failed to create MTDs (%d)\n", rc);
+ rc = pci_enable_pcie_error_reporting(pci_dev);
+ if (rc && rc != -EINVAL)
+ netif_warn(efx, probe, efx->net_dev,
+ "pci_enable_pcie_error_reporting failed (%d)\n", rc);
+
return 0;
fail4:
@@ -2859,12 +2922,112 @@
.restore = efx_pm_resume,
};
+/* A PCI error affecting this device was detected.
+ * At this point MMIO and DMA may be disabled.
+ * Stop the software path and request a slot reset.
+ */
+pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev,
+ enum pci_channel_state state)
+{
+ pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
+ struct efx_nic *efx = pci_get_drvdata(pdev);
+
+ if (state == pci_channel_io_perm_failure)
+ return PCI_ERS_RESULT_DISCONNECT;
+
+ rtnl_lock();
+
+ if (efx->state != STATE_DISABLED) {
+ efx->state = STATE_RECOVERY;
+ efx->reset_pending = 0;
+
+ efx_device_detach_sync(efx);
+
+ efx_stop_all(efx);
+ efx_stop_interrupts(efx, false);
+
+ status = PCI_ERS_RESULT_NEED_RESET;
+ } else {
+ /* If the interface is disabled we don't want to do anything
+ * with it.
+ */
+ status = PCI_ERS_RESULT_RECOVERED;
+ }
+
+ rtnl_unlock();
+
+ pci_disable_device(pdev);
+
+ return status;
+}
+
+/* Fake a successfull reset, which will be performed later in efx_io_resume. */
+pci_ers_result_t efx_io_slot_reset(struct pci_dev *pdev)
+{
+ struct efx_nic *efx = pci_get_drvdata(pdev);
+ pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
+ int rc;
+
+ if (pci_enable_device(pdev)) {
+ netif_err(efx, hw, efx->net_dev,
+ "Cannot re-enable PCI device after reset.\n");
+ status = PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ rc = pci_cleanup_aer_uncorrect_error_status(pdev);
+ if (rc) {
+ netif_err(efx, hw, efx->net_dev,
+ "pci_cleanup_aer_uncorrect_error_status failed (%d)\n", rc);
+ /* Non-fatal error. Continue. */
+ }
+
+ return status;
+}
+
+/* Perform the actual reset and resume I/O operations. */
+static void efx_io_resume(struct pci_dev *pdev)
+{
+ struct efx_nic *efx = pci_get_drvdata(pdev);
+ int rc;
+
+ rtnl_lock();
+
+ if (efx->state == STATE_DISABLED)
+ goto out;
+
+ rc = efx_reset(efx, RESET_TYPE_ALL);
+ if (rc) {
+ netif_err(efx, hw, efx->net_dev,
+ "efx_reset failed after PCI error (%d)\n", rc);
+ } else {
+ efx->state = STATE_READY;
+ netif_dbg(efx, hw, efx->net_dev,
+ "Done resetting and resuming IO after PCI error.\n");
+ }
+
+out:
+ rtnl_unlock();
+}
+
+/* For simplicity and reliability, we always require a slot reset and try to
+ * reset the hardware when a pci error affecting the device is detected.
+ * We leave both the link_reset and mmio_enabled callback unimplemented:
+ * with our request for slot reset the mmio_enabled callback will never be
+ * called, and the link_reset callback is not used by AER or EEH mechanisms.
+ */
+static struct pci_error_handlers efx_err_handlers = {
+ .error_detected = efx_io_error_detected,
+ .slot_reset = efx_io_slot_reset,
+ .resume = efx_io_resume,
+};
+
static struct pci_driver efx_pci_driver = {
.name = KBUILD_MODNAME,
.id_table = efx_pci_table,
.probe = efx_pci_probe,
.remove = efx_pci_remove,
.driver.pm = &efx_pm_ops,
+ .err_handler = &efx_err_handlers,
};
/**************************************************************************