i40evf: refactor reset handling
Respond better to a VF reset event. When a reset is signaled by the
PF, or detected by the watchdog task, prevent the watchdog from
processing admin queue requests, and schedule the reset task.
In the reset task, wait first for the reset to start, then for it to
complete, then reinit the driver.
If the reset never appears to complete after a long, long time (>10
seconds is possible depending on what's going on with the PF driver),
then set a flag to indicate that PF communications have failed.
If this flag is set, check for the reset to complete in the watchdog,
and attempt to do a full reinitialization of the driver from scratch.
With these changes the VF driver correctly handles a PF reset event
while running on bare metal, or in a VM.
Also update copyrights.
Change-ID: I93513efd0b50523a8345e7f6a33a5e4f8a2a5996
Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Sibai Li <sibai.li@intel.com>
Signed-off-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index d271d3a..fe2271e 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -964,16 +964,18 @@
struct net_device *netdev = adapter->netdev;
struct i40evf_mac_filter *f;
- /* remove all MAC filters from the VSI */
+ /* remove all MAC filters */
list_for_each_entry(f, &adapter->mac_filter_list, list) {
f->remove = true;
}
- adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER;
- /* disable receives */
- adapter->aq_required |= I40EVF_FLAG_AQ_DISABLE_QUEUES;
- mod_timer_pending(&adapter->watchdog_timer, jiffies + 1);
- msleep(20);
-
+ if (!(adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) &&
+ adapter->state != __I40EVF_RESETTING) {
+ adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER;
+ /* disable receives */
+ adapter->aq_required |= I40EVF_FLAG_AQ_DISABLE_QUEUES;
+ mod_timer_pending(&adapter->watchdog_timer, jiffies + 1);
+ msleep(20);
+ }
netif_tx_disable(netdev);
netif_tx_stop_all_queues(netdev);
@@ -1292,19 +1294,47 @@
watchdog_task);
struct i40e_hw *hw = &adapter->hw;
- if (adapter->state < __I40EVF_DOWN)
- goto watchdog_done;
-
if (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section))
+ goto restart_watchdog;
+
+ if (adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) {
+ dev_info(&adapter->pdev->dev, "Checking for redemption\n");
+ if ((rd32(hw, I40E_VFGEN_RSTAT) & 0x3) == I40E_VFR_VFACTIVE) {
+ /* A chance for redemption! */
+ dev_err(&adapter->pdev->dev, "Hardware came out of reset. Attempting reinit.\n");
+ adapter->state = __I40EVF_STARTUP;
+ adapter->flags &= ~I40EVF_FLAG_PF_COMMS_FAILED;
+ schedule_delayed_work(&adapter->init_task, 10);
+ clear_bit(__I40EVF_IN_CRITICAL_TASK,
+ &adapter->crit_section);
+ /* Don't reschedule the watchdog, since we've restarted
+ * the init task. When init_task contacts the PF and
+ * gets everything set up again, it'll restart the
+ * watchdog for us. Down, boy. Sit. Stay. Woof.
+ */
+ return;
+ }
+ adapter->aq_pending = 0;
+ adapter->aq_required = 0;
+ adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN;
+ goto watchdog_done;
+ }
+
+ if ((adapter->state < __I40EVF_DOWN) ||
+ (adapter->flags & I40EVF_FLAG_RESET_PENDING))
goto watchdog_done;
- /* check for unannounced reset */
- if ((adapter->state != __I40EVF_RESETTING) &&
+ /* check for reset */
+ if (!(adapter->flags & I40EVF_FLAG_RESET_PENDING) &&
(rd32(hw, I40E_VFGEN_RSTAT) & 0x3) != I40E_VFR_VFACTIVE) {
adapter->state = __I40EVF_RESETTING;
+ adapter->flags |= I40EVF_FLAG_RESET_PENDING;
+ dev_err(&adapter->pdev->dev, "Hardware reset detected.\n");
+ dev_info(&adapter->pdev->dev, "Scheduling reset task\n");
schedule_work(&adapter->reset_task);
- dev_info(&adapter->pdev->dev, "%s: hardware reset detected\n",
- __func__);
+ adapter->aq_pending = 0;
+ adapter->aq_required = 0;
+ adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN;
goto watchdog_done;
}
@@ -1359,13 +1389,15 @@
i40evf_irq_enable(adapter, true);
i40evf_fire_sw_int(adapter, 0xFF);
+
watchdog_done:
+ clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
+restart_watchdog:
if (adapter->aq_required)
mod_timer(&adapter->watchdog_timer,
jiffies + msecs_to_jiffies(20));
else
mod_timer(&adapter->watchdog_timer, jiffies + (HZ * 2));
- clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
schedule_work(&adapter->adminq_task);
}
@@ -1412,6 +1444,8 @@
i40e_flush(hw);
}
+#define I40EVF_RESET_WAIT_MS 100
+#define I40EVF_RESET_WAIT_COUNT 200
/**
* i40evf_reset_task - Call-back task to handle hardware reset
* @work: pointer to work_struct
@@ -1422,8 +1456,9 @@
**/
static void i40evf_reset_task(struct work_struct *work)
{
- struct i40evf_adapter *adapter =
- container_of(work, struct i40evf_adapter, reset_task);
+ struct i40evf_adapter *adapter = container_of(work,
+ struct i40evf_adapter,
+ reset_task);
struct i40e_hw *hw = &adapter->hw;
int i = 0, err;
uint32_t rstat_val;
@@ -1431,22 +1466,56 @@
while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK,
&adapter->crit_section))
udelay(500);
-
- /* wait until the reset is complete */
- for (i = 0; i < 20; i++) {
+ /* poll until we see the reset actually happen */
+ for (i = 0; i < I40EVF_RESET_WAIT_COUNT; i++) {
rstat_val = rd32(hw, I40E_VFGEN_RSTAT) &
I40E_VFGEN_RSTAT_VFR_STATE_MASK;
- if (rstat_val == I40E_VFR_COMPLETED)
+ if (rstat_val != I40E_VFR_VFACTIVE) {
+ dev_info(&adapter->pdev->dev, "Reset now occurring\n");
break;
- else
- mdelay(100);
+ } else {
+ msleep(I40EVF_RESET_WAIT_MS);
+ }
}
- if (i == 20) {
+ if (i == I40EVF_RESET_WAIT_COUNT) {
+ dev_err(&adapter->pdev->dev, "Reset was not detected\n");
+ adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
+ goto continue_reset; /* act like the reset happened */
+ }
+
+ /* wait until the reset is complete and the PF is responding to us */
+ for (i = 0; i < I40EVF_RESET_WAIT_COUNT; i++) {
+ rstat_val = rd32(hw, I40E_VFGEN_RSTAT) &
+ I40E_VFGEN_RSTAT_VFR_STATE_MASK;
+ if (rstat_val == I40E_VFR_VFACTIVE) {
+ dev_info(&adapter->pdev->dev, "Reset is complete. Reinitializing.\n");
+ break;
+ } else {
+ msleep(I40EVF_RESET_WAIT_MS);
+ }
+ }
+ if (i == I40EVF_RESET_WAIT_COUNT) {
/* reset never finished */
- dev_info(&adapter->pdev->dev, "%s: reset never finished: %x\n",
- __func__, rstat_val);
- /* carry on anyway */
+ dev_err(&adapter->pdev->dev, "Reset never finished (%x). PF driver is dead, and so am I.\n",
+ rstat_val);
+ adapter->flags |= I40EVF_FLAG_PF_COMMS_FAILED;
+
+ if (netif_running(adapter->netdev))
+ i40evf_close(adapter->netdev);
+
+ i40evf_free_misc_irq(adapter);
+ i40evf_reset_interrupt_capability(adapter);
+ i40evf_free_queues(adapter);
+ kfree(adapter->vf_res);
+ i40evf_shutdown_adminq(hw);
+ adapter->netdev->flags &= ~IFF_UP;
+ clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
+ return; /* Do not attempt to reinit. It's dead, Jim. */
}
+
+continue_reset:
+ adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
+
i40evf_down(adapter);
adapter->state = __I40EVF_RESETTING;
@@ -1506,6 +1575,9 @@
i40e_status ret;
u16 pending;
+ if (adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED)
+ return;
+
event.msg_size = I40EVF_MAX_AQ_BUF_SIZE;
event.msg_buf = kzalloc(event.msg_size, GFP_KERNEL);
if (!event.msg_buf) {
@@ -1637,6 +1709,10 @@
struct i40evf_adapter *adapter = netdev_priv(netdev);
int err;
+ if (adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) {
+ dev_err(&adapter->pdev->dev, "Unable to open device due to PF driver failure.\n");
+ return -EIO;
+ }
if (adapter->state != __I40EVF_DOWN)
return -EBUSY;
@@ -1691,8 +1767,12 @@
{
struct i40evf_adapter *adapter = netdev_priv(netdev);
+ if (adapter->state <= __I40EVF_DOWN)
+ return 0;
+
/* signal that we are down to the interrupt handler */
adapter->state = __I40EVF_DOWN;
+
set_bit(__I40E_DOWN, &adapter->vsi.state);
i40evf_down(adapter);
@@ -1843,6 +1923,8 @@
switch (adapter->state) {
case __I40EVF_STARTUP:
/* driver loaded, probe complete */
+ adapter->flags &= ~I40EVF_FLAG_PF_COMMS_FAILED;
+ adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
err = i40e_set_mac_type(hw);
if (err) {
dev_info(&pdev->dev, "%s: set_mac_type failed: %d\n",
@@ -2006,9 +2088,11 @@
adapter->vsi.tx_itr_setting = I40E_ITR_DYNAMIC;
adapter->vsi.netdev = adapter->netdev;
- err = register_netdev(netdev);
- if (err)
- goto err_register;
+ if (!adapter->netdev_registered) {
+ err = register_netdev(netdev);
+ if (err)
+ goto err_register;
+ }
adapter->netdev_registered = true;
@@ -2032,17 +2116,16 @@
i40evf_free_misc_irq(adapter);
err_sw_init:
i40evf_reset_interrupt_capability(adapter);
- adapter->state = __I40EVF_FAILED;
err_alloc:
kfree(adapter->vf_res);
adapter->vf_res = NULL;
err:
+ if (hw->aq.asq.count)
+ i40evf_shutdown_adminq(hw); /* ignore error */
/* Things went into the weeds, so try again later */
if (++adapter->aq_wait_count > I40EVF_AQ_MAX_ERR) {
dev_err(&pdev->dev, "Failed to communicate with PF; giving up.\n");
- if (hw->aq.asq.count)
- i40evf_shutdown_adminq(hw); /* ignore error */
- adapter->state = __I40EVF_FAILED;
+ adapter->flags |= I40EVF_FLAG_PF_COMMS_FAILED;
return; /* do not reschedule */
}
schedule_delayed_work(&adapter->init_task, HZ * 3);
@@ -2272,6 +2355,7 @@
struct i40e_hw *hw = &adapter->hw;
cancel_delayed_work_sync(&adapter->init_task);
+ cancel_work_sync(&adapter->reset_task);
if (adapter->netdev_registered) {
unregister_netdev(netdev);