i40evf: refactor reset handling

Respond better to a VF reset event. When a reset is signaled by the
PF, or detected by the watchdog task, prevent the watchdog from
processing admin queue requests, and schedule the reset task.

In the reset task, wait first for the reset to start, then for it to
complete, then reinit the driver.

If the reset never appears to complete after a long, long time (>10
seconds is possible depending on what's going on with the PF driver),
then set a flag to indicate that PF communications have failed.

If this flag is set, check for the reset to complete in the watchdog,
and  attempt to do a full reinitialization of the driver from scratch.

With these changes the VF driver correctly handles a PF reset event
while running on bare metal, or in a VM.

Also update copyrights.

Change-ID: I93513efd0b50523a8345e7f6a33a5e4f8a2a5996
Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Sibai Li <sibai.li@intel.com>
Signed-off-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index 5e0a344..a30c4a9 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -164,15 +164,14 @@
 /* Driver state. The order of these is important! */
 enum i40evf_state_t {
 	__I40EVF_STARTUP,		/* driver loaded, probe complete */
-	__I40EVF_FAILED,		/* PF communication failed. Fatal. */
 	__I40EVF_REMOVE,		/* driver is being unloaded */
 	__I40EVF_INIT_VERSION_CHECK,	/* aq msg sent, awaiting reply */
 	__I40EVF_INIT_GET_RESOURCES,	/* aq msg sent, awaiting reply */
 	__I40EVF_INIT_SW,		/* got resources, setting up structs */
+	__I40EVF_RESETTING,		/* in reset */
 	/* Below here, watchdog is running */
 	__I40EVF_DOWN,			/* ready, can be opened */
 	__I40EVF_TESTING,		/* in ethtool self-test */
-	__I40EVF_RESETTING,		/* in reset */
 	__I40EVF_RUNNING,		/* opened, working */
 };
 
@@ -214,6 +213,8 @@
 #define I40EVF_FLAG_IMIR_ENABLED                 (u32)(1 << 5)
 #define I40EVF_FLAG_MQ_CAPABLE                   (u32)(1 << 6)
 #define I40EVF_FLAG_NEED_LINK_UPDATE             (u32)(1 << 7)
+#define I40EVF_FLAG_PF_COMMS_FAILED              (u32)(1 << 8)
+#define I40EVF_FLAG_RESET_PENDING                (u32)(1 << 9)
 /* duplcates for common code */
 #define I40E_FLAG_FDIR_ATR_ENABLED		 0
 #define I40E_FLAG_DCB_ENABLED			 0
@@ -231,6 +232,7 @@
 #define I40EVF_FLAG_AQ_CONFIGURE_QUEUES		(u32)(1 << 6)
 #define I40EVF_FLAG_AQ_MAP_VECTORS		(u32)(1 << 7)
 #define I40EVF_FLAG_AQ_HANDLE_RESET		(u32)(1 << 8)
+
 	/* OS defined structs */
 	struct net_device *netdev;
 	struct pci_dev *pdev;
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index d271d3a..fe2271e 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -964,16 +964,18 @@
 	struct net_device *netdev = adapter->netdev;
 	struct i40evf_mac_filter *f;
 
-	/* remove all MAC filters from the VSI */
+	/* remove all MAC filters */
 	list_for_each_entry(f, &adapter->mac_filter_list, list) {
 		f->remove = true;
 	}
-	adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER;
-	/* disable receives */
-	adapter->aq_required |= I40EVF_FLAG_AQ_DISABLE_QUEUES;
-	mod_timer_pending(&adapter->watchdog_timer, jiffies + 1);
-	msleep(20);
-
+	if (!(adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) &&
+	    adapter->state != __I40EVF_RESETTING) {
+		adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER;
+		/* disable receives */
+		adapter->aq_required |= I40EVF_FLAG_AQ_DISABLE_QUEUES;
+		mod_timer_pending(&adapter->watchdog_timer, jiffies + 1);
+		msleep(20);
+	}
 	netif_tx_disable(netdev);
 
 	netif_tx_stop_all_queues(netdev);
@@ -1292,19 +1294,47 @@
 					  watchdog_task);
 	struct i40e_hw *hw = &adapter->hw;
 
-	if (adapter->state < __I40EVF_DOWN)
-		goto watchdog_done;
-
 	if (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section))
+		goto restart_watchdog;
+
+	if (adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) {
+		dev_info(&adapter->pdev->dev, "Checking for redemption\n");
+		if ((rd32(hw, I40E_VFGEN_RSTAT) & 0x3) == I40E_VFR_VFACTIVE) {
+			/* A chance for redemption! */
+			dev_err(&adapter->pdev->dev, "Hardware came out of reset. Attempting reinit.\n");
+			adapter->state = __I40EVF_STARTUP;
+			adapter->flags &= ~I40EVF_FLAG_PF_COMMS_FAILED;
+			schedule_delayed_work(&adapter->init_task, 10);
+			clear_bit(__I40EVF_IN_CRITICAL_TASK,
+				  &adapter->crit_section);
+			/* Don't reschedule the watchdog, since we've restarted
+			 * the init task. When init_task contacts the PF and
+			 * gets everything set up again, it'll restart the
+			 * watchdog for us. Down, boy. Sit. Stay. Woof.
+			 */
+			return;
+		}
+		adapter->aq_pending = 0;
+		adapter->aq_required = 0;
+		adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN;
+		goto watchdog_done;
+	}
+
+	if ((adapter->state < __I40EVF_DOWN) ||
+	    (adapter->flags & I40EVF_FLAG_RESET_PENDING))
 		goto watchdog_done;
 
-	/* check for unannounced reset */
-	if ((adapter->state != __I40EVF_RESETTING) &&
+	/* check for reset */
+	if (!(adapter->flags & I40EVF_FLAG_RESET_PENDING) &&
 	    (rd32(hw, I40E_VFGEN_RSTAT) & 0x3) != I40E_VFR_VFACTIVE) {
 		adapter->state = __I40EVF_RESETTING;
+		adapter->flags |= I40EVF_FLAG_RESET_PENDING;
+		dev_err(&adapter->pdev->dev, "Hardware reset detected.\n");
+		dev_info(&adapter->pdev->dev, "Scheduling reset task\n");
 		schedule_work(&adapter->reset_task);
-		dev_info(&adapter->pdev->dev, "%s: hardware reset detected\n",
-			 __func__);
+		adapter->aq_pending = 0;
+		adapter->aq_required = 0;
+		adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN;
 		goto watchdog_done;
 	}
 
@@ -1359,13 +1389,15 @@
 
 	i40evf_irq_enable(adapter, true);
 	i40evf_fire_sw_int(adapter, 0xFF);
+
 watchdog_done:
+	clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
+restart_watchdog:
 	if (adapter->aq_required)
 		mod_timer(&adapter->watchdog_timer,
 			  jiffies + msecs_to_jiffies(20));
 	else
 		mod_timer(&adapter->watchdog_timer, jiffies + (HZ * 2));
-	clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
 	schedule_work(&adapter->adminq_task);
 }
 
@@ -1412,6 +1444,8 @@
 	i40e_flush(hw);
 }
 
+#define I40EVF_RESET_WAIT_MS 100
+#define I40EVF_RESET_WAIT_COUNT 200
 /**
  * i40evf_reset_task - Call-back task to handle hardware reset
  * @work: pointer to work_struct
@@ -1422,8 +1456,9 @@
  **/
 static void i40evf_reset_task(struct work_struct *work)
 {
-	struct i40evf_adapter *adapter =
-			container_of(work, struct i40evf_adapter, reset_task);
+	struct i40evf_adapter *adapter = container_of(work,
+						      struct i40evf_adapter,
+						      reset_task);
 	struct i40e_hw *hw = &adapter->hw;
 	int i = 0, err;
 	uint32_t rstat_val;
@@ -1431,22 +1466,56 @@
 	while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK,
 				&adapter->crit_section))
 		udelay(500);
-
-	/* wait until the reset is complete */
-	for (i = 0; i < 20; i++) {
+	/* poll until we see the reset actually happen */
+	for (i = 0; i < I40EVF_RESET_WAIT_COUNT; i++) {
 		rstat_val = rd32(hw, I40E_VFGEN_RSTAT) &
 			    I40E_VFGEN_RSTAT_VFR_STATE_MASK;
-		if (rstat_val == I40E_VFR_COMPLETED)
+		if (rstat_val != I40E_VFR_VFACTIVE) {
+			dev_info(&adapter->pdev->dev, "Reset now occurring\n");
 			break;
-		else
-			mdelay(100);
+		} else {
+			msleep(I40EVF_RESET_WAIT_MS);
+		}
 	}
-	if (i == 20) {
+	if (i == I40EVF_RESET_WAIT_COUNT) {
+		dev_err(&adapter->pdev->dev, "Reset was not detected\n");
+		adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
+		goto continue_reset; /* act like the reset happened */
+	}
+
+	/* wait until the reset is complete and the PF is responding to us */
+	for (i = 0; i < I40EVF_RESET_WAIT_COUNT; i++) {
+		rstat_val = rd32(hw, I40E_VFGEN_RSTAT) &
+			    I40E_VFGEN_RSTAT_VFR_STATE_MASK;
+		if (rstat_val == I40E_VFR_VFACTIVE) {
+			dev_info(&adapter->pdev->dev, "Reset is complete. Reinitializing.\n");
+			break;
+		} else {
+			msleep(I40EVF_RESET_WAIT_MS);
+		}
+	}
+	if (i == I40EVF_RESET_WAIT_COUNT) {
 		/* reset never finished */
-		dev_info(&adapter->pdev->dev, "%s: reset never finished: %x\n",
-			__func__, rstat_val);
-		/* carry on anyway */
+		dev_err(&adapter->pdev->dev, "Reset never finished (%x). PF driver is dead, and so am I.\n",
+			rstat_val);
+		adapter->flags |= I40EVF_FLAG_PF_COMMS_FAILED;
+
+		if (netif_running(adapter->netdev))
+			i40evf_close(adapter->netdev);
+
+		i40evf_free_misc_irq(adapter);
+		i40evf_reset_interrupt_capability(adapter);
+		i40evf_free_queues(adapter);
+		kfree(adapter->vf_res);
+		i40evf_shutdown_adminq(hw);
+		adapter->netdev->flags &= ~IFF_UP;
+		clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
+		return; /* Do not attempt to reinit. It's dead, Jim. */
 	}
+
+continue_reset:
+	adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
+
 	i40evf_down(adapter);
 	adapter->state = __I40EVF_RESETTING;
 
@@ -1506,6 +1575,9 @@
 	i40e_status ret;
 	u16 pending;
 
+	if (adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED)
+		return;
+
 	event.msg_size = I40EVF_MAX_AQ_BUF_SIZE;
 	event.msg_buf = kzalloc(event.msg_size, GFP_KERNEL);
 	if (!event.msg_buf) {
@@ -1637,6 +1709,10 @@
 	struct i40evf_adapter *adapter = netdev_priv(netdev);
 	int err;
 
+	if (adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) {
+		dev_err(&adapter->pdev->dev, "Unable to open device due to PF driver failure.\n");
+		return -EIO;
+	}
 	if (adapter->state != __I40EVF_DOWN)
 		return -EBUSY;
 
@@ -1691,8 +1767,12 @@
 {
 	struct i40evf_adapter *adapter = netdev_priv(netdev);
 
+	if (adapter->state <= __I40EVF_DOWN)
+		return 0;
+
 	/* signal that we are down to the interrupt handler */
 	adapter->state = __I40EVF_DOWN;
+
 	set_bit(__I40E_DOWN, &adapter->vsi.state);
 
 	i40evf_down(adapter);
@@ -1843,6 +1923,8 @@
 	switch (adapter->state) {
 	case __I40EVF_STARTUP:
 		/* driver loaded, probe complete */
+		adapter->flags &= ~I40EVF_FLAG_PF_COMMS_FAILED;
+		adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
 		err = i40e_set_mac_type(hw);
 		if (err) {
 			dev_info(&pdev->dev, "%s: set_mac_type failed: %d\n",
@@ -2006,9 +2088,11 @@
 	adapter->vsi.tx_itr_setting = I40E_ITR_DYNAMIC;
 	adapter->vsi.netdev = adapter->netdev;
 
-	err = register_netdev(netdev);
-	if (err)
-		goto err_register;
+	if (!adapter->netdev_registered) {
+		err = register_netdev(netdev);
+		if (err)
+			goto err_register;
+	}
 
 	adapter->netdev_registered = true;
 
@@ -2032,17 +2116,16 @@
 	i40evf_free_misc_irq(adapter);
 err_sw_init:
 	i40evf_reset_interrupt_capability(adapter);
-	adapter->state = __I40EVF_FAILED;
 err_alloc:
 	kfree(adapter->vf_res);
 	adapter->vf_res = NULL;
 err:
+	if (hw->aq.asq.count)
+		i40evf_shutdown_adminq(hw); /* ignore error */
 	/* Things went into the weeds, so try again later */
 	if (++adapter->aq_wait_count > I40EVF_AQ_MAX_ERR) {
 		dev_err(&pdev->dev, "Failed to communicate with PF; giving up.\n");
-		if (hw->aq.asq.count)
-			i40evf_shutdown_adminq(hw); /* ignore error */
-		adapter->state = __I40EVF_FAILED;
+		adapter->flags |= I40EVF_FLAG_PF_COMMS_FAILED;
 		return; /* do not reschedule */
 	}
 	schedule_delayed_work(&adapter->init_task, HZ * 3);
@@ -2272,6 +2355,7 @@
 	struct i40e_hw *hw = &adapter->hw;
 
 	cancel_delayed_work_sync(&adapter->init_task);
+	cancel_work_sync(&adapter->reset_task);
 
 	if (adapter->netdev_registered) {
 		unregister_netdev(netdev);
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index e6978d7..93891a1 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -1,7 +1,7 @@
 /*******************************************************************************
  *
  * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
- * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2013 - 2014 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -43,6 +43,9 @@
 	struct i40e_hw *hw = &adapter->hw;
 	i40e_status err;
 
+	if (adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED)
+		return 0; /* nothing to see here, move along */
+
 	err = i40e_aq_send_msg_to_pf(hw, op, 0, msg, len, NULL);
 	if (err)
 		dev_err(&adapter->pdev->dev, "Unable to send opcode %d to PF, error %d, aq status %d\n",
@@ -689,10 +692,12 @@
 			}
 			break;
 		case I40E_VIRTCHNL_EVENT_RESET_IMPENDING:
-			adapter->state = __I40EVF_RESETTING;
-			schedule_work(&adapter->reset_task);
-			dev_info(&adapter->pdev->dev,
-				 "%s: hardware reset pending\n", __func__);
+			dev_info(&adapter->pdev->dev, "PF reset warning received\n");
+			if (!(adapter->flags & I40EVF_FLAG_RESET_PENDING)) {
+				adapter->flags |= I40EVF_FLAG_RESET_PENDING;
+				dev_info(&adapter->pdev->dev, "Scheduling reset task\n");
+				schedule_work(&adapter->reset_task);
+			}
 			break;
 		default:
 			dev_err(&adapter->pdev->dev,