cxgb4: implement EEH

Implement the pci_error_handlers methods for EEH.

Signed-off-by: Dimitris Michailidis <dm@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/cxgb4/cxgb4.h b/drivers/net/cxgb4/cxgb4.h
index bfa1366..5e37c1e 100644
--- a/drivers/net/cxgb4/cxgb4.h
+++ b/drivers/net/cxgb4/cxgb4.h
@@ -650,6 +650,7 @@
 void t4_intr_clear(struct adapter *adapter);
 int t4_slow_intr_handler(struct adapter *adapter);
 
+int t4_wait_dev_ready(struct adapter *adap);
 int t4_link_start(struct adapter *adap, unsigned int mbox, unsigned int port,
 		  struct link_config *lc);
 int t4_restart_aneg(struct adapter *adap, unsigned int mbox, unsigned int port);
diff --git a/drivers/net/cxgb4/cxgb4_main.c b/drivers/net/cxgb4/cxgb4_main.c
index 60f6ea05..baf4f0a 100644
--- a/drivers/net/cxgb4/cxgb4_main.c
+++ b/drivers/net/cxgb4/cxgb4_main.c
@@ -2483,6 +2483,7 @@
 	t4_intr_disable(adapter);
 	cancel_work_sync(&adapter->tid_release_task);
 	adapter->tid_release_task_busy = false;
+	adapter->tid_release_head = NULL;
 
 	if (adapter->flags & USING_MSIX) {
 		free_msix_queue_irqs(adapter);
@@ -2907,6 +2908,108 @@
 	return ret;
 }
 
+/* EEH callbacks */
+
+static pci_ers_result_t eeh_err_detected(struct pci_dev *pdev,
+					 pci_channel_state_t state)
+{
+	int i;
+	struct adapter *adap = pci_get_drvdata(pdev);
+
+	if (!adap)
+		goto out;
+
+	rtnl_lock();
+	adap->flags &= ~FW_OK;
+	notify_ulds(adap, CXGB4_STATE_START_RECOVERY);
+	for_each_port(adap, i) {
+		struct net_device *dev = adap->port[i];
+
+		netif_device_detach(dev);
+		netif_carrier_off(dev);
+	}
+	if (adap->flags & FULL_INIT_DONE)
+		cxgb_down(adap);
+	rtnl_unlock();
+	pci_disable_device(pdev);
+out:	return state == pci_channel_io_perm_failure ?
+		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t eeh_slot_reset(struct pci_dev *pdev)
+{
+	int i, ret;
+	struct fw_caps_config_cmd c;
+	struct adapter *adap = pci_get_drvdata(pdev);
+
+	if (!adap) {
+		pci_restore_state(pdev);
+		pci_save_state(pdev);
+		return PCI_ERS_RESULT_RECOVERED;
+	}
+
+	if (pci_enable_device(pdev)) {
+		dev_err(&pdev->dev, "cannot reenable PCI device after reset\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	pci_set_master(pdev);
+	pci_restore_state(pdev);
+	pci_save_state(pdev);
+	pci_cleanup_aer_uncorrect_error_status(pdev);
+
+	if (t4_wait_dev_ready(adap) < 0)
+		return PCI_ERS_RESULT_DISCONNECT;
+	if (t4_fw_hello(adap, 0, 0, MASTER_MUST, NULL))
+		return PCI_ERS_RESULT_DISCONNECT;
+	adap->flags |= FW_OK;
+	if (adap_init1(adap, &c))
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	for_each_port(adap, i) {
+		struct port_info *p = adap2pinfo(adap, i);
+
+		ret = t4_alloc_vi(adap, 0, p->tx_chan, 0, 0, 1, NULL, NULL);
+		if (ret < 0)
+			return PCI_ERS_RESULT_DISCONNECT;
+		p->viid = ret;
+		p->xact_addr_filt = -1;
+	}
+
+	t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd,
+		     adap->params.b_wnd);
+	if (cxgb_up(adap))
+		return PCI_ERS_RESULT_DISCONNECT;
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+static void eeh_resume(struct pci_dev *pdev)
+{
+	int i;
+	struct adapter *adap = pci_get_drvdata(pdev);
+
+	if (!adap)
+		return;
+
+	rtnl_lock();
+	for_each_port(adap, i) {
+		struct net_device *dev = adap->port[i];
+
+		if (netif_running(dev)) {
+			link_start(dev);
+			cxgb_set_rxmode(dev);
+		}
+		netif_device_attach(dev);
+	}
+	rtnl_unlock();
+}
+
+static struct pci_error_handlers cxgb4_eeh = {
+	.error_detected = eeh_err_detected,
+	.slot_reset     = eeh_slot_reset,
+	.resume         = eeh_resume,
+};
+
 static inline bool is_10g_port(const struct link_config *lc)
 {
 	return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0;
@@ -3154,8 +3257,10 @@
 
 	/* We control everything through PF 0 */
 	func = PCI_FUNC(pdev->devfn);
-	if (func > 0)
+	if (func > 0) {
+		pci_save_state(pdev);        /* to restore SR-IOV later */
 		goto sriov;
+	}
 
 	err = pci_enable_device(pdev);
 	if (err) {
@@ -3396,6 +3501,7 @@
 	.id_table = cxgb4_pci_tbl,
 	.probe    = init_one,
 	.remove   = __devexit_p(remove_one),
+	.err_handler = &cxgb4_eeh,
 };
 
 static int __init cxgb4_init_module(void)
diff --git a/drivers/net/cxgb4/l2t.c b/drivers/net/cxgb4/l2t.c
index 9f96724..5b990d2 100644
--- a/drivers/net/cxgb4/l2t.c
+++ b/drivers/net/cxgb4/l2t.c
@@ -310,6 +310,13 @@
 			neigh_release(e->neigh);
 			e->neigh = NULL;
 		}
+		while (e->arpq_head) {
+			struct sk_buff *skb = e->arpq_head;
+
+			e->arpq_head = skb->next;
+			kfree(skb);
+		}
+		e->arpq_tail = NULL;
 	}
 	spin_unlock_bh(&e->lock);
 
diff --git a/drivers/net/cxgb4/t4_hw.c b/drivers/net/cxgb4/t4_hw.c
index 5c81c55..0c8a84a 100644
--- a/drivers/net/cxgb4/t4_hw.c
+++ b/drivers/net/cxgb4/t4_hw.c
@@ -221,6 +221,13 @@
 	if ((size & 15) || size > MBOX_LEN)
 		return -EINVAL;
 
+	/*
+	 * If the device is off-line, as in EEH, commands will time out.
+	 * Fail them early so we don't waste time waiting.
+	 */
+	if (adap->pdev->error_state != pci_channel_io_normal)
+		return -EIO;
+
 	v = MBOWNER_GET(t4_read_reg(adap, ctl_reg));
 	for (i = 0; v == MBOX_OWNER_NONE && i < 3; i++)
 		v = MBOWNER_GET(t4_read_reg(adap, ctl_reg));
@@ -3045,7 +3052,7 @@
 	}
 }
 
-static int __devinit wait_dev_ready(struct adapter *adap)
+int t4_wait_dev_ready(struct adapter *adap)
 {
 	if (t4_read_reg(adap, PL_WHOAMI) != 0xffffffff)
 		return 0;
@@ -3093,7 +3100,7 @@
 {
 	int ret;
 
-	ret = wait_dev_ready(adapter);
+	ret = t4_wait_dev_ready(adapter);
 	if (ret < 0)
 		return ret;