[SCSI] lpfc: bug fixes

 Following the NPIV support, the following changes have been accumulated
 in the testing and qualification of the driver:

 - Fix affinity of ELS ring to slow/deferred event processing
 - Fix Ring attention masks
 - Defer dev_loss_tmo timeout handling to worker thread
 - Consolidate link down error classification for better error checking
 - Remove unused/deprecated nlp_initiator_tmr timer
 - Fix for async scan - move adapter init code back into pci_probe_one
   context. Fix async scan interfaces.
 - Expand validation of ability to create vports
 - Extract VPI resource cnt from firmware
 - Tuning of Login/Reject policies to better deal with overwhelmed targets
 - Misc ELS and discovery fixes
 - Export the npiv_enable attribute to sysfs
 - Mailbox handling fix
 - Add debugfs support
 - A few other small misc fixes:
    - wrong return values, double-frees, bad locking
 - Added adapter failure heartbeat

Signed-off-by: James Smart <James.Smart@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 4dd0f1a..350522c 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -391,6 +391,9 @@
 	 */
 	timeout = phba->fc_ratov << 1;
 	mod_timer(&vport->els_tmofunc, jiffies + HZ * timeout);
+	mod_timer(&phba->hb_tmofunc, jiffies + HZ * LPFC_HB_MBOX_INTERVAL);
+	phba->hb_outstanding = 0;
+	phba->last_completion_time = jiffies;
 
 	lpfc_init_link(phba, pmb, phba->cfg_topology, phba->cfg_link_speed);
 	pmb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
@@ -485,6 +488,119 @@
 	return 0;
 }
 
+/* HBA heart beat timer function: flag WORKER_HB_TMO and wake the work_wait waiter */
+void
+lpfc_hb_timeout(unsigned long ptr)
+{
+	struct lpfc_hba *phba;
+	unsigned long iflag;
+
+	phba = (struct lpfc_hba *)ptr;	/* ptr is the lpfc_hba pointer cast to unsigned long */
+	spin_lock_irqsave(&phba->pport->work_port_lock, iflag);
+	if (!(phba->pport->work_port_events & WORKER_HB_TMO))
+		phba->pport->work_port_events |= WORKER_HB_TMO;	/* post the event under work_port_lock */
+	spin_unlock_irqrestore(&phba->pport->work_port_lock, iflag);
+
+	if (phba->work_wait)	/* wake up only when a wait queue is registered */
+		wake_up(phba->work_wait);
+	return;
+}
+
+static void
+lpfc_hb_mbox_cmpl(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq)
+{	/* Heart beat mailbox completion: clear hb_outstanding, free pmboxq, re-arm the timer */
+	unsigned long drvr_flag;
+
+	spin_lock_irqsave(&phba->hbalock, drvr_flag);
+	phba->hb_outstanding = 0;	/* no heart beat mailbox pending any more */
+	spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
+
+	mempool_free(pmboxq, phba->mbox_mem_pool);	/* pmboxq must not be used after this */
+	if (!(phba->pport->fc_flag & FC_OFFLINE_MODE) &&
+		!(phba->link_state == LPFC_HBA_ERROR) &&
+		!(phba->pport->fc_flag & FC_UNLOADING))
+		mod_timer(&phba->hb_tmofunc,	/* skipped when offline, in error or unloading */
+			jiffies + HZ * LPFC_HB_MBOX_INTERVAL);
+	return;
+}
+
+void
+lpfc_hb_timeout_handler(struct lpfc_hba *phba)
+{	/* Handle a posted WORKER_HB_TMO: re-arm, issue a heart beat, or take the HBA offline */
+	LPFC_MBOXQ_t *pmboxq;
+	int retval;
+	struct lpfc_sli *psli = &phba->sli;
+
+	if ((phba->link_state == LPFC_HBA_ERROR) ||
+		(phba->pport->fc_flag & FC_UNLOADING) ||
+		(phba->pport->fc_flag & FC_OFFLINE_MODE))
+		return;	/* nothing to do in error, unloading or offline state */
+
+	spin_lock_irq(&phba->pport->work_port_lock);
+	/* If WORKER_HB_TMO is not set (timer already canceled) do nothing */
+	if (!(phba->pport->work_port_events & WORKER_HB_TMO)) {
+		spin_unlock_irq(&phba->pport->work_port_lock);
+		return;
+	}
+
+	if (time_after(phba->last_completion_time + LPFC_HB_MBOX_INTERVAL * HZ,
+		jiffies)) {	/* last_completion_time is less than one interval old */
+		spin_unlock_irq(&phba->pport->work_port_lock);
+		if (!phba->hb_outstanding)	/* just re-arm; no new heart beat is issued here */
+			mod_timer(&phba->hb_tmofunc,
+				jiffies + HZ * LPFC_HB_MBOX_INTERVAL);
+		else
+			mod_timer(&phba->hb_tmofunc,
+				jiffies + HZ * LPFC_HB_MBOX_TIMEOUT);
+		return;
+	}
+	spin_unlock_irq(&phba->pport->work_port_lock);
+
+	/* If there is no heart beat outstanding, issue a heartbeat command */
+	if (!phba->hb_outstanding) {
+		pmboxq = mempool_alloc(phba->mbox_mem_pool,GFP_KERNEL);
+		if (!pmboxq) {	/* allocation failed: try again after the next interval */
+			mod_timer(&phba->hb_tmofunc,
+				jiffies + HZ * LPFC_HB_MBOX_INTERVAL);
+			return;
+		}
+
+		lpfc_heart_beat(phba, pmboxq);
+		pmboxq->mbox_cmpl = lpfc_hb_mbox_cmpl;	/* completion frees pmboxq and clears hb_outstanding */
+		pmboxq->vport = phba->pport;
+		retval = lpfc_sli_issue_mbox(phba, pmboxq, MBX_NOWAIT);
+
+		if (retval != MBX_BUSY && retval != MBX_SUCCESS) {	/* any other status: free here and re-arm */
+			mempool_free(pmboxq, phba->mbox_mem_pool);
+			mod_timer(&phba->hb_tmofunc,
+				jiffies + HZ * LPFC_HB_MBOX_INTERVAL);
+			return;
+		}
+		mod_timer(&phba->hb_tmofunc,
+			jiffies + HZ * LPFC_HB_MBOX_TIMEOUT);
+		phba->hb_outstanding = 1;	/* cleared again in lpfc_hb_mbox_cmpl */
+		return;
+	} else {
+		/*
+		 * hb_outstanding is still set, i.e. the previously issued heart
+		 * beat mailbox has not completed: take the HBA offline.
+		 */
+		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+			"%d:0459 Adapter heartbeat failure, taking "
+			"this port offline.\n", phba->brd_no);
+
+		spin_lock_irq(&phba->hbalock);
+		psli->sli_flag &= ~LPFC_SLI2_ACTIVE;	/* sli_flag is modified under hbalock */
+		spin_unlock_irq(&phba->hbalock);
+
+		lpfc_offline_prep(phba);
+		lpfc_offline(phba);
+		lpfc_unblock_mgmt_io(phba);
+		phba->link_state = LPFC_HBA_ERROR;	/* later timer events return early on this state */
+		lpfc_hba_down_post(phba);
+	}
+}
+
 /************************************************************************/
 /*                                                                      */
 /*    lpfc_handle_eratt                                                 */
@@ -1190,9 +1306,6 @@
 	lpfc_can_disctmo(vport);
 	list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp)
 		lpfc_nlp_put(ndlp);
-
-	INIT_LIST_HEAD(&vport->fc_nodes);
-
 	return;
 }
 
@@ -1238,6 +1351,8 @@
 		lpfc_stop_vport_timers(vport);
 	del_timer_sync(&phba->sli.mbox_tmo);
 	del_timer_sync(&phba->fabric_block_timer);
+	phba->hb_outstanding = 0;
+	del_timer_sync(&phba->hb_tmofunc);
 	return;
 }
 
@@ -1474,8 +1589,8 @@
 	struct lpfc_hba  *phba = vport->phba;
 
 	kfree(vport->vname);
-	lpfc_free_sysfs_attr(vport);
 
+	lpfc_debugfs_terminate(vport);
 	fc_remove_host(shost);
 	scsi_remove_host(shost);
 
@@ -1500,50 +1615,29 @@
 	return instance;
 }
 
-static void
-lpfc_remove_device(struct lpfc_vport *vport)
-{
-	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
-
-	lpfc_free_sysfs_attr(vport);
-
-	spin_lock_irq(shost->host_lock);
-	vport->fc_flag |= FC_UNLOADING;
-	spin_unlock_irq(shost->host_lock);
-
-	fc_remove_host(shost);
-	scsi_remove_host(shost);
-}
-
-void lpfc_scan_start(struct Scsi_Host *shost)
-{
-	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
-	struct lpfc_hba   *phba = vport->phba;
-
-	if (lpfc_sli_hba_setup(phba))
-		goto error;
-
-	/*
-	 * hba setup may have changed the hba_queue_depth so we need to adjust
-	 * the value of can_queue.
-	 */
-	shost->can_queue = phba->cfg_hba_queue_depth - 10;
-	return;
-
-error:
-	lpfc_remove_device(vport);
-}
+/*
+ * Note: there is no scan_start function as adapter initialization
+ * will have asynchronously kicked off the link initialization.
+ */
 
 int lpfc_scan_finished(struct Scsi_Host *shost, unsigned long time)
 {
 	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
 	struct lpfc_hba   *phba = vport->phba;
+	int stat = 0;
 
+	spin_lock_irq(shost->host_lock);
+
+	if (vport->fc_flag & FC_UNLOADING) {
+		stat = 1;
+		goto finished;
+	}
 	if (time >= 30 * HZ) {
 		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
 				"%d:0461 Scanning longer than 30 "
 				"seconds.  Continuing initialization\n",
 				phba->brd_no);
+		stat = 1;
 		goto finished;
 	}
 	if (time >= 15 * HZ && phba->link_state <= LPFC_LINK_DOWN) {
@@ -1551,21 +1645,24 @@
 				"%d:0465 Link down longer than 15 "
 				"seconds.  Continuing initialization\n",
 				phba->brd_no);
+		stat = 1;
 		goto finished;
 	}
 
 	if (vport->port_state != LPFC_VPORT_READY)
-		return 0;
+		goto finished;
 	if (vport->num_disc_nodes || vport->fc_prli_sent)
-		return 0;
+		goto finished;
 	if (vport->fc_map_cnt == 0 && time < 2 * HZ)
-		return 0;
+		goto finished;
 	if ((phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE) != 0)
-		return 0;
+		goto finished;
+
+	stat = 1;
 
 finished:
-	lpfc_host_attrib_init(shost);
-	return 1;
+	spin_unlock_irq(shost->host_lock);
+	return stat;
 }
 
 void lpfc_host_attrib_init(struct Scsi_Host *shost)
@@ -1656,7 +1753,12 @@
 	/* Initialize timers used by driver */
 	init_timer(&phba->fc_estabtmo);
 	phba->fc_estabtmo.function = lpfc_establish_link_tmo;
-	phba->fc_estabtmo.data = (unsigned long) phba;
+	phba->fc_estabtmo.data = (unsigned long)phba;
+
+	init_timer(&phba->hb_tmofunc);
+	phba->hb_tmofunc.function = lpfc_hb_timeout;
+	phba->hb_tmofunc.data = (unsigned long)phba;
+
 	psli = &phba->sli;
 	init_timer(&psli->mbox_tmo);
 	psli->mbox_tmo.function = lpfc_mbox_timeout;
@@ -1791,6 +1893,7 @@
 
 	shost = lpfc_shost_from_vport(vport);
 	phba->pport = vport;
+	lpfc_debugfs_initialize(vport);
 
 	pci_set_drvdata(pdev, shost);
 
@@ -1820,15 +1923,32 @@
 	if (lpfc_alloc_sysfs_attr(vport))
 		goto out_free_irq;
 
-	scsi_scan_host(shost);
+	if (lpfc_sli_hba_setup(phba))
+		goto out_remove_device;
+
+	/*
+	 * hba setup may have changed the hba_queue_depth so we need to adjust
+	 * the value of can_queue.
+	 */
+	shost->can_queue = phba->cfg_hba_queue_depth - 10;
+
+	lpfc_host_attrib_init(shost);
+
 	if (phba->cfg_poll & DISABLE_FCP_RING_INT) {
 		spin_lock_irq(shost->host_lock);
 		lpfc_poll_start_timer(phba);
 		spin_unlock_irq(shost->host_lock);
 	}
 
+	scsi_scan_host(shost);
+
 	return 0;
 
+out_remove_device:
+	lpfc_free_sysfs_attr(vport);
+	spin_lock_irq(shost->host_lock);
+	vport->fc_flag |= FC_UNLOADING;
+	spin_unlock_irq(shost->host_lock);
 out_free_irq:
 	lpfc_stop_phba_timers(phba);
 	phba->pport->work_port_events = 0;
@@ -1865,6 +1985,8 @@
 	pci_disable_device(pdev);
 out:
 	pci_set_drvdata(pdev, NULL);
+	if (shost)
+		scsi_host_put(shost);
 	return error;
 }
 
@@ -1878,6 +2000,12 @@
 	list_for_each_entry(port_iterator, &phba->port_list, listentry)
 		port_iterator->load_flag |= FC_UNLOADING;
 
+	kfree(vport->vname);
+	lpfc_free_sysfs_attr(vport);
+
+	fc_remove_host(shost);
+	scsi_remove_host(shost);
+
 	/*
 	 * Bring down the SLI Layer. This step disable all interrupts,
 	 * clears the rings, discards all mailbox commands, and resets
@@ -1887,6 +2015,13 @@
 	lpfc_sli_brdrestart(phba);
 
 	lpfc_stop_phba_timers(phba);
+	spin_lock_irq(&phba->hbalock);
+	list_del_init(&vport->listentry);
+	spin_unlock_irq(&phba->hbalock);
+
+
+	lpfc_debugfs_terminate(vport);
+	lpfc_cleanup(vport);
 
 	kthread_stop(phba->worker_thread);
 
@@ -1894,9 +2029,8 @@
 	free_irq(phba->pcidev->irq, phba);
 	pci_disable_msi(phba->pcidev);
 
-	destroy_port(vport);
-
 	pci_set_drvdata(pdev, NULL);
+	scsi_host_put(shost);
 
 	/*
 	 * Call scsi_free before mem_free since scsi bufs are released to their