[SCSI] lpfc 8.1.2: Add ERROR and WARM_START modes for diagnostic purposes.

Add ERROR and WARM_START modes for diagnostic purposes.

Signed-off-by: Jamie Wellnitz <Jamie.Wellnitz@emulex.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 1f87632..d6ffe26 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -1,7 +1,7 @@
 /*******************************************************************
  * This file is part of the Emulex Linux Device Driver for         *
  * Fibre Channel Host Bus Adapters.                                *
- * Copyright (C) 2004-2005 Emulex.  All rights reserved.           *
+ * Copyright (C) 2004-2006 Emulex.  All rights reserved.           *
  * EMULEX and SLI are trademarks of Emulex.                        *
  * www.emulex.com                                                  *
  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
@@ -513,7 +513,9 @@
 	case MBX_SET_MASK:
 	case MBX_SET_SLIM:
 	case MBX_UNREG_D_ID:
+	case MBX_KILL_BOARD:
 	case MBX_CONFIG_FARP:
+	case MBX_BEACON:
 	case MBX_LOAD_AREA:
 	case MBX_RUN_BIU_DIAG64:
 	case MBX_CONFIG_PORT:
@@ -1512,98 +1514,162 @@
 	return errcnt;
 }
 
-/******************************************************************************
-* lpfc_sli_send_reset
-*
-* Note: After returning from this function, the HBA cannot be accessed for
-* 1 ms. Since we do not wish to delay in interrupt context, it is the
-* responsibility of the caller to perform the mdelay(1) and flush via readl().
-******************************************************************************/
-static int
-lpfc_sli_send_reset(struct lpfc_hba * phba, uint16_t skip_post)
+int
+lpfc_sli_brdready(struct lpfc_hba * phba, uint32_t mask)
 {
-	MAILBOX_t *swpmb;
-	volatile uint32_t word0;
-	void __iomem *to_slim;
-	unsigned long flags = 0;
+	uint32_t status;
+	int i = 0;
+	int retval = 0;
 
-	spin_lock_irqsave(phba->host->host_lock, flags);
+	/* Read the HBA Host Status Register */
+	status = readl(phba->HSregaddr);
 
-	/* A board reset must use REAL SLIM. */
-	phba->sli.sli_flag &= ~LPFC_SLI2_ACTIVE;
+	/*
+	 * Check status register every 10ms for 5 retries, then every
+	 * 500ms for 5, then every 2.5 sec for 5, then reset board and
+	 * every 2.5 sec for 4.
+	 * Break out of the loop if errors occurred during init.
+	 */
+	while (((status & mask) != mask) &&
+	       !(status & HS_FFERM) &&
+	       i++ < 20) {
 
-	word0 = 0;
-	swpmb = (MAILBOX_t *) & word0;
-	swpmb->mbxCommand = MBX_RESTART;
-	swpmb->mbxHc = 1;
+		if (i <= 5)
+			msleep(10);
+		else if (i <= 10)
+			msleep(500);
+		else
+			msleep(2500);
 
-	to_slim = phba->MBslimaddr;
-	writel(*(uint32_t *) swpmb, to_slim);
-	readl(to_slim); /* flush */
-
-	/* Only skip post after fc_ffinit is completed */
-	if (skip_post) {
-		word0 = 1;	/* This is really setting up word1 */
-	} else {
-		word0 = 0;	/* This is really setting up word1 */
+		if (i == 15) {
+			phba->hba_state = LPFC_STATE_UNKNOWN; /* Do post */
+			lpfc_sli_brdrestart(phba);
+		}
+		/* Read the HBA Host Status Register */
+		status = readl(phba->HSregaddr);
 	}
-	to_slim = phba->MBslimaddr + sizeof (uint32_t);
-	writel(*(uint32_t *) swpmb, to_slim);
-	readl(to_slim); /* flush */
 
-	/* Turn off parity checking and serr during the physical reset */
-	pci_read_config_word(phba->pcidev, PCI_COMMAND, &phba->pci_cfg_value);
-	pci_write_config_word(phba->pcidev, PCI_COMMAND,
-			      (phba->pci_cfg_value &
-			       ~(PCI_COMMAND_PARITY | PCI_COMMAND_SERR)));
+	/* Check to see if any errors occurred during init */
+	if ((status & HS_FFERM) || (i >= 20)) {
+		phba->hba_state = LPFC_HBA_ERROR;
+		retval = 1;
+	}
 
-	writel(HC_INITFF, phba->HCregaddr);
-
-	phba->hba_state = LPFC_INIT_START;
-	spin_unlock_irqrestore(phba->host->host_lock, flags);
-
-	return 0;
+	return retval;
 }
 
-static int
-lpfc_sli_brdreset(struct lpfc_hba * phba, uint16_t skip_post)
+int
+lpfc_sli_brdkill(struct lpfc_hba * phba)
 {
+	struct lpfc_sli *psli;
+	LPFC_MBOXQ_t *pmb;
+	uint32_t status;
+	uint32_t ha_copy;
+	int retval;
+	int i = 0;
+
+	psli = &phba->sli;
+
+	/* Kill HBA */
+	lpfc_printf_log(phba,
+		KERN_INFO,
+		LOG_SLI,
+		"%d:0329 Kill HBA Data: x%x x%x\n",
+		phba->brd_no,
+		phba->hba_state,
+		psli->sli_flag);
+
+	if ((pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool,
+						  GFP_ATOMIC)) == 0) {
+		return 1;
+	}
+
+	/* Disable the error attention */
+	spin_lock_irq(phba->host->host_lock);
+	status = readl(phba->HCregaddr);
+	status &= ~HC_ERINT_ENA;
+	writel(status, phba->HCregaddr);
+	readl(phba->HCregaddr); /* flush */
+	spin_unlock_irq(phba->host->host_lock);
+
+	lpfc_kill_board(phba, pmb);
+	pmb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+	retval = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT);
+
+	if (retval != MBX_SUCCESS) {
+		if (retval != MBX_BUSY)
+			mempool_free(pmb, phba->mbox_mem_pool);
+		return 1;
+	}
+
+	mempool_free(pmb, phba->mbox_mem_pool);
+
+	/* There is no completion for a KILL_BOARD mbox cmd. Check for an error
+	 * attention every 100ms for 3 seconds. If we don't get ERATT after
+	 * 3 seconds we still set HBA_ERROR state because the status of the
+	 * board is now undefined.
+	 */
+	ha_copy = readl(phba->HAregaddr);
+
+	while ((i++ < 30) && !(ha_copy & HA_ERATT)) {
+		mdelay(100);
+		ha_copy = readl(phba->HAregaddr);
+	}
+
+	del_timer_sync(&psli->mbox_tmo);
+
+	spin_lock_irq(phba->host->host_lock);
+	psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+	spin_unlock_irq(phba->host->host_lock);
+
+	psli->mbox_active = NULL;
+	lpfc_hba_down_post(phba);
+	phba->hba_state = LPFC_HBA_ERROR;
+
+	return (ha_copy & HA_ERATT ? 0 : 1);
+}
+
+int
+lpfc_sli_brdreset(struct lpfc_hba * phba)
+{
+	struct lpfc_sli *psli;
 	struct lpfc_sli_ring *pring;
+	uint16_t cfg_value;
 	int i;
-	struct lpfc_dmabuf *mp, *next_mp;
-	unsigned long flags = 0;
 
-	lpfc_sli_send_reset(phba, skip_post);
-	mdelay(1);
+	psli = &phba->sli;
 
-	spin_lock_irqsave(phba->host->host_lock, flags);
-	/* Risk the write on flush case ie no delay after the readl */
-	readl(phba->HCregaddr); /* flush */
-	/* Now toggle INITFF bit set by lpfc_sli_send_reset */
-	writel(0, phba->HCregaddr);
-	readl(phba->HCregaddr); /* flush */
-
-	/* Restore PCI cmd register */
-	pci_write_config_word(phba->pcidev, PCI_COMMAND, phba->pci_cfg_value);
+	/* Reset HBA */
+	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+			"%d:0325 Reset HBA Data: x%x x%x\n", phba->brd_no,
+			phba->hba_state, psli->sli_flag);
 
 	/* perform board reset */
 	phba->fc_eventTag = 0;
 	phba->fc_myDID = 0;
-	phba->fc_prevDID = Mask_DID;
+	phba->fc_prevDID = 0;
 
-	/* Reset HBA */
-	lpfc_printf_log(phba,
-		KERN_INFO,
-		LOG_SLI,
-		"%d:0325 Reset HBA Data: x%x x%x x%x\n",
-		phba->brd_no,
-		phba->hba_state,
-		phba->sli.sli_flag,
-		skip_post);
+	psli->sli_flag = 0;
+
+	/* Turn off parity checking and serr during the physical reset */
+	pci_read_config_word(phba->pcidev, PCI_COMMAND, &cfg_value);
+	pci_write_config_word(phba->pcidev, PCI_COMMAND,
+			      (cfg_value &
+			       ~(PCI_COMMAND_PARITY | PCI_COMMAND_SERR)));
+
+	/* Now toggle INITFF bit in the Host Control Register */
+	writel(HC_INITFF, phba->HCregaddr);
+	mdelay(1);
+	readl(phba->HCregaddr); /* flush */
+	writel(0, phba->HCregaddr);
+	readl(phba->HCregaddr); /* flush */
+
+	/* Restore PCI cmd register */
+	pci_write_config_word(phba->pcidev, PCI_COMMAND, cfg_value);
 
 	/* Initialize relevant SLI info */
-	for (i = 0; i < phba->sli.num_rings; i++) {
-		pring = &phba->sli.ring[i];
+	for (i = 0; i < psli->num_rings; i++) {
+		pring = &psli->ring[i];
 		pring->flag = 0;
 		pring->rspidx = 0;
 		pring->next_cmdidx  = 0;
@@ -1611,27 +1677,62 @@
 		pring->cmdidx = 0;
 		pring->missbufcnt = 0;
 	}
-	spin_unlock_irqrestore(phba->host->host_lock, flags);
 
-	if (skip_post) {
-		mdelay(100);
+	phba->hba_state = LPFC_WARM_START;
+	return 0;
+}
+
+int
+lpfc_sli_brdrestart(struct lpfc_hba * phba)
+{
+	MAILBOX_t *mb;
+	struct lpfc_sli *psli;
+	uint16_t skip_post;
+	volatile uint32_t word0;
+	void __iomem *to_slim;
+
+	spin_lock_irq(phba->host->host_lock);
+
+	psli = &phba->sli;
+
+	/* Restart HBA */
+	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+			"%d:0328 Restart HBA Data: x%x x%x\n", phba->brd_no,
+			phba->hba_state, psli->sli_flag);
+
+	word0 = 0;
+	mb = (MAILBOX_t *) &word0;
+	mb->mbxCommand = MBX_RESTART;
+	mb->mbxHc = 1;
+
+	to_slim = phba->MBslimaddr;
+	writel(*(uint32_t *) mb, to_slim);
+	readl(to_slim); /* flush */
+
+	/* Only skip post after fc_ffinit is completed */
+	if (phba->hba_state) {
+		skip_post = 1;
+		word0 = 1;	/* This is really setting up word1 */
 	} else {
+		skip_post = 0;
+		word0 = 0;	/* This is really setting up word1 */
+	}
+	to_slim = (uint8_t *) phba->MBslimaddr + sizeof (uint32_t);
+	writel(*(uint32_t *) mb, to_slim);
+	readl(to_slim); /* flush */
+
+	lpfc_sli_brdreset(phba);
+
+	phba->hba_state = LPFC_INIT_START;
+
+	spin_unlock_irq(phba->host->host_lock);
+
+	if (skip_post)
+		mdelay(100);
+	else
 		mdelay(2000);
-	}
 
-	spin_lock_irqsave(phba->host->host_lock, flags);
-	/* Cleanup preposted buffers on the ELS ring */
-	pring = &phba->sli.ring[LPFC_ELS_RING];
-	list_for_each_entry_safe(mp, next_mp, &pring->postbufq, list) {
-		list_del(&mp->list);
-		pring->postbufq_cnt--;
-		lpfc_mbuf_free(phba, mp->virt, mp->phys);
-		kfree(mp);
-	}
-	spin_unlock_irqrestore(phba->host->host_lock, flags);
-
-	for (i = 0; i < phba->sli.num_rings; i++)
-		lpfc_sli_abort_iocb_ring(phba, &phba->sli.ring[i]);
+	lpfc_hba_down_post(phba);
 
 	return 0;
 }
@@ -1691,7 +1792,8 @@
 		}
 
 		if (i == 15) {
-			lpfc_sli_brdreset(phba, 0);
+			phba->hba_state = LPFC_STATE_UNKNOWN; /* Do post */
+			lpfc_sli_brdrestart(phba);
 		}
 		/* Read the HBA Host Status Register */
 		status = readl(phba->HSregaddr);
@@ -1735,8 +1837,8 @@
 	}
 
 	while (resetcount < 2 && !done) {
-		phba->hba_state = 0;
-		lpfc_sli_brdreset(phba, 0);
+		phba->hba_state = LPFC_STATE_UNKNOWN;
+		lpfc_sli_brdrestart(phba);
 		msleep(2500);
 		rc = lpfc_sli_chipset_init(phba);
 		if (rc)
@@ -1920,6 +2022,14 @@
 	mb = &pmbox->mb;
 	status = MBX_SUCCESS;
 
+	if (phba->hba_state == LPFC_HBA_ERROR) {
+		spin_unlock_irqrestore(phba->host->host_lock, drvr_flag);
+
+		/* Mbox command <mbxCommand> cannot issue */
+		LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag)
+		return (MBX_NOT_FINISHED);
+	}
+
 	if (psli->sli_flag & LPFC_SLI_MBOX_ACTIVE) {
 		/* Polling for a mbox command when another one is already active
 		 * is not allowed in SLI. Also, the driver must have established
@@ -2002,7 +2112,8 @@
 
 	/* If we are not polling, we MUST be in SLI2 mode */
 	if (flag != MBX_POLL) {
-		if (!(psli->sli_flag & LPFC_SLI2_ACTIVE)) {
+		if (!(psli->sli_flag & LPFC_SLI2_ACTIVE) &&
+		    (mb->mbxCommand != MBX_KILL_BOARD)) {
 			psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
 			spin_unlock_irqrestore(phba->host->host_lock,
 					       drvr_flag);
@@ -2035,7 +2146,8 @@
 		/* First copy command data to host SLIM area */
 		lpfc_sli_pcimem_bcopy(mb, &phba->slim2p->mbx, MAILBOX_CMD_SIZE);
 	} else {
-		if (mb->mbxCommand == MBX_CONFIG_PORT) {
+		if (mb->mbxCommand == MBX_CONFIG_PORT ||
+		    mb->mbxCommand == MBX_KILL_BOARD) {
 			/* copy command data into host mbox for cmpl */
 			lpfc_sli_pcimem_bcopy(mb, &phba->slim2p->mbx,
 					MAILBOX_CMD_SIZE);
@@ -2086,8 +2198,9 @@
 		ha_copy = readl(phba->HAregaddr);
 
 		/* Wait for command to complete */
-		while (((word0 & OWN_CHIP) == OWN_CHIP)
-		       || !(ha_copy & HA_MBATT)) {
+		while (((word0 & OWN_CHIP) == OWN_CHIP) ||
+		       (!(ha_copy & HA_MBATT) &&
+			(phba->hba_state > LPFC_WARM_START))) {
 			if (i++ >= 100) {
 				psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
 				spin_unlock_irqrestore(phba->host->host_lock,
@@ -2455,15 +2568,6 @@
 
 	spin_unlock_irqrestore(phba->host->host_lock, flags);
 
-	/*
-	 * Provided the hba is not in an error state, reset it.  It is not
-	 * capable of IO anymore.
-	 */
-	if (phba->hba_state != LPFC_HBA_ERROR) {
-		phba->hba_state = LPFC_INIT_START;
-		lpfc_sli_brdreset(phba, 1);
-	}
-
 	return 1;
 }
 
@@ -2976,13 +3080,6 @@
 			/* Clear Chip error bit */
 			writel(HA_ERATT, phba->HAregaddr);
 			readl(phba->HAregaddr); /* flush */
-
-			/*
-			 * Reseting the HBA is the only reliable way
-			 * to shutdown interrupt when there is a
-			 * ERROR.
-			 */
-			lpfc_sli_send_reset(phba, 1);
 		}
 
 		spin_lock(phba->host->host_lock);