[SCSI] lpfc 8.3.39: Fix driver issues with large s/g lists for BlockGuard

Signed-off-by: James Smart <james.smart@emulex.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 1f2f1f4..924ceaa 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -46,13 +46,15 @@
 #define LPFC_DEFAULT_MENLO_SG_SEG_CNT 128	/* sg element count per scsi
 		cmnd for menlo needs nearly twice as for firmware
 		downloads using bsg */
-#define LPFC_DEFAULT_PROT_SG_SEG_CNT 4096 /* sg protection elements count */
+
+#define LPFC_MIN_SG_SLI4_BUF_SZ	0x800	/* based on LPFC_DEFAULT_SG_SEG_CNT */
+#define LPFC_MAX_SG_SLI4_SEG_CNT_DIF 128 /* sg element count per scsi cmnd */
+#define LPFC_MAX_SG_SEG_CNT_DIF 512	/* sg element count per scsi cmnd  */
 #define LPFC_MAX_SG_SEG_CNT	4096	/* sg element count per scsi cmnd */
 #define LPFC_MAX_SGL_SEG_CNT	512	/* SGL element count per scsi cmnd */
 #define LPFC_MAX_BPL_SEG_CNT	4096	/* BPL element count per scsi cmnd */
 
 #define LPFC_MAX_SGE_SIZE       0x80000000 /* Maximum data allowed in a SGE */
-#define LPFC_MAX_PROT_SG_SEG_CNT 4096	/* prot sg element count per scsi cmd*/
 #define LPFC_IOCB_LIST_CNT	2250	/* list of IOCBs for fast-path usage. */
 #define LPFC_Q_RAMP_UP_INTERVAL 120     /* lun q_depth ramp up interval */
 #define LPFC_VNAME_LEN		100	/* vport symbolic name length */
@@ -710,6 +712,7 @@
 	uint32_t cfg_fcp_wq_count;
 	uint32_t cfg_fcp_eq_count;
 	uint32_t cfg_fcp_io_channel;
+	uint32_t cfg_total_seg_cnt;
 	uint32_t cfg_sg_seg_cnt;
 	uint32_t cfg_prot_sg_seg_cnt;
 	uint32_t cfg_sg_dma_buf_size;
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index ad5b0f9..3b10aa5 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -4073,16 +4073,23 @@
 
 /*
  * lpfc_sg_seg_cnt - Initial Maximum DMA Segment Count
- * This value can be set to values between 64 and 256. The default value is
+ * This value can be set to values between 64 and 4096. The default value is
  * 64, but may be increased to allow for larger Max I/O sizes. The scsi layer
  * will be allowed to request I/Os of sizes up to (MAX_SEG_COUNT * SEG_SIZE).
+ * Because of the additional overhead involved in setting up T10-DIF,
+ * this parameter will be limited to 128 if BlockGuard is enabled under SLI4
+ * and will be limited to 512 if BlockGuard is enabled under SLI3.
  */
 LPFC_ATTR_R(sg_seg_cnt, LPFC_DEFAULT_SG_SEG_CNT, LPFC_DEFAULT_SG_SEG_CNT,
 	    LPFC_MAX_SG_SEG_CNT, "Max Scatter Gather Segment Count");
 
-LPFC_ATTR_R(prot_sg_seg_cnt, LPFC_DEFAULT_PROT_SG_SEG_CNT,
-		LPFC_DEFAULT_PROT_SG_SEG_CNT, LPFC_MAX_PROT_SG_SEG_CNT,
-		"Max Protection Scatter Gather Segment Count");
+/*
+ * This parameter will be depricated, the driver cannot limit the
+ * protection data s/g list.
+ */
+LPFC_ATTR_R(prot_sg_seg_cnt, LPFC_DEFAULT_SG_SEG_CNT,
+	    LPFC_DEFAULT_SG_SEG_CNT, LPFC_MAX_SG_SEG_CNT,
+	    "Max Protection Scatter Gather Segment Count");
 
 struct device_attribute *lpfc_hba_attrs[] = {
 	&dev_attr_bg_info,
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index b2227fc..1874b32 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -4730,24 +4730,53 @@
 		return -ENOMEM;
 
 	/*
-	 * Since the sg_tablesize is module parameter, the sg_dma_buf_size
+	 * Since lpfc_sg_seg_cnt is module parameter, the sg_dma_buf_size
 	 * used to create the sg_dma_buf_pool must be dynamically calculated.
-	 * 2 segments are added since the IOCB needs a command and response bde.
 	 */
-	phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
-		sizeof(struct fcp_rsp) +
-			((phba->cfg_sg_seg_cnt + 2) * sizeof(struct ulp_bde64));
 
-	if (phba->cfg_enable_bg) {
-		phba->cfg_sg_seg_cnt = LPFC_MAX_BPL_SEG_CNT;
-		phba->cfg_sg_dma_buf_size +=
-			phba->cfg_prot_sg_seg_cnt * sizeof(struct ulp_bde64);
-	}
-
-	/* Also reinitialize the host templates with new values. */
+	/* Initialize the host templates the configured values. */
 	lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt;
 	lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt;
 
+	/* There are going to be 2 reserved BDEs: 1 FCP cmnd + 1 FCP rsp */
+	if (phba->cfg_enable_bg) {
+		/*
+		 * The scsi_buf for a T10-DIF I/O will hold the FCP cmnd,
+		 * the FCP rsp, and a BDE for each. Sice we have no control
+		 * over how many protection data segments the SCSI Layer
+		 * will hand us (ie: there could be one for every block
+		 * in the IO), we just allocate enough BDEs to accomidate
+		 * our max amount and we need to limit lpfc_sg_seg_cnt to
+		 * minimize the risk of running out.
+		 */
+		phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
+			sizeof(struct fcp_rsp) +
+			(LPFC_MAX_SG_SEG_CNT * sizeof(struct ulp_bde64));
+
+		if (phba->cfg_sg_seg_cnt > LPFC_MAX_SG_SEG_CNT_DIF)
+			phba->cfg_sg_seg_cnt = LPFC_MAX_SG_SEG_CNT_DIF;
+
+		/* Total BDEs in BPL for scsi_sg_list and scsi_sg_prot_list */
+		phba->cfg_total_seg_cnt = LPFC_MAX_SG_SEG_CNT;
+	} else {
+		/*
+		 * The scsi_buf for a regular I/O will hold the FCP cmnd,
+		 * the FCP rsp, a BDE for each, and a BDE for up to
+		 * cfg_sg_seg_cnt data segments.
+		 */
+		phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
+			sizeof(struct fcp_rsp) +
+			((phba->cfg_sg_seg_cnt + 2) * sizeof(struct ulp_bde64));
+
+		/* Total BDEs in BPL for scsi_sg_list */
+		phba->cfg_total_seg_cnt = phba->cfg_sg_seg_cnt + 2;
+	}
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_FCP,
+			"9088 sg_tablesize:%d dmabuf_size:%d total_bde:%d\n",
+			phba->cfg_sg_seg_cnt, phba->cfg_sg_dma_buf_size,
+			phba->cfg_total_seg_cnt);
+
 	phba->max_vpi = LPFC_MAX_VPI;
 	/* This will be set to correct value after config_port mbox */
 	phba->max_vports = 0;
@@ -4814,11 +4843,10 @@
 {
 	struct lpfc_sli *psli;
 	LPFC_MBOXQ_t *mboxq;
-	int rc, i, hbq_count, buf_size, dma_buf_size, max_buf_size;
+	int rc, i, hbq_count, max_buf_size;
 	uint8_t pn_page[LPFC_MAX_SUPPORTED_PAGES] = {0};
 	struct lpfc_mqe *mqe;
 	int longs;
-	int sges_per_segment;
 
 	/* Before proceed, wait for POST done and device ready */
 	rc = lpfc_sli4_post_status_check(phba);
@@ -4886,11 +4914,6 @@
 	phba->fc_map[1] = LPFC_FCOE_FCF_MAP1;
 	phba->fc_map[2] = LPFC_FCOE_FCF_MAP2;
 
-	/* With BlockGuard we can have multiple SGEs per Data Segemnt */
-	sges_per_segment = 1;
-	if (phba->cfg_enable_bg)
-		sges_per_segment = 2;
-
 	/*
 	 * For SLI4, instead of using ring 0 (LPFC_FCP_RING) for FCP commands
 	 * we will associate a new ring, for each FCP fastpath EQ/CQ/WQ tuple.
@@ -4910,29 +4933,62 @@
 	max_buf_size = (2 * SLI4_PAGE_SIZE);
 	if (phba->cfg_sg_seg_cnt > LPFC_MAX_SGL_SEG_CNT - 2)
 		phba->cfg_sg_seg_cnt = LPFC_MAX_SGL_SEG_CNT - 2;
-	max_buf_size += (sizeof(struct fcp_cmnd) + sizeof(struct fcp_rsp));
 
 	/*
-	 * Since the sg_tablesize is module parameter, the sg_dma_buf_size
+	 * Since lpfc_sg_seg_cnt is module parameter, the sg_dma_buf_size
 	 * used to create the sg_dma_buf_pool must be dynamically calculated.
-	 * 2 segments are added since the IOCB needs a command and response bde.
-	 * To insure that the scsi sgl does not cross a 4k page boundary only
-	 * sgl sizes of must be a power of 2.
 	 */
-	buf_size = (sizeof(struct fcp_cmnd) + sizeof(struct fcp_rsp) +
-		    (((phba->cfg_sg_seg_cnt * sges_per_segment) + 2) *
-		    sizeof(struct sli4_sge)));
 
-	for (dma_buf_size = LPFC_SLI4_MIN_BUF_SIZE;
-	     dma_buf_size < max_buf_size && buf_size > dma_buf_size;
-	     dma_buf_size = dma_buf_size << 1)
-		;
-	if (dma_buf_size == max_buf_size)
-		phba->cfg_sg_seg_cnt = (dma_buf_size -
-			sizeof(struct fcp_cmnd) - sizeof(struct fcp_rsp) -
-			(2 * sizeof(struct sli4_sge))) /
-				sizeof(struct sli4_sge);
-	phba->cfg_sg_dma_buf_size = dma_buf_size;
+	if (phba->cfg_enable_bg) {
+		/*
+		 * The scsi_buf for a T10-DIF I/O will hold the FCP cmnd,
+		 * the FCP rsp, and a SGE for each. Sice we have no control
+		 * over how many protection data segments the SCSI Layer
+		 * will hand us (ie: there could be one for every block
+		 * in the IO), we just allocate enough SGEs to accomidate
+		 * our max amount and we need to limit lpfc_sg_seg_cnt to
+		 * minimize the risk of running out.
+		 */
+		phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
+			sizeof(struct fcp_rsp) + max_buf_size;
+
+		/* Total SGEs for scsi_sg_list and scsi_sg_prot_list */
+		phba->cfg_total_seg_cnt = LPFC_MAX_SGL_SEG_CNT;
+
+		if (phba->cfg_sg_seg_cnt > LPFC_MAX_SG_SLI4_SEG_CNT_DIF)
+			phba->cfg_sg_seg_cnt = LPFC_MAX_SG_SLI4_SEG_CNT_DIF;
+	} else {
+		/*
+		 * The scsi_buf for a regular I/O will hold the FCP cmnd,
+		 * the FCP rsp, a SGE for each, and a SGE for up to
+		 * cfg_sg_seg_cnt data segments.
+		 */
+		phba->cfg_sg_dma_buf_size = sizeof(struct fcp_cmnd) +
+			sizeof(struct fcp_rsp) +
+			((phba->cfg_sg_seg_cnt + 2) * sizeof(struct sli4_sge));
+
+		/* Total SGEs for scsi_sg_list */
+		phba->cfg_total_seg_cnt = phba->cfg_sg_seg_cnt + 2;
+		/*
+		 * NOTE: if (phba->cfg_sg_seg_cnt + 2) <= 256 we only need
+		 * to post 1 page for the SGL.
+		 */
+	}
+
+	/* Initialize the host templates with the updated values. */
+	lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt;
+	lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt;
+
+	if (phba->cfg_sg_dma_buf_size  <= LPFC_MIN_SG_SLI4_BUF_SZ)
+		phba->cfg_sg_dma_buf_size = LPFC_MIN_SG_SLI4_BUF_SZ;
+	else
+		phba->cfg_sg_dma_buf_size =
+			SLI4_PAGE_ALIGN(phba->cfg_sg_dma_buf_size);
+
+	lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_FCP,
+			"9087 sg_tablesize:%d dmabuf_size:%d total_sge:%d\n",
+			phba->cfg_sg_seg_cnt, phba->cfg_sg_dma_buf_size,
+			phba->cfg_total_seg_cnt);
 
 	/* Initialize buffer queue management fields */
 	hbq_count = lpfc_sli_hbq_count();
diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c
index cd86069..812d0cd 100644
--- a/drivers/scsi/lpfc/lpfc_mem.c
+++ b/drivers/scsi/lpfc/lpfc_mem.c
@@ -64,18 +64,26 @@
 	struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool;
 	int i;
 
-	if (phba->sli_rev == LPFC_SLI_REV4)
+	if (phba->sli_rev == LPFC_SLI_REV4) {
+		/* Calculate alignment */
+		if (phba->cfg_sg_dma_buf_size < SLI4_PAGE_SIZE)
+			i = phba->cfg_sg_dma_buf_size;
+		else
+			i = SLI4_PAGE_SIZE;
+
 		phba->lpfc_scsi_dma_buf_pool =
 			pci_pool_create("lpfc_scsi_dma_buf_pool",
 				phba->pcidev,
 				phba->cfg_sg_dma_buf_size,
-				phba->cfg_sg_dma_buf_size,
+				i,
 				0);
-	else
+	} else {
 		phba->lpfc_scsi_dma_buf_pool =
 			pci_pool_create("lpfc_scsi_dma_buf_pool",
 				phba->pcidev, phba->cfg_sg_dma_buf_size,
 				align, 0);
+	}
+
 	if (!phba->lpfc_scsi_dma_buf_pool)
 		goto fail;
 
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index d096402..c79b4dc 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -536,7 +536,16 @@
 	dma_addr_t pdma_phys_fcp_rsp;
 	dma_addr_t pdma_phys_bpl;
 	uint16_t iotag;
-	int bcnt;
+	int bcnt, bpl_size;
+
+	bpl_size = phba->cfg_sg_dma_buf_size -
+		(sizeof(struct fcp_cmnd) + sizeof(struct fcp_rsp));
+
+	lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP,
+			 "9067 ALLOC %d scsi_bufs: %d (%d + %d + %d)\n",
+			 num_to_alloc, phba->cfg_sg_dma_buf_size,
+			 (int)sizeof(struct fcp_cmnd),
+			 (int)sizeof(struct fcp_rsp), bpl_size);
 
 	for (bcnt = 0; bcnt < num_to_alloc; bcnt++) {
 		psb = kzalloc(sizeof(struct lpfc_scsi_buf), GFP_KERNEL);
@@ -761,7 +770,7 @@
 			     struct list_head *post_sblist, int sb_count)
 {
 	struct lpfc_scsi_buf *psb, *psb_next;
-	int status;
+	int status, sgl_size;
 	int post_cnt = 0, block_cnt = 0, num_posting = 0, num_posted = 0;
 	dma_addr_t pdma_phys_bpl1;
 	int last_xritag = NO_XRI;
@@ -773,6 +782,9 @@
 	if (sb_count <= 0)
 		return -EINVAL;
 
+	sgl_size = phba->cfg_sg_dma_buf_size -
+		(sizeof(struct fcp_cmnd) + sizeof(struct fcp_rsp));
+
 	list_for_each_entry_safe(psb, psb_next, post_sblist, list) {
 		list_del_init(&psb->list);
 		block_cnt++;
@@ -805,7 +817,7 @@
 				post_cnt = block_cnt;
 			} else if (block_cnt == 1) {
 				/* last single sgl with non-contiguous xri */
-				if (phba->cfg_sg_dma_buf_size > SGL_PAGE_SIZE)
+				if (sgl_size > SGL_PAGE_SIZE)
 					pdma_phys_bpl1 = psb->dma_phys_bpl +
 								SGL_PAGE_SIZE;
 				else
@@ -925,13 +937,22 @@
 	IOCB_t *iocb;
 	dma_addr_t pdma_phys_fcp_cmd;
 	dma_addr_t pdma_phys_fcp_rsp;
-	dma_addr_t pdma_phys_bpl, pdma_phys_bpl1;
+	dma_addr_t pdma_phys_bpl;
 	uint16_t iotag, lxri = 0;
-	int bcnt, num_posted;
+	int bcnt, num_posted, sgl_size;
 	LIST_HEAD(prep_sblist);
 	LIST_HEAD(post_sblist);
 	LIST_HEAD(scsi_sblist);
 
+	sgl_size = phba->cfg_sg_dma_buf_size -
+		(sizeof(struct fcp_cmnd) + sizeof(struct fcp_rsp));
+
+	lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP,
+			 "9068 ALLOC %d scsi_bufs: %d (%d + %d + %d)\n",
+			 num_to_alloc, phba->cfg_sg_dma_buf_size, sgl_size,
+			 (int)sizeof(struct fcp_cmnd),
+			 (int)sizeof(struct fcp_rsp));
+
 	for (bcnt = 0; bcnt < num_to_alloc; bcnt++) {
 		psb = kzalloc(sizeof(struct lpfc_scsi_buf), GFP_KERNEL);
 		if (!psb)
@@ -950,6 +971,15 @@
 		}
 		memset(psb->data, 0, phba->cfg_sg_dma_buf_size);
 
+		/* Page alignment is CRITICAL, double check to be sure */
+		if (((unsigned long)(psb->data) &
+		    (unsigned long)(SLI4_PAGE_SIZE - 1)) != 0) {
+			pci_pool_free(phba->lpfc_scsi_dma_buf_pool,
+				      psb->data, psb->dma_handle);
+			kfree(psb);
+			break;
+		}
+
 		/* Allocate iotag for psb->cur_iocbq. */
 		iotag = lpfc_sli_next_iotag(phba, &psb->cur_iocbq);
 		if (iotag == 0) {
@@ -970,17 +1000,14 @@
 		psb->cur_iocbq.sli4_xritag = phba->sli4_hba.xri_ids[lxri];
 		psb->cur_iocbq.iocb_flag |= LPFC_IO_FCP;
 		psb->fcp_bpl = psb->data;
-		psb->fcp_cmnd = (psb->data + phba->cfg_sg_dma_buf_size)
-			- (sizeof(struct fcp_cmnd) + sizeof(struct fcp_rsp));
+		psb->fcp_cmnd = (psb->data + sgl_size);
 		psb->fcp_rsp = (struct fcp_rsp *)((uint8_t *)psb->fcp_cmnd +
 					sizeof(struct fcp_cmnd));
 
 		/* Initialize local short-hand pointers. */
 		sgl = (struct sli4_sge *)psb->fcp_bpl;
 		pdma_phys_bpl = psb->dma_handle;
-		pdma_phys_fcp_cmd =
-			(psb->dma_handle + phba->cfg_sg_dma_buf_size)
-			 - (sizeof(struct fcp_cmnd) + sizeof(struct fcp_rsp));
+		pdma_phys_fcp_cmd = (psb->dma_handle + sgl_size);
 		pdma_phys_fcp_rsp = pdma_phys_fcp_cmd + sizeof(struct fcp_cmnd);
 
 		/*
@@ -1022,10 +1049,6 @@
 		iocb->ulpLe = 1;
 		iocb->ulpClass = CLASS3;
 		psb->cur_iocbq.context1 = psb;
-		if (phba->cfg_sg_dma_buf_size > SGL_PAGE_SIZE)
-			pdma_phys_bpl1 = pdma_phys_bpl + SGL_PAGE_SIZE;
-		else
-			pdma_phys_bpl1 = 0;
 		psb->dma_phys_bpl = pdma_phys_bpl;
 
 		/* add the scsi buffer to a post list */
@@ -1270,6 +1293,7 @@
 			       "dma_map_sg.  Config %d, seg_cnt %d\n",
 			       __func__, phba->cfg_sg_seg_cnt,
 			       lpfc_cmd->seg_cnt);
+			lpfc_cmd->seg_cnt = 0;
 			scsi_dma_unmap(scsi_cmnd);
 			return 1;
 		}
@@ -2147,6 +2171,10 @@
 
 	split_offset = 0;
 	do {
+		/* Check to see if we ran out of space */
+		if (num_bde >= (phba->cfg_total_seg_cnt - 2))
+			return num_bde + 3;
+
 		/* setup PDE5 with what we have */
 		pde5 = (struct lpfc_pde5 *) bpl;
 		memset(pde5, 0, sizeof(struct lpfc_pde5));
@@ -2215,6 +2243,10 @@
 		pgdone = 0;
 		subtotal = 0; /* total bytes processed for current prot grp */
 		while (!pgdone) {
+			/* Check to see if we ran out of space */
+			if (num_bde >= phba->cfg_total_seg_cnt)
+				return num_bde + 1;
+
 			if (!sgde) {
 				lpfc_printf_log(phba, KERN_ERR, LOG_BG,
 					"9065 BLKGRD:%s Invalid data segment\n",
@@ -2499,6 +2531,10 @@
 
 	split_offset = 0;
 	do {
+		/* Check to see if we ran out of space */
+		if (num_sge >= (phba->cfg_total_seg_cnt - 2))
+			return num_sge + 3;
+
 		/* setup DISEED with what we have */
 		diseed = (struct sli4_sge_diseed *) sgl;
 		memset(diseed, 0, sizeof(struct sli4_sge_diseed));
@@ -2558,6 +2594,10 @@
 		pgdone = 0;
 		subtotal = 0; /* total bytes processed for current prot grp */
 		while (!pgdone) {
+			/* Check to see if we ran out of space */
+			if (num_sge >= phba->cfg_total_seg_cnt)
+				return num_sge + 1;
+
 			if (!sgde) {
 				lpfc_printf_log(phba, KERN_ERR, LOG_BG,
 					"9086 BLKGRD:%s Invalid data segment\n",
@@ -2713,28 +2753,28 @@
 			return 1;
 
 		lpfc_cmd->seg_cnt = datasegcnt;
-		if (lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_BG,
-					"9067 BLKGRD: %s: Too many sg segments"
-					" from dma_map_sg.  Config %d, seg_cnt"
-					" %d\n",
-					__func__, phba->cfg_sg_seg_cnt,
-					lpfc_cmd->seg_cnt);
-			scsi_dma_unmap(scsi_cmnd);
-			return 1;
-		}
+
+		/* First check if data segment count from SCSI Layer is good */
+		if (lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt)
+			goto err;
 
 		prot_group_type = lpfc_prot_group_type(phba, scsi_cmnd);
 
 		switch (prot_group_type) {
 		case LPFC_PG_TYPE_NO_DIF:
+
+			/* Here we need to add a PDE5 and PDE6 to the count */
+			if ((lpfc_cmd->seg_cnt + 2) > phba->cfg_total_seg_cnt)
+				goto err;
+
 			num_bde = lpfc_bg_setup_bpl(phba, scsi_cmnd, bpl,
 					datasegcnt);
 			/* we should have 2 or more entries in buffer list */
 			if (num_bde < 2)
 				goto err;
 			break;
-		case LPFC_PG_TYPE_DIF_BUF:{
+
+		case LPFC_PG_TYPE_DIF_BUF:
 			/*
 			 * This type indicates that protection buffers are
 			 * passed to the driver, so that needs to be prepared
@@ -2749,31 +2789,28 @@
 			}
 
 			lpfc_cmd->prot_seg_cnt = protsegcnt;
-			if (lpfc_cmd->prot_seg_cnt
-			    > phba->cfg_prot_sg_seg_cnt) {
-				lpfc_printf_log(phba, KERN_ERR, LOG_BG,
-					"9068 BLKGRD: %s: Too many prot sg "
-					"segments from dma_map_sg.  Config %d,"
-						"prot_seg_cnt %d\n", __func__,
-						phba->cfg_prot_sg_seg_cnt,
-						lpfc_cmd->prot_seg_cnt);
-				dma_unmap_sg(&phba->pcidev->dev,
-					     scsi_prot_sglist(scsi_cmnd),
-					     scsi_prot_sg_count(scsi_cmnd),
-					     datadir);
-				scsi_dma_unmap(scsi_cmnd);
-				return 1;
-			}
+
+			/*
+			 * There is a minimun of 4 BPLs used for every
+			 * protection data segment.
+			 */
+			if ((lpfc_cmd->prot_seg_cnt * 4) >
+			    (phba->cfg_total_seg_cnt - 2))
+				goto err;
 
 			num_bde = lpfc_bg_setup_bpl_prot(phba, scsi_cmnd, bpl,
 					datasegcnt, protsegcnt);
 			/* we should have 3 or more entries in buffer list */
-			if (num_bde < 3)
+			if ((num_bde < 3) ||
+			    (num_bde > phba->cfg_total_seg_cnt))
 				goto err;
 			break;
-		}
+
 		case LPFC_PG_TYPE_INVALID:
 		default:
+			scsi_dma_unmap(scsi_cmnd);
+			lpfc_cmd->seg_cnt = 0;
+
 			lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
 					"9022 Unexpected protection group %i\n",
 					prot_group_type);
@@ -2814,10 +2851,22 @@
 
 	return 0;
 err:
+	if (lpfc_cmd->seg_cnt)
+		scsi_dma_unmap(scsi_cmnd);
+	if (lpfc_cmd->prot_seg_cnt)
+		dma_unmap_sg(&phba->pcidev->dev, scsi_prot_sglist(scsi_cmnd),
+			     scsi_prot_sg_count(scsi_cmnd),
+			     scsi_cmnd->sc_data_direction);
+
 	lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
-			"9023 Could not setup all needed BDE's"
-			"prot_group_type=%d, num_bde=%d\n",
+			"9023 Cannot setup S/G List for HBA"
+			"IO segs %d/%d BPL %d SCSI %d: %d %d\n",
+			lpfc_cmd->seg_cnt, lpfc_cmd->prot_seg_cnt,
+			phba->cfg_total_seg_cnt, phba->cfg_sg_seg_cnt,
 			prot_group_type, num_bde);
+
+	lpfc_cmd->seg_cnt = 0;
+	lpfc_cmd->prot_seg_cnt = 0;
 	return 1;
 }
 
@@ -3255,6 +3304,7 @@
 				"dma_map_sg.  Config %d, seg_cnt %d\n",
 				__func__, phba->cfg_sg_seg_cnt,
 			       lpfc_cmd->seg_cnt);
+			lpfc_cmd->seg_cnt = 0;
 			scsi_dma_unmap(scsi_cmnd);
 			return 1;
 		}
@@ -3376,14 +3426,14 @@
 	struct fcp_cmnd *fcp_cmnd = lpfc_cmd->fcp_cmnd;
 	struct sli4_sge *sgl = (struct sli4_sge *)(lpfc_cmd->fcp_bpl);
 	IOCB_t *iocb_cmd = &lpfc_cmd->cur_iocbq.iocb;
-	uint32_t num_bde = 0;
+	uint32_t num_sge = 0;
 	int datasegcnt, protsegcnt, datadir = scsi_cmnd->sc_data_direction;
 	int prot_group_type = 0;
 	int fcpdl;
 
 	/*
 	 * Start the lpfc command prep by bumping the sgl beyond fcp_cmnd
-	 *  fcp_rsp regions to the first data bde entry
+	 *  fcp_rsp regions to the first data sge entry
 	 */
 	if (scsi_sg_count(scsi_cmnd)) {
 		/*
@@ -3406,28 +3456,28 @@
 
 		sgl += 1;
 		lpfc_cmd->seg_cnt = datasegcnt;
-		if (lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt) {
-			lpfc_printf_log(phba, KERN_ERR, LOG_BG,
-					"9087 BLKGRD: %s: Too many sg segments"
-					" from dma_map_sg.  Config %d, seg_cnt"
-					" %d\n",
-					__func__, phba->cfg_sg_seg_cnt,
-					lpfc_cmd->seg_cnt);
-			scsi_dma_unmap(scsi_cmnd);
-			return 1;
-		}
+
+		/* First check if data segment count from SCSI Layer is good */
+		if (lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt)
+			goto err;
 
 		prot_group_type = lpfc_prot_group_type(phba, scsi_cmnd);
 
 		switch (prot_group_type) {
 		case LPFC_PG_TYPE_NO_DIF:
-			num_bde = lpfc_bg_setup_sgl(phba, scsi_cmnd, sgl,
+			/* Here we need to add a DISEED to the count */
+			if ((lpfc_cmd->seg_cnt + 1) > phba->cfg_total_seg_cnt)
+				goto err;
+
+			num_sge = lpfc_bg_setup_sgl(phba, scsi_cmnd, sgl,
 					datasegcnt);
+
 			/* we should have 2 or more entries in buffer list */
-			if (num_bde < 2)
+			if (num_sge < 2)
 				goto err;
 			break;
-		case LPFC_PG_TYPE_DIF_BUF:{
+
+		case LPFC_PG_TYPE_DIF_BUF:
 			/*
 			 * This type indicates that protection buffers are
 			 * passed to the driver, so that needs to be prepared
@@ -3442,31 +3492,28 @@
 			}
 
 			lpfc_cmd->prot_seg_cnt = protsegcnt;
-			if (lpfc_cmd->prot_seg_cnt
-			    > phba->cfg_prot_sg_seg_cnt) {
-				lpfc_printf_log(phba, KERN_ERR, LOG_BG,
-					"9088 BLKGRD: %s: Too many prot sg "
-					"segments from dma_map_sg.  Config %d,"
-						"prot_seg_cnt %d\n", __func__,
-						phba->cfg_prot_sg_seg_cnt,
-						lpfc_cmd->prot_seg_cnt);
-				dma_unmap_sg(&phba->pcidev->dev,
-					     scsi_prot_sglist(scsi_cmnd),
-					     scsi_prot_sg_count(scsi_cmnd),
-					     datadir);
-				scsi_dma_unmap(scsi_cmnd);
-				return 1;
-			}
+			/*
+			 * There is a minimun of 3 SGEs used for every
+			 * protection data segment.
+			 */
+			if ((lpfc_cmd->prot_seg_cnt * 3) >
+			    (phba->cfg_total_seg_cnt - 2))
+				goto err;
 
-			num_bde = lpfc_bg_setup_sgl_prot(phba, scsi_cmnd, sgl,
+			num_sge = lpfc_bg_setup_sgl_prot(phba, scsi_cmnd, sgl,
 					datasegcnt, protsegcnt);
+
 			/* we should have 3 or more entries in buffer list */
-			if (num_bde < 3)
+			if ((num_sge < 3) ||
+			    (num_sge > phba->cfg_total_seg_cnt))
 				goto err;
 			break;
-		}
+
 		case LPFC_PG_TYPE_INVALID:
 		default:
+			scsi_dma_unmap(scsi_cmnd);
+			lpfc_cmd->seg_cnt = 0;
+
 			lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
 					"9083 Unexpected protection group %i\n",
 					prot_group_type);
@@ -3501,10 +3548,22 @@
 
 	return 0;
 err:
+	if (lpfc_cmd->seg_cnt)
+		scsi_dma_unmap(scsi_cmnd);
+	if (lpfc_cmd->prot_seg_cnt)
+		dma_unmap_sg(&phba->pcidev->dev, scsi_prot_sglist(scsi_cmnd),
+			     scsi_prot_sg_count(scsi_cmnd),
+			     scsi_cmnd->sc_data_direction);
+
 	lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
-			"9084 Could not setup all needed BDE's"
-			"prot_group_type=%d, num_bde=%d\n",
-			prot_group_type, num_bde);
+			"9084 Cannot setup S/G List for HBA"
+			"IO segs %d/%d SGL %d SCSI %d: %d %d\n",
+			lpfc_cmd->seg_cnt, lpfc_cmd->prot_seg_cnt,
+			phba->cfg_total_seg_cnt, phba->cfg_sg_seg_cnt,
+			prot_group_type, num_sge);
+
+	lpfc_cmd->seg_cnt = 0;
+	lpfc_cmd->prot_seg_cnt = 0;
 	return 1;
 }
 
@@ -5317,11 +5376,11 @@
 	}
 	num_allocated = lpfc_new_scsi_buf(vport, num_to_alloc);
 	if (num_to_alloc != num_allocated) {
-			lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP,
-				 "0708 Allocation request of %d "
-				 "command buffers did not succeed.  "
-				 "Allocated %d buffers.\n",
-				 num_to_alloc, num_allocated);
+			lpfc_printf_vlog(vport, KERN_ERR, LOG_FCP,
+					 "0708 Allocation request of %d "
+					 "command buffers did not succeed.  "
+					 "Allocated %d buffers.\n",
+					 num_to_alloc, num_allocated);
 	}
 	if (num_allocated > 0)
 		phba->total_scsi_bufs += num_allocated;
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index be02b59..347e22c 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -346,11 +346,6 @@
 #define SLI4_CT_VFI 2
 #define SLI4_CT_FCFI 3
 
-#define LPFC_SLI4_FL1_MAX_SEGMENT_SIZE	0x10000
-#define LPFC_SLI4_FL1_MAX_BUF_SIZE	0X2000
-#define LPFC_SLI4_MIN_BUF_SIZE		0x400
-#define LPFC_SLI4_MAX_BUF_SIZE		0x20000
-
 /*
  * SLI4 specific data structures
  */