RDMA/ocrdma: Create IRD queue fix

1)	Fix ocrdma_get_num_posted_shift for up to 128 QPs.
2)	Create the queue with the min of dev->max_wqe and the requested
	WQE count in create_qp (the free-slot arithmetic this relies on
	is sketched after this list).
3)	As part of creating the IRD queue, populate it with basic header
	templates.
4)	Make sure all the DB memory allocated to userspace is page
	aligned (see the PAGE_ALIGN note before the diff).
5)	Fix the match check when searching the mmap local cache.
6)	Some code cleanup.
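
The work-queue bookkeeping in this patch treats each hardware queue as
a circular buffer whose highest usable slot is max_wqe_idx.  A minimal,
self-contained sketch of that arithmetic follows; struct hwq and the
values in it are illustrative stand-ins, not driver code:

	#include <assert.h>

	/* Stand-in for struct ocrdma_qp_hwq_info: a ring with max_cnt
	 * slots, of which max_wqe_idx (= max_cnt - 1) is the last
	 * usable index.
	 */
	struct hwq {
		unsigned int head;	/* next slot to post to */
		unsigned int tail;	/* next slot to complete */
		unsigned int max_cnt;	/* total slots in the ring */
		unsigned int max_wqe_idx;
	};

	/* Same arithmetic as the reworked ocrdma_hwq_free_cnt(): free
	 * slots remaining, with head/tail wraparound folded into one
	 * modulo.
	 */
	static unsigned int hwq_free_cnt(const struct hwq *q)
	{
		return ((q->max_wqe_idx - q->head) + q->tail) % q->max_cnt;
	}

	int main(void)
	{
		struct hwq q = { .head = 0, .tail = 0,
				 .max_cnt = 128, .max_wqe_idx = 127 };

		/* Empty ring reports 127 free: one slot stays reserved
		 * so a full ring is distinguishable from an empty one.
		 */
		assert(hwq_free_cnt(&q) == 127);

		q.head = 5;		/* five WQEs posted... */
		assert(hwq_free_cnt(&q) == 122);

		q.tail = 5;		/* ...and all five completed */
		assert(hwq_free_cnt(&q) == 127);

		q.head = 3;		/* head wrapped past the end */
		q.tail = 120;
		assert(hwq_free_cnt(&q) == 116);

		return 0;
	}

Assuming ring sizes are powers of two, an index can also be kept in
range by masking with max_wqe_idx instead of taking a full modulo; the
wqe_idx computations in the CQE-handling hunks below do exactly that.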

Signed-off-by: Naresh Gottumukkala <bgottumukkala@emulex.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
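
Regarding item 4: every cq->len, qp->sq.len and qp->rq.len handed to
ocrdma_add_mmap()/ocrdma_del_mmap() below is wrapped in PAGE_ALIGN(),
because userspace mmap() operates on whole pages and the cached key
should describe the region the library actually maps and later unmaps.
The same page-rounded value is what uresp.page_size reports back to
the library in the CQ and QP create paths.  A tiny standalone
illustration (the macros are stand-ins for the kernel's
PAGE_SIZE/PAGE_ALIGN, 4K pages assumed):

	#include <assert.h>

	#define PAGE_SZ		4096UL
	#define PG_ALIGN(x)	(((x) + PAGE_SZ - 1) & ~(PAGE_SZ - 1))

	int main(void)
	{
		/* A 24600-byte ring spans seven 4K pages, so the mmap
		 * cache must record 28672 bytes, not 24600.
		 */
		assert(PG_ALIGN(24600UL) == 28672UL);
		return 0;
	}
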
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 5f68dff..278b33b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -186,7 +186,7 @@
 
 	mutex_lock(&uctx->mm_list_lock);
 	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
-		if (len != mm->key.len || phy_addr != mm->key.phy_addr)
+		if (len != mm->key.len && phy_addr != mm->key.phy_addr)
 			continue;
 
 		list_del(&mm->entry);
@@ -204,7 +204,7 @@
 
 	mutex_lock(&uctx->mm_list_lock);
 	list_for_each_entry(mm, &uctx->mm_head, entry) {
-		if (len != mm->key.len || phy_addr != mm->key.phy_addr)
+		if (len != mm->key.len && phy_addr != mm->key.phy_addr)
 			continue;
 
 		found = true;
@@ -307,7 +307,10 @@
 	if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
 		dev->nic_info.db_total_size)) &&
 		(len <=	dev->nic_info.db_page_size)) {
-		/* doorbell mapping */
+		if (vma->vm_flags & VM_READ)
+			return -EPERM;
+
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 		status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
 					    len, vma->vm_page_prot);
 	} else if (dev->nic_info.dpp_unmapped_len &&
@@ -315,12 +318,13 @@
 		(vm_page <= (u64) (dev->nic_info.dpp_unmapped_addr +
 			dev->nic_info.dpp_unmapped_len)) &&
 		(len <= dev->nic_info.dpp_unmapped_len)) {
-		/* dpp area mapping */
+		if (vma->vm_flags & VM_READ)
+			return -EPERM;
+
 		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 		status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
 					    len, vma->vm_page_prot);
 	} else {
-		/* queue memory mapping */
 		status = remap_pfn_range(vma, vma->vm_start,
 					 vma->vm_pgoff, len, vma->vm_page_prot);
 	}
@@ -351,9 +355,9 @@
 
 	if (pd->dpp_enabled) {
 		dpp_page_addr = dev->nic_info.dpp_unmapped_addr +
-				(pd->id * OCRDMA_DPP_PAGE_SIZE);
+				(pd->id * PAGE_SIZE);
 		status = ocrdma_add_mmap(uctx, dpp_page_addr,
-				 OCRDMA_DPP_PAGE_SIZE);
+				 PAGE_SIZE);
 		if (status)
 			goto dpp_map_err;
 		rsp.dpp_page_addr_hi = upper_32_bits(dpp_page_addr);
@@ -369,7 +373,7 @@
 
 ucopy_err:
 	if (pd->dpp_enabled)
-		ocrdma_del_mmap(pd->uctx, dpp_page_addr, OCRDMA_DPP_PAGE_SIZE);
+		ocrdma_del_mmap(pd->uctx, dpp_page_addr, PAGE_SIZE);
 dpp_map_err:
 	ocrdma_del_mmap(pd->uctx, db_page_addr, db_page_size);
 	return status;
@@ -392,10 +396,18 @@
 		pd->num_dpp_qp =
 			pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0;
 	}
+retry:
 	status = ocrdma_mbx_alloc_pd(dev, pd);
 	if (status) {
-		kfree(pd);
-		return ERR_PTR(status);
+		/* try for pd without dpp */
+		if (pd->dpp_enabled) {
+			pd->dpp_enabled = false;
+			pd->num_dpp_qp = 0;
+			goto retry;
+		} else {
+			kfree(pd);
+			return ERR_PTR(status);
+		}
 	}
 
 	if (udata && context) {
@@ -421,9 +433,9 @@
 	status = ocrdma_mbx_dealloc_pd(dev, pd);
 	if (pd->uctx) {
 		u64 dpp_db = dev->nic_info.dpp_unmapped_addr +
-		    (pd->id * OCRDMA_DPP_PAGE_SIZE);
+		    (pd->id * PAGE_SIZE);
 		if (pd->dpp_enabled)
-			ocrdma_del_mmap(pd->uctx, dpp_db, OCRDMA_DPP_PAGE_SIZE);
+			ocrdma_del_mmap(pd->uctx, dpp_db, PAGE_SIZE);
 		usr_db = dev->nic_info.unmapped_db +
 		    (pd->id * dev->nic_info.db_page_size);
 		ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size);
@@ -693,7 +705,7 @@
 
 	memset(&uresp, 0, sizeof(uresp));
 	uresp.cq_id = cq->id;
-	uresp.page_size = cq->len;
+	uresp.page_size = PAGE_ALIGN(cq->len);
 	uresp.num_pages = 1;
 	uresp.max_hw_cqe = cq->max_hw_cqe;
 	uresp.page_addr[0] = cq->pa;
@@ -788,7 +800,8 @@
 	status = ocrdma_mbx_destroy_cq(dev, cq);
 
 	if (cq->ucontext) {
-		ocrdma_del_mmap(cq->ucontext, (u64) cq->pa, cq->len);
+		ocrdma_del_mmap(cq->ucontext, (u64) cq->pa,
+				PAGE_ALIGN(cq->len));
 		ocrdma_del_mmap(cq->ucontext, dev->nic_info.unmapped_db,
 				dev->nic_info.db_page_size);
 	}
@@ -817,14 +830,17 @@
 static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
 				  struct ib_qp_init_attr *attrs)
 {
-	if (attrs->qp_type != IB_QPT_GSI &&
-	    attrs->qp_type != IB_QPT_RC &&
-	    attrs->qp_type != IB_QPT_UD) {
+	if ((attrs->qp_type != IB_QPT_GSI) &&
+	    (attrs->qp_type != IB_QPT_RC) &&
+	    (attrs->qp_type != IB_QPT_UC) &&
+	    (attrs->qp_type != IB_QPT_UD)) {
 		pr_err("%s(%d) unsupported qp type=0x%x requested\n",
 		       __func__, dev->id, attrs->qp_type);
 		return -EINVAL;
 	}
-	if (attrs->cap.max_send_wr > dev->attr.max_wqe) {
+	/* Skip the check for QP1 to support CM size of 128 */
+	if ((attrs->qp_type != IB_QPT_GSI) &&
+	    (attrs->cap.max_send_wr > dev->attr.max_wqe)) {
 		pr_err("%s(%d) unsupported send_wr=0x%x requested\n",
 		       __func__, dev->id, attrs->cap.max_send_wr);
 		pr_err("%s(%d) supported send_wr=0x%x\n",
@@ -875,11 +891,9 @@
 	/* verify consumer QPs are not trying to use GSI QP's CQ */
 	if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created)) {
 		if ((dev->gsi_sqcq == get_ocrdma_cq(attrs->send_cq)) ||
-		    (dev->gsi_sqcq == get_ocrdma_cq(attrs->recv_cq)) ||
-		    (dev->gsi_rqcq == get_ocrdma_cq(attrs->send_cq)) ||
-		    (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) {
+		    (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) {
 			pr_err("%s(%d) Consumer QP cannot use GSI CQs.\n",
 			       __func__, dev->id);
 			return -EINVAL;
 		}
 	}
@@ -902,13 +916,13 @@
 	uresp.qp_id = qp->id;
 	uresp.sq_dbid = qp->sq.dbid;
 	uresp.num_sq_pages = 1;
-	uresp.sq_page_size = qp->sq.len;
+	uresp.sq_page_size = PAGE_ALIGN(qp->sq.len);
 	uresp.sq_page_addr[0] = qp->sq.pa;
 	uresp.num_wqe_allocated = qp->sq.max_cnt;
 	if (!srq) {
 		uresp.rq_dbid = qp->rq.dbid;
 		uresp.num_rq_pages = 1;
-		uresp.rq_page_size = qp->rq.len;
+		uresp.rq_page_size = PAGE_ALIGN(qp->rq.len);
 		uresp.rq_page_addr[0] = qp->rq.pa;
 		uresp.num_rqe_allocated = qp->rq.max_cnt;
 	}
@@ -1043,6 +1057,9 @@
 	}
 	qp->dev = dev;
 	ocrdma_set_qp_init_params(qp, pd, attrs);
+	if (udata == NULL)
+		qp->cap_flags |= (OCRDMA_QP_MW_BIND | OCRDMA_QP_LKEY0 |
+					OCRDMA_QP_FAST_REG);
 
 	mutex_lock(&dev->dev_lock);
 	status = ocrdma_mbx_create_qp(qp, attrs, ureq.enable_dpp_cq,
@@ -1053,8 +1070,6 @@
 
 	/* user space QP's wr_id table are managed in library */
 	if (udata == NULL) {
-		qp->cap_flags |= (OCRDMA_QP_MW_BIND | OCRDMA_QP_LKEY0 |
-				  OCRDMA_QP_FAST_REG);
 		status = ocrdma_alloc_wr_id_tbl(qp);
 		if (status)
 			goto map_err;
@@ -1290,22 +1305,16 @@
 static int ocrdma_hwq_free_cnt(struct ocrdma_qp_hwq_info *q)
 {
-	int free_cnt;
-	if (q->head >= q->tail)
-		free_cnt = (q->max_cnt - q->head) + q->tail;
-	else
-		free_cnt = q->tail - q->head;
-	return free_cnt;
+	return ((q->max_wqe_idx - q->head) + q->tail) % q->max_cnt;
 }
 
 static int is_hw_sq_empty(struct ocrdma_qp *qp)
 {
-	return (qp->sq.tail == qp->sq.head &&
-		ocrdma_hwq_free_cnt(&qp->sq) ? 1 : 0);
+	return (qp->sq.tail == qp->sq.head);
 }
 
 static int is_hw_rq_empty(struct ocrdma_qp *qp)
 {
-	return (qp->rq.tail == qp->rq.head) ? 1 : 0;
+	return (qp->rq.tail == qp->rq.head);
 }
 
 static void *ocrdma_hwq_head(struct ocrdma_qp_hwq_info *q)
@@ -1456,9 +1465,11 @@
 	mutex_unlock(&dev->dev_lock);
 
 	if (pd->uctx) {
-		ocrdma_del_mmap(pd->uctx, (u64) qp->sq.pa, qp->sq.len);
+		ocrdma_del_mmap(pd->uctx, (u64) qp->sq.pa,
+				PAGE_ALIGN(qp->sq.len));
 		if (!qp->srq)
-			ocrdma_del_mmap(pd->uctx, (u64) qp->rq.pa, qp->rq.len);
+			ocrdma_del_mmap(pd->uctx, (u64) qp->rq.pa,
+					PAGE_ALIGN(qp->rq.len));
 	}
 
 	ocrdma_del_flush_qp(qp);
@@ -1603,7 +1614,8 @@
 	status = ocrdma_mbx_destroy_srq(dev, srq);
 
 	if (srq->pd->uctx)
-		ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa, srq->rq.len);
+		ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa,
+				PAGE_ALIGN(srq->rq.len));
 
 	kfree(srq->idx_bit_fields);
 	kfree(srq->rqe_wr_id_tbl);
@@ -1650,7 +1662,7 @@
 				    struct ocrdma_sge *sge,
 				    struct ib_send_wr *wr, u32 wqe_size)
 {
-	if (wr->send_flags & IB_SEND_INLINE) {
+	if (wr->send_flags & IB_SEND_INLINE && qp->qp_type != IB_QPT_UD) {
 		if (wr->sg_list[0].length > qp->max_inline_data) {
 			pr_err("%s() supported_len=0x%x,\n"
 			       " unspported len req=0x%x\n", __func__,
@@ -1662,6 +1674,8 @@
 		       wr->sg_list[0].length);
 		hdr->total_len = wr->sg_list[0].length;
 		wqe_size += roundup(hdr->total_len, OCRDMA_WQE_ALIGN_BYTES);
+		if (wr->sg_list[0].length == 0)
+			wqe_size += sizeof(struct ocrdma_sge);
 		hdr->cw |= (OCRDMA_TYPE_INLINE << OCRDMA_WQE_TYPE_SHIFT);
 	} else {
 		ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
@@ -2208,7 +2222,8 @@
 		ocrdma_update_wc(qp, ibwc, tail);
 		*polled = true;
 	}
-	wqe_idx = le32_to_cpu(cqe->wq.wqeidx) &	OCRDMA_CQE_WQEIDX_MASK;
+	wqe_idx = (le32_to_cpu(cqe->wq.wqeidx) &
+			OCRDMA_CQE_WQEIDX_MASK) & qp->sq.max_wqe_idx;
 	if (tail != wqe_idx)
 		expand = true; /* Coalesced CQE can't be consumed yet */
 
@@ -2257,7 +2272,8 @@
 	u32 wqe_idx;
 
 	srq = get_ocrdma_srq(qp->ibqp.srq);
-	wqe_idx = le32_to_cpu(cqe->rq.buftag_qpn) >> OCRDMA_CQE_BUFTAG_SHIFT;
+	wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
+			OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx;
 	ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx];
 	spin_lock_irqsave(&srq->q_lock, flags);
 	ocrdma_srq_toggle_bit(srq, wqe_idx);