IB/mthca: Fix max_srq_sge returned by ib_query_device for Tavor devices

The driver allocates SRQ WQEs size with a power of 2 size both for
Tavor and for memfree. For Tavor, however, the hardware only requires
the WQE size to be a multiple of 16, not a power of 2, and the max
number of scatter-gather allowed is reported accordingly by the
firmware (and this is the value currently returned by
ib_query_device() and ibv_query_device()).

If the max number of scatter/gather entries reported by the FW is used
when creating an SRQ, the creation will fail for Tavor, since the
required WQE size will be increased to the next power of 2, which
turns out to be larger than the device permitted max WQE size (which
is not a power of 2).

This patch reduces the reported SRQ max wqe size so that it can be used
successfully in creating an SRQ on Tavor HCAs.

Signed-off-by: Jack Morgenstein <jackm@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 49d0eae..4c1dcb4 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -151,6 +151,7 @@
 	int      reserved_qps;
 	int      num_srqs;
 	int      max_srq_wqes;
+	int      max_srq_sge;
 	int      reserved_srqs;
 	int      num_eecs;
 	int      reserved_eecs;
@@ -507,6 +508,7 @@
 int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 		     enum ib_srq_attr_mask attr_mask);
 int mthca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
+int mthca_max_srq_sge(struct mthca_dev *dev);
 void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
 		     enum ib_event_type event_type);
 void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr);
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 7e9c97b..9b9ff7b 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -191,6 +191,7 @@
 	mdev->limits.reserved_srqs      = dev_lim->reserved_srqs;
 	mdev->limits.reserved_eecs      = dev_lim->reserved_eecs;
 	mdev->limits.max_desc_sz        = dev_lim->max_desc_sz;
+	mdev->limits.max_srq_sge	= mthca_max_srq_sge(mdev);
 	/*
 	 * Subtract 1 from the limit because we need to allocate a
 	 * spare CQE so the HCA HW can tell the difference between an
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 2c250bc..565a24b 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -106,7 +106,7 @@
 	props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
 	props->max_srq             = mdev->limits.num_srqs - mdev->limits.reserved_srqs;
 	props->max_srq_wr          = mdev->limits.max_srq_wqes;
-	props->max_srq_sge         = mdev->limits.max_sg;
+	props->max_srq_sge         = mdev->limits.max_srq_sge;
 	props->local_ca_ack_delay  = mdev->limits.local_ca_ack_delay;
 	props->atomic_cap          = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ?
 					IB_ATOMIC_HCA : IB_ATOMIC_NONE;
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index 2dd3aea..adcaf85 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -192,7 +192,7 @@
 
 	/* Sanity check SRQ size before proceeding */
 	if (attr->max_wr  > dev->limits.max_srq_wqes ||
-	    attr->max_sge > dev->limits.max_sg)
+	    attr->max_sge > dev->limits.max_srq_sge)
 		return -EINVAL;
 
 	srq->max      = attr->max_wr;
@@ -660,6 +660,31 @@
 	return err;
 }
 
+int mthca_max_srq_sge(struct mthca_dev *dev)
+{
+	if (mthca_is_memfree(dev))
+		return dev->limits.max_sg;
+
+	/*
+	 * SRQ allocations are based on powers of 2 for Tavor,
+	 * (although they only need to be multiples of 16 bytes).
+	 *
+	 * Therefore, we need to base the max number of sg entries on
+	 * the largest power of 2 descriptor size that is <= to the
+	 * actual max WQE descriptor size, rather than return the
+	 * max_sg value given by the firmware (which is based on WQE
+	 * sizes as multiples of 16, not powers of 2).
+	 *
+	 * If SRQ implementation is changed for Tavor to be based on
+	 * multiples of 16, the calculation below can be deleted and
+	 * the FW max_sg value returned.
+	 */
+	return min_t(int, dev->limits.max_sg,
+		     ((1 << (fls(dev->limits.max_desc_sz) - 1)) -
+		      sizeof (struct mthca_next_seg)) /
+		     sizeof (struct mthca_data_seg));
+}
+
 int __devinit mthca_init_srq_table(struct mthca_dev *dev)
 {
 	int err;