mlx4_core: Add support for multiple completion event vectors

When using MSI-X mode, create a completion event queue for each CPU.
Report the number of completion EQs in a new struct mlx4_caps member,
num_comp_vectors, and extend the mlx4_cq_alloc() interface with a
vector parameter so that consumers can specify which completion EQ
should be used to report events for the CQ being created.

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
diff --git a/drivers/net/mlx4/cq.c b/drivers/net/mlx4/cq.c
index b7ad282..ac57b6a 100644
--- a/drivers/net/mlx4/cq.c
+++ b/drivers/net/mlx4/cq.c
@@ -189,7 +189,7 @@
 
 int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
 		  struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq,
-		  int collapsed)
+		  unsigned vector, int collapsed)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_cq_table *cq_table = &priv->cq_table;
@@ -198,6 +198,11 @@
 	u64 mtt_addr;
 	int err;
 
+	if (vector >= dev->caps.num_comp_vectors)
+		return -EINVAL;
+
+	cq->vector = vector;
+
 	cq->cqn = mlx4_bitmap_alloc(&cq_table->bitmap);
 	if (cq->cqn == -1)
 		return -ENOMEM;
@@ -227,7 +232,7 @@
 
 	cq_context->flags	    = cpu_to_be32(!!collapsed << 18);
 	cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index);
-	cq_context->comp_eqn        = priv->eq_table.eq[MLX4_EQ_COMP].eqn;
+	cq_context->comp_eqn	    = priv->eq_table.eq[vector].eqn;
 	cq_context->log_page_size   = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
 
 	mtt_addr = mlx4_mtt_addr(dev, mtt);
@@ -276,7 +281,7 @@
 	if (err)
 		mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn);
 
-	synchronize_irq(priv->eq_table.eq[MLX4_EQ_COMP].irq);
+	synchronize_irq(priv->eq_table.eq[cq->vector].irq);
 
 	spin_lock_irq(&cq_table->lock);
 	radix_tree_delete(&cq_table->tree, cq->cqn);