IB/mlx4: Add a driver for Mellanox ConnectX InfiniBand adapters

Add an InfiniBand driver for Mellanox ConnectX adapters.  Because
these adapters can also be used as ethernet NICs and Fibre Channel 
HBAs, the driver is split into two modules: 
 
  mlx4_core: Handles low-level things like device initialization and 
    processing firmware commands.  Also controls resource allocation 
    so that the InfiniBand, ethernet and FC functions can share a 
    device without stepping on each other. 
 
  mlx4_ib: Handles InfiniBand-specific things; plugs into the 
    InfiniBand midlayer. 

Signed-off-by: Roland Dreier <rolandd@cisco.com>
diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
new file mode 100644
index 0000000..acf1c80
--- /dev/null
+++ b/drivers/net/mlx4/eq.c
@@ -0,0 +1,696 @@
+/*
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *	- Redistributions of source code must retain the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer.
+ *
+ *	- Redistributions in binary form must reproduce the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer in the documentation and/or other materials
+ *	  provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+
+#include <linux/mlx4/cmd.h>
+
+#include "mlx4.h"
+#include "fw.h"
+
+enum {
+	MLX4_NUM_ASYNC_EQE	= 0x100,	/* entries requested for the async EQ */
+	MLX4_NUM_SPARE_EQE	= 0x80,		/* added to the comp/async EQ sizes; bounds the CI update interval */
+	MLX4_EQ_ENTRY_SIZE	= 0x20		/* size of one EQ entry in bytes */
+};
+
+/*
+ * EQ context that mlx4_create_eq() builds in a mailbox (64 bytes, not packed).
+ */
+struct mlx4_eq_context {
+	__be32			flags;		/* MLX4_EQ_STATUS_* | MLX4_EQ_STATE_* */
+	u16			reserved1[3];
+	__be16			page_offset;
+	u8			log_eq_size;	/* ilog2 of the number of entries */
+	u8			reserved2[4];
+	u8			eq_period;
+	u8			reserved3;
+	u8			eq_max_count;
+	u8			reserved4[3];
+	u8			intr;		/* intr argument of mlx4_create_eq() */
+	u8			log_page_size;	/* PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT */
+	u8			reserved5[2];
+	u8			mtt_base_addr_h;	/* MTT address bits 39:32 */
+	__be32			mtt_base_addr_l;	/* MTT address bits 31:0 */
+	u32			reserved6[2];
+	__be32			consumer_index;
+	__be32			producer_index;
+	u32			reserved7[4];
+};
+
+#define MLX4_EQ_STATUS_OK	   ( 0 << 28)	/* OR'ed into mlx4_eq_context.flags */
+#define MLX4_EQ_STATUS_WRITE_FAIL  (10 << 28)
+#define MLX4_EQ_OWNER_SW	   ( 0 << 24)
+#define MLX4_EQ_OWNER_HW	   ( 1 << 24)
+#define MLX4_EQ_FLAG_EC		   ( 1 << 18)
+#define MLX4_EQ_FLAG_OI		   ( 1 << 17)
+#define MLX4_EQ_STATE_ARMED	   ( 9 <<  8)	/* state written by mlx4_create_eq() */
+#define MLX4_EQ_STATE_FIRED	   (10 <<  8)
+#define MLX4_EQ_STATE_ALWAYS_ARMED (11 <<  8)
+
+#define MLX4_ASYNC_EVENT_MASK ((1ull << MLX4_EVENT_TYPE_PATH_MIG)	    | \
+			       (1ull << MLX4_EVENT_TYPE_COMM_EST)	    | \
+			       (1ull << MLX4_EVENT_TYPE_SQ_DRAINED)	    | \
+			       (1ull << MLX4_EVENT_TYPE_CQ_ERROR)	    | \
+			       (1ull << MLX4_EVENT_TYPE_WQ_CATAS_ERROR)	    | \
+			       (1ull << MLX4_EVENT_TYPE_EEC_CATAS_ERROR)    | \
+			       (1ull << MLX4_EVENT_TYPE_PATH_MIG_FAILED)    | \
+			       (1ull << MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
+			       (1ull << MLX4_EVENT_TYPE_WQ_ACCESS_ERROR)    | \
+			       (1ull << MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR)  | \
+			       (1ull << MLX4_EVENT_TYPE_PORT_CHANGE)	    | \
+			       (1ull << MLX4_EVENT_TYPE_ECC_DETECT)	    | \
+			       (1ull << MLX4_EVENT_TYPE_SRQ_CATAS_ERROR)    | \
+			       (1ull << MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE)    | \
+			       (1ull << MLX4_EVENT_TYPE_SRQ_LIMIT)	    | \
+			       (1ull << MLX4_EVENT_TYPE_CMD))	/* one bit per event type mapped to the async EQ */
+#define MLX4_CATAS_EVENT_MASK  (1ull << MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR)	/* mapped to the catas EQ under MSI-X */
+
+struct mlx4_eqe {
+	u8			reserved1;
+	u8			type;		/* MLX4_EVENT_TYPE_*; switched on in mlx4_eq_int() */
+	u8			reserved2;
+	u8			subtype;	/* forwarded with port change events */
+	union {
+		u32		raw[6];		/* 24 bytes of event-specific payload */
+		struct {
+			__be32	cqn;
+		} __attribute__((packed)) comp;
+		struct {
+			u16	reserved1;
+			__be16	token;
+			u32	reserved2;
+			u8	reserved3[3];
+			u8	status;
+			__be64	out_param;
+		} __attribute__((packed)) cmd;
+		struct {
+			__be32	qpn;
+		} __attribute__((packed)) qp;
+		struct {
+			__be32	srqn;
+		} __attribute__((packed)) srq;
+		struct {
+			__be32	cqn;
+			u32	reserved1;
+			u8	reserved2[3];
+			u8	syndrome;	/* 1 is reported as "overrun", anything else as "access violation" */
+		} __attribute__((packed)) cq_err;
+		struct {
+			u32	reserved1[2];
+			__be32	port;		/* mlx4_eq_int() uses the top 4 bits */
+		} __attribute__((packed)) port_change;
+	}			event;
+	u8			reserved3[3];
+	u8			owner;		/* bit 7 (0x80) is the ownership bit tested by next_eqe_sw() */
+} __attribute__((packed));
+
+static void eq_set_ci(struct mlx4_eq *eq, int req_not)
+{
+	__raw_writel((__force u32) cpu_to_be32((eq->cons_index & 0xffffff) |	/* low 24 bits: consumer index */
+					       req_not << 31),			/* bit 31: req_not */
+		     eq->doorbell);
+	/* The value is already big-endian, hence the raw write; we still want ordering, so add a barrier */
+	mb();
+}
+
+static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry)
+{
+	unsigned long byte_off = (entry & (eq->nent - 1)) * MLX4_EQ_ENTRY_SIZE;	/* wrap to the ring, then scale */
+	return eq->page_list[byte_off / PAGE_SIZE].buf + byte_off % PAGE_SIZE;
+}
+
+static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq)
+{
+	struct mlx4_eqe *cur = get_eqe(eq, eq->cons_index);	/* entry at the consumer index */
+	return !(cur->owner & 0x80) == !(eq->cons_index & eq->nent) ? cur : NULL;
+}
+
+/*
+ * Handle every software-owned entry on @eq: dispatch it by event type and
+ * advance the consumer index.  Returns 1 if any entry was found, else 0.
+ */
+static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
+{
+	struct mlx4_eqe *eqe;
+	int eqes_found = 0;
+	int set_ci = 0;
+
+	while ((eqe = next_eqe_sw(eq))) {
+		/*
+		 * Make sure we read EQ entry contents after we've
+		 * checked the ownership bit.
+		 */
+		rmb();
+
+		switch (eqe->type) {
+		case MLX4_EVENT_TYPE_COMP:
+			mlx4_cq_completion(dev,
+					   be32_to_cpu(eqe->event.comp.cqn) & 0xffffff);
+			break;
+
+		case MLX4_EVENT_TYPE_PATH_MIG:
+		case MLX4_EVENT_TYPE_COMM_EST:
+		case MLX4_EVENT_TYPE_SQ_DRAINED:
+		case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
+		case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
+		case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
+		case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+		case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
+			mlx4_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
+				      eqe->type);
+			break;
+
+		case MLX4_EVENT_TYPE_SRQ_LIMIT:
+		case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR:
+			mlx4_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) & 0xffffff,
+				      eqe->type);
+			break;
+
+		case MLX4_EVENT_TYPE_CMD:
+			mlx4_cmd_event(dev,
+				       be16_to_cpu(eqe->event.cmd.token),
+				       eqe->event.cmd.status,
+				       be64_to_cpu(eqe->event.cmd.out_param));
+			break;
+
+		case MLX4_EVENT_TYPE_PORT_CHANGE:
+			mlx4_dispatch_event(dev, eqe->type, eqe->subtype,
+					    be32_to_cpu(eqe->event.port_change.port) >> 28);
+			break;
+
+		case MLX4_EVENT_TYPE_CQ_ERROR:
+			mlx4_warn(dev, "CQ %s on CQN %06x\n",
+				  eqe->event.cq_err.syndrome == 1 ?
+				  "overrun" : "access violation",
+				  be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff);
+			/* Only the low 24 bits are the CQN, as in the warning above. */
+			mlx4_cq_event(dev, be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff,
+				      eqe->type);
+			break;
+
+		case MLX4_EVENT_TYPE_EQ_OVERFLOW:
+			mlx4_warn(dev, "EQ overrun on EQN %d\n", eq->eqn);
+			break;
+
+		case MLX4_EVENT_TYPE_EEC_CATAS_ERROR:
+		case MLX4_EVENT_TYPE_ECC_DETECT:
+		default:
+			mlx4_warn(dev, "Unhandled event %02x(%02x) on EQ %d at index %u\n",
+				  eqe->type, eqe->subtype, eq->eqn, eq->cons_index);
+			break;
+		}
+
+		++eq->cons_index;
+		eqes_found = 1;
+		++set_ci;
+
+		/*
+		 * The HCA will think the queue has overflowed if we
+		 * don't tell it we've been processing events.  We
+		 * create our EQs with MLX4_NUM_SPARE_EQE extra
+		 * entries, so we must update our consumer index at
+		 * least that often (with req_not clear; rare path).
+		 */
+		if (unlikely(set_ci >= MLX4_NUM_SPARE_EQE)) {
+			eq_set_ci(eq, 0);
+			set_ci = 0;
+		}
+	}
+
+	eq_set_ci(eq, 1);
+
+	return eqes_found;
+}
+
+static irqreturn_t mlx4_interrupt(int irq, void *dev_ptr)
+{
+	struct mlx4_dev *dev = dev_ptr;
+	struct mlx4_eq_table *table = &mlx4_priv(dev)->eq_table;
+	int handled = 0;
+	int n;
+
+	/* Write the clear mask first, then poll each EQ below MLX4_EQ_CATAS. */
+	writel(table->clr_mask, table->clr_int);
+	for (n = 0; n < MLX4_EQ_CATAS; ++n)
+		handled |= mlx4_eq_int(dev, &table->eq[n]);
+
+	return IRQ_RETVAL(handled);
+}
+
+static irqreturn_t mlx4_msi_x_interrupt(int irq, void *eq_ptr)
+{
+	struct mlx4_eq *eq = eq_ptr;
+
+	/* Whether any entry was found does not matter for this vector. */
+	mlx4_eq_int(eq->dev, eq);
+
+	/* MSI-X vectors always belong to us */
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t mlx4_catas_interrupt(int irq, void *dev_ptr)
+{
+	mlx4_handle_catas_err(dev_ptr);	/* dev_ptr is the dev_id given to request_irq() */
+
+	/* MSI-X vectors always belong to us */
+	return IRQ_HANDLED;
+}
+
+static int mlx4_MAP_EQ(struct mlx4_dev *dev, u64 event_mask,
+		       int unmap, int eq_num)
+{
+	return mlx4_cmd(dev, event_mask, eq_num | (unmap << 31),	/* bit 31: unmap */
+			0, MLX4_CMD_MAP_EQ, MLX4_CMD_TIME_CLASS_B);
+}
+
+static int mlx4_SW2HW_EQ(struct mlx4_dev *dev,
+			 struct mlx4_cmd_mailbox *mailbox, int eq_num)
+{
+	return mlx4_cmd(dev, mailbox->dma, eq_num, 0,	/* the mailbox holds the EQ context */
+			MLX4_CMD_SW2HW_EQ, MLX4_CMD_TIME_CLASS_A);
+}
+
+static int mlx4_HW2SW_EQ(struct mlx4_dev *dev,
+			 struct mlx4_cmd_mailbox *mailbox, int eq_num)
+{
+	return mlx4_cmd_box(dev, 0, mailbox->dma, eq_num, 0,	/* mailbox->dma is the third argument here */
+			    MLX4_CMD_HW2SW_EQ, MLX4_CMD_TIME_CLASS_A);
+}
+
+static void __devinit __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev,
+					       struct mlx4_eq *eq)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	int page = eq->eqn / 4;	/* four EQ doorbells per mapped page */
+	int slot = page - dev->caps.reserved_eqs / 4;
+
+	/*
+	 * Map the doorbell page on first use; later calls reuse it.
+	 */
+	if (!priv->eq_table.uar_map[slot])
+		priv->eq_table.uar_map[slot] =
+			ioremap(pci_resource_start(dev->pdev, 2) +
+				(page << PAGE_SHIFT), PAGE_SIZE);
+	if (!priv->eq_table.uar_map[slot]) {
+		mlx4_err(dev, "Couldn't map EQ doorbell for EQN 0x%06x\n",
+			 eq->eqn);
+		return NULL;
+	}
+
+	return priv->eq_table.uar_map[slot] + 0x800 + 8 * (eq->eqn % 4);
+}
+
+static int __devinit mlx4_create_eq(struct mlx4_dev *dev, int nent,
+				    u8 intr, struct mlx4_eq *eq)	/* returns 0 on success */
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_eq_context *eq_context;
+	int npages;
+	u64 *dma_list = NULL;
+	dma_addr_t t;
+	u64 mtt_addr;
+	int err = -ENOMEM;	/* used by the failure paths that do not set err themselves */
+	int i;
+
+	eq->dev   = dev;
+	eq->nent  = roundup_pow_of_two(max(nent, 2));	/* power of two, at least 2 */
+	npages = PAGE_ALIGN(eq->nent * MLX4_EQ_ENTRY_SIZE) / PAGE_SIZE;
+
+	eq->page_list = kmalloc(npages * sizeof *eq->page_list,
+				GFP_KERNEL);
+	if (!eq->page_list)
+		goto err_out;
+
+	for (i = 0; i < npages; ++i)
+		eq->page_list[i].buf = NULL;	/* lets the unwind skip pages never allocated */
+
+	dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
+	if (!dma_list)
+		goto err_out_free;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		goto err_out_free;
+	eq_context = mailbox->buf;	/* the EQ context is built directly in the mailbox */
+
+	for (i = 0; i < npages; ++i) {
+		eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev,
+							  PAGE_SIZE, &t, GFP_KERNEL);
+		if (!eq->page_list[i].buf)
+			goto err_out_free_pages;
+
+		dma_list[i] = t;	/* bus addresses written to the MTT below */
+		eq->page_list[i].map = t;
+
+		memset(eq->page_list[i].buf, 0, PAGE_SIZE);
+	}
+
+	eq->eqn = mlx4_bitmap_alloc(&priv->eq_table.bitmap);
+	if (eq->eqn == -1)	/* -1 is treated as allocation failure */
+		goto err_out_free_pages;
+
+	eq->doorbell = mlx4_get_eq_uar(dev, eq);
+	if (!eq->doorbell) {
+		err = -ENOMEM;
+		goto err_out_free_eq;
+	}
+
+	err = mlx4_mtt_init(dev, npages, PAGE_SHIFT, &eq->mtt);
+	if (err)
+		goto err_out_free_eq;
+
+	err = mlx4_write_mtt(dev, &eq->mtt, 0, npages, dma_list);
+	if (err)
+		goto err_out_free_mtt;
+
+	memset(eq_context, 0, sizeof *eq_context);
+	eq_context->flags	  = cpu_to_be32(MLX4_EQ_STATUS_OK   |
+						MLX4_EQ_STATE_ARMED);
+	eq_context->log_eq_size	  = ilog2(eq->nent);
+	eq_context->intr	  = intr;
+	eq_context->log_page_size = PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT;
+
+	mtt_addr = mlx4_mtt_addr(dev, &eq->mtt);
+	eq_context->mtt_base_addr_h = mtt_addr >> 32;	/* truncated to 8 bits by the u8 field */
+	eq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff);
+
+	err = mlx4_SW2HW_EQ(dev, mailbox, eq->eqn);
+	if (err) {
+		mlx4_warn(dev, "SW2HW_EQ failed (%d)\n", err);
+		goto err_out_free_mtt;
+	}
+
+	kfree(dma_list);	/* only needed while writing the MTT */
+	mlx4_free_cmd_mailbox(dev, mailbox);
+
+	eq->cons_index = 0;
+
+	return err;	/* err is 0 here */
+
+err_out_free_mtt:
+	mlx4_mtt_cleanup(dev, &eq->mtt);
+
+err_out_free_eq:
+	mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn);
+
+err_out_free_pages:
+	for (i = 0; i < npages; ++i)
+		if (eq->page_list[i].buf)	/* NULL for pages that were never allocated */
+			dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+					  eq->page_list[i].buf,
+					  eq->page_list[i].map);
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+
+err_out_free:
+	kfree(eq->page_list);
+	kfree(dma_list);	/* may still be NULL here */
+
+err_out:
+	return err;
+}
+
+/* Issue HW2SW_EQ for @eq, then release its MTT, pages, page list and EQN. */
+static void mlx4_free_eq(struct mlx4_dev *dev, struct mlx4_eq *eq)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_cmd_mailbox *mailbox;
+	int err;
+	int npages = PAGE_ALIGN(MLX4_EQ_ENTRY_SIZE * eq->nent) / PAGE_SIZE;
+	int i;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return;	/* NOTE(review): nothing is released on this path */
+
+	err = mlx4_HW2SW_EQ(dev, mailbox, eq->eqn);
+	if (err)
+		mlx4_warn(dev, "HW2SW_EQ failed (%d)\n", err);
+
+	if (0) {	/* disabled debug dump of the mailbox contents */
+		mlx4_dbg(dev, "Dumping EQ context %02x:\n", eq->eqn);
+		for (i = 0; i < sizeof (struct mlx4_eq_context) / 4; ++i) {
+			if (i % 4 == 0)
+				printk("[%02x] ", i * 4);
+			printk(" %08x", be32_to_cpup(mailbox->buf + i * 4));
+			if ((i + 1) % 4 == 0)
+				printk("\n");
+		}
+	}
+
+	mlx4_mtt_cleanup(dev, &eq->mtt);
+	for (i = 0; i < npages; ++i)	/* same DMA API as the allocation in mlx4_create_eq() */
+		dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+				  eq->page_list[i].buf,
+				  eq->page_list[i].map);
+
+	kfree(eq->page_list);
+	mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn);
+	mlx4_free_cmd_mailbox(dev, mailbox);
+}
+
+static void mlx4_free_irqs(struct mlx4_dev *dev)
+{
+	struct mlx4_eq *eq = mlx4_priv(dev)->eq_table.eq;
+	int i;
+
+	if (mlx4_priv(dev)->eq_table.have_irq)	/* the shared non-MSI-X interrupt */
+		free_irq(dev->pdev->irq, dev);
+	for (i = 0; i < MLX4_NUM_EQ; ++i)	/* dev_id must match request_irq(): */
+		if (eq[i].have_irq)		/* the catas vector was requested with dev */
+			free_irq(eq[i].irq, i == MLX4_EQ_CATAS ? (void *) dev : &eq[i]);
+}
+
+static int __devinit mlx4_map_clr_int(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+
+	/* The register sits at fw.clr_int_base inside BAR fw.clr_int_bar. */
+	priv->clr_base = ioremap(pci_resource_start(dev->pdev, priv->fw.clr_int_bar) +
+				 priv->fw.clr_int_base, MLX4_CLR_INT_SIZE);
+	if (priv->clr_base)
+		return 0;
+
+	mlx4_err(dev, "Couldn't map interrupt clear register, aborting.\n");
+	return -ENOMEM;
+}
+
+static void mlx4_unmap_clr_int(struct mlx4_dev *dev)
+{
+	void __iomem *clr = mlx4_priv(dev)->clr_base;
+
+	iounmap(clr);	/* mapped by mlx4_map_clr_int() */
+}
+
+int __devinit mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt)
+{
+	struct mlx4_eq_table *table = &mlx4_priv(dev)->eq_table;
+	int err = -ENOMEM;
+
+	/*
+	 * A single page is assumed to be enough for the whole EQ
+	 * context table.  That holds for all current HCAs: only 32
+	 * EQs are used and each needs 64 bytes of context memory,
+	 * i.e. 2 KB in total.
+	 */
+	table->icm_virt = icm_virt;
+	table->icm_page = alloc_page(GFP_HIGHUSER);
+	if (!table->icm_page)
+		return -ENOMEM;
+
+	table->icm_dma = pci_map_page(dev->pdev, table->icm_page, 0,
+				      PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+	if (pci_dma_mapping_error(table->icm_dma))
+		goto err_free_page;
+
+	err = mlx4_MAP_ICM_page(dev, table->icm_dma, icm_virt);
+	if (!err)
+		return 0;
+
+	pci_unmap_page(dev->pdev, table->icm_dma, PAGE_SIZE,
+		       PCI_DMA_BIDIRECTIONAL);
+err_free_page:
+	__free_page(table->icm_page);
+	return err;
+}
+
+void mlx4_unmap_eq_icm(struct mlx4_dev *dev)
+{
+	struct mlx4_eq_table *table = &mlx4_priv(dev)->eq_table;
+
+	mlx4_UNMAP_ICM(dev, table->icm_virt, 1);	/* reverse order of mlx4_map_eq_icm() */
+	pci_unmap_page(dev->pdev, table->icm_dma, PAGE_SIZE,
+		       PCI_DMA_BIDIRECTIONAL);
+	__free_page(table->icm_page);
+}
+
+/* Create the EQs and hook up their interrupts; returns 0 or an error code. */
+int __devinit mlx4_init_eq_table(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	int err;
+	int i;
+
+	err = mlx4_bitmap_init(&priv->eq_table.bitmap, dev->caps.num_eqs,
+			       dev->caps.num_eqs - 1, dev->caps.reserved_eqs);
+	if (err)
+		return err;
+
+	for (i = 0; i < ARRAY_SIZE(priv->eq_table.uar_map); ++i)
+		priv->eq_table.uar_map[i] = NULL;	/* filled lazily by mlx4_get_eq_uar() */
+
+	err = mlx4_map_clr_int(dev);
+	if (err)
+		goto err_out_free;
+
+	priv->eq_table.clr_mask = swab32(1 << (priv->eq_table.inta_pin & 31));
+	priv->eq_table.clr_int  = priv->clr_base +
+		(priv->eq_table.inta_pin < 32 ? 4 : 0);
+
+	err = mlx4_create_eq(dev, dev->caps.num_cqs + MLX4_NUM_SPARE_EQE,
+			     (dev->flags & MLX4_FLAG_MSI_X) ? MLX4_EQ_COMP : 0,
+			     &priv->eq_table.eq[MLX4_EQ_COMP]);
+	if (err)
+		goto err_out_unmap;
+
+	err = mlx4_create_eq(dev, MLX4_NUM_ASYNC_EQE + MLX4_NUM_SPARE_EQE,
+			     (dev->flags & MLX4_FLAG_MSI_X) ? MLX4_EQ_ASYNC : 0,
+			     &priv->eq_table.eq[MLX4_EQ_ASYNC]);
+	if (err)
+		goto err_out_comp;
+
+	if (dev->flags & MLX4_FLAG_MSI_X) {
+		static const char *eq_name[] = {
+			[MLX4_EQ_COMP]  = DRV_NAME " (comp)",
+			[MLX4_EQ_ASYNC] = DRV_NAME " (async)",
+			[MLX4_EQ_CATAS] = DRV_NAME " (catas)"
+		};
+
+		err = mlx4_create_eq(dev, 1, MLX4_EQ_CATAS,
+				     &priv->eq_table.eq[MLX4_EQ_CATAS]);
+		if (err)
+			goto err_out_async;	/* no IRQ has been requested yet */
+
+		for (i = 0; i < MLX4_EQ_CATAS; ++i) {
+			err = request_irq(priv->eq_table.eq[i].irq,
+					  mlx4_msi_x_interrupt,
+					  0, eq_name[i], priv->eq_table.eq + i);
+			if (err)
+				goto err_out_catas;
+			priv->eq_table.eq[i].have_irq = 1;
+		}
+
+		err = request_irq(priv->eq_table.eq[MLX4_EQ_CATAS].irq,
+				  mlx4_catas_interrupt, 0,
+				  eq_name[MLX4_EQ_CATAS], dev);
+		if (err)
+			goto err_out_catas;
+		priv->eq_table.eq[MLX4_EQ_CATAS].have_irq = 1;
+	} else {
+		err = request_irq(dev->pdev->irq, mlx4_interrupt,
+				  SA_SHIRQ, DRV_NAME, dev);
+		if (err)
+			goto err_out_async;	/* no IRQ has been requested yet */
+		priv->eq_table.have_irq = 1;
+	}
+
+	err = mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0,
+			  priv->eq_table.eq[MLX4_EQ_ASYNC].eqn);
+	if (err)	/* a MAP_EQ failure is only reported, not treated as fatal */
+		mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
+			   priv->eq_table.eq[MLX4_EQ_ASYNC].eqn, err);
+
+	for (i = 0; i < MLX4_EQ_CATAS; ++i)
+		eq_set_ci(&priv->eq_table.eq[i], 1);
+
+	if (dev->flags & MLX4_FLAG_MSI_X) {
+		err = mlx4_MAP_EQ(dev, MLX4_CATAS_EVENT_MASK, 0,
+				  priv->eq_table.eq[MLX4_EQ_CATAS].eqn);
+		if (err)
+			mlx4_warn(dev, "MAP_EQ for catas EQ %d failed (%d)\n",
+				  priv->eq_table.eq[MLX4_EQ_CATAS].eqn, err);
+	}
+
+	return 0;
+
+err_out_catas:
+	mlx4_free_irqs(dev);	/* before the EQs the handlers use are freed */
+	mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_CATAS]);
+
+err_out_async:
+	mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_ASYNC]);
+
+err_out_comp:
+	mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_COMP]);
+
+err_out_unmap:
+	mlx4_unmap_clr_int(dev);
+	for (i = 0; i < ARRAY_SIZE(priv->eq_table.uar_map); ++i)
+		if (priv->eq_table.uar_map[i])	/* doorbell pages mapped by mlx4_get_eq_uar() */
+			iounmap(priv->eq_table.uar_map[i]);
+
+err_out_free:
+	mlx4_bitmap_cleanup(&priv->eq_table.bitmap);
+	return err;
+}
+
+void mlx4_cleanup_eq_table(struct mlx4_dev *dev)
+{
+	struct mlx4_eq_table *table = &mlx4_priv(dev)->eq_table;
+	int i;
+
+	/* Unmap the event masks from their EQs before freeing anything. */
+	if (dev->flags & MLX4_FLAG_MSI_X)
+		mlx4_MAP_EQ(dev, MLX4_CATAS_EVENT_MASK, 1,
+			    table->eq[MLX4_EQ_CATAS].eqn);
+	mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 1,
+		    table->eq[MLX4_EQ_ASYNC].eqn);
+
+	mlx4_free_irqs(dev);
+
+	for (i = 0; i < MLX4_EQ_CATAS; ++i)
+		mlx4_free_eq(dev, &table->eq[i]);
+	if (dev->flags & MLX4_FLAG_MSI_X)
+		mlx4_free_eq(dev, &table->eq[MLX4_EQ_CATAS]);
+
+	mlx4_unmap_clr_int(dev);
+
+	for (i = 0; i < ARRAY_SIZE(table->uar_map); ++i)
+		if (table->uar_map[i])
+			iounmap(table->uar_map[i]);
+
+	mlx4_bitmap_cleanup(&table->bitmap);
+}