Merge branches 'cma', 'ehca', 'ipath', 'iser', 'mlx4' and 'nes' into for-next
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 7fc35cf..c825142 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -175,6 +175,13 @@
 	unsigned int next_wqe_idx;   /* Idx to first wqe to be flushed */
 };
 
+/* function to calculate the next index for the qmap */
+static inline unsigned int next_index(unsigned int cur_index, unsigned int limit)
+{
+	unsigned int temp = cur_index + 1;
+	return (temp == limit) ? 0 : temp;
+}
+
 struct ehca_qp {
 	union {
 		struct ib_qp ib_qp;
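
The next_index() helper above centralizes the wrap-around increment that ehca_qp.c and ehca_reqs.c (further down) open-coded as "(idx + 1) % entries". Because the index only ever advances one step at a time, the wrap can be a compare instead of a modulo, avoiding an integer division in the CQE-flushing hot path. A standalone sketch of the equivalence (not part of the patch):

	/* Both forms agree whenever cur < limit: */
	unsigned int next_mod(unsigned int cur, unsigned int limit)
	{
		return (cur + 1) % limit;		/* division per call */
	}

	unsigned int next_cmp(unsigned int cur, unsigned int limit)
	{
		unsigned int next = cur + 1;
		return (next == limit) ? 0 : next;	/* compare only */
	}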
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
index 49660df..523e733 100644
--- a/drivers/infiniband/hw/ehca/ehca_eq.c
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -113,7 +113,7 @@
 			if (h_ret != H_SUCCESS || vpage)
 				goto create_eq_exit2;
 		} else {
-			if (h_ret != H_PAGE_REGISTERED || !vpage)
+			if (h_ret != H_PAGE_REGISTERED)
 				goto create_eq_exit2;
 		}
 	}
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index bec7e02..3b77b67 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -717,6 +717,7 @@
 	const u64 *handle;
 	struct ib_pd *ibpd;
 	int ret, i, eq_size;
+	unsigned long flags;
 
 	handle = of_get_property(dev->node, "ibm,hca-handle", NULL);
 	if (!handle) {
@@ -830,9 +831,9 @@
 		ehca_err(&shca->ib_device,
 			 "Cannot create device attributes  ret=%d", ret);
 
-	spin_lock(&shca_list_lock);
+	spin_lock_irqsave(&shca_list_lock, flags);
 	list_add(&shca->shca_list, &shca_list);
-	spin_unlock(&shca_list_lock);
+	spin_unlock_irqrestore(&shca_list_lock, flags);
 
 	return 0;
 
@@ -878,6 +879,7 @@
 static int __devexit ehca_remove(struct of_device *dev)
 {
 	struct ehca_shca *shca = dev->dev.driver_data;
+	unsigned long flags;
 	int ret;
 
 	sysfs_remove_group(&dev->dev.kobj, &ehca_dev_attr_grp);
@@ -915,9 +917,9 @@
 
 	ib_dealloc_device(&shca->ib_device);
 
-	spin_lock(&shca_list_lock);
+	spin_lock_irqsave(&shca_list_lock, flags);
 	list_del(&shca->shca_list);
-	spin_unlock(&shca_list_lock);
+	spin_unlock_irqrestore(&shca_list_lock, flags);
 
 	return ret;
 }
@@ -975,6 +977,7 @@
 			     unsigned long action, void *data)
 {
 	static unsigned long ehca_dmem_warn_time;
+	unsigned long flags;
 
 	switch (action) {
 	case MEM_CANCEL_OFFLINE:
@@ -985,12 +988,12 @@
 	case MEM_GOING_ONLINE:
 	case MEM_GOING_OFFLINE:
 		/* only ok if no hca is attached to the lpar */
-		spin_lock(&shca_list_lock);
+		spin_lock_irqsave(&shca_list_lock, flags);
 		if (list_empty(&shca_list)) {
-			spin_unlock(&shca_list_lock);
+			spin_unlock_irqrestore(&shca_list_lock, flags);
 			return NOTIFY_OK;
 		} else {
-			spin_unlock(&shca_list_lock);
+			spin_unlock_irqrestore(&shca_list_lock, flags);
 			if (printk_timed_ratelimit(&ehca_dmem_warn_time,
 						   30 * 1000))
 				ehca_gen_err("DMEM operations are not allowed"
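
All three shca_list_lock sites above move from plain spin_lock() to the IRQ-saving variant. The rule being applied: once any acquirer of a lock may hold it with interrupts disabled, every acquirer must disable interrupts, otherwise an interrupt taken while the lock is held can try to take it again and deadlock. The pattern in isolation:

	unsigned long flags;

	spin_lock_irqsave(&shca_list_lock, flags);	/* IRQs off, prior
							 * state saved */
	list_add(&shca->shca_list, &shca_list);
	spin_unlock_irqrestore(&shca_list_lock, flags);	/* prior state back */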
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index cadbf0c..f161cf1 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -1138,14 +1138,14 @@
 		return -EFAULT;
 	}
 
-	tail_idx = (qmap->tail + 1) % qmap->entries;
+	tail_idx = next_index(qmap->tail, qmap->entries);
 	wqe_idx = q_ofs / ipz_queue->qe_size;
 
 	/* check all processed wqes, whether a cqe is requested or not */
 	while (tail_idx != wqe_idx) {
 		if (qmap->map[tail_idx].cqe_req)
 			qmap->left_to_poll++;
-		tail_idx = (tail_idx + 1) % qmap->entries;
+		tail_idx = next_index(tail_idx, qmap->entries);
 	}
 	/* save index in queue, where we have to start flushing */
 	qmap->next_wqe_idx = wqe_idx;
@@ -1195,14 +1195,14 @@
 	} else {
 		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
 		my_qp->sq_map.left_to_poll = 0;
-		my_qp->sq_map.next_wqe_idx = (my_qp->sq_map.tail + 1) %
-						my_qp->sq_map.entries;
+		my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,
+							my_qp->sq_map.entries);
 		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
 
 		spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
 		my_qp->rq_map.left_to_poll = 0;
-		my_qp->rq_map.next_wqe_idx = (my_qp->rq_map.tail + 1) %
-						my_qp->rq_map.entries;
+		my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,
+							my_qp->rq_map.entries);
 		spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
 	}
 
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index 00a648f..c711268 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -726,13 +726,13 @@
 		 * set left_to_poll to 0 because in error state, we will not
 		 * get any additional CQEs
 		 */
-		my_qp->sq_map.next_wqe_idx = (my_qp->sq_map.tail + 1) %
-						my_qp->sq_map.entries;
+		my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail,
+							my_qp->sq_map.entries);
 		my_qp->sq_map.left_to_poll = 0;
 		ehca_add_to_err_list(my_qp, 1);
 
-		my_qp->rq_map.next_wqe_idx = (my_qp->rq_map.tail + 1) %
-						my_qp->rq_map.entries;
+		my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail,
+							my_qp->rq_map.entries);
 		my_qp->rq_map.left_to_poll = 0;
 		if (HAS_RQ(my_qp))
 			ehca_add_to_err_list(my_qp, 0);
@@ -860,9 +860,8 @@
 
 		/* mark as reported and advance next_wqe pointer */
 		qmap_entry->reported = 1;
-		qmap->next_wqe_idx++;
-		if (qmap->next_wqe_idx == qmap->entries)
-			qmap->next_wqe_idx = 0;
+		qmap->next_wqe_idx = next_index(qmap->next_wqe_idx,
+						qmap->entries);
 		qmap_entry = &qmap->map[qmap->next_wqe_idx];
 
 		wc++; nr++;
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index ad0aab6..69c0ce3 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -661,6 +661,8 @@
 static void __devexit cleanup_device(struct ipath_devdata *dd)
 {
 	int port;
+	struct ipath_portdata **tmp;
+	unsigned long flags;
 
 	if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
 		/* can't do anything more with chip; needs re-init */
@@ -742,20 +744,21 @@
 
 	/*
 	 * free any resources still in use (usually just kernel ports)
-	 * at unload; we do for portcnt, not cfgports, because cfgports
-	 * could have changed while we were loaded.
+	 * at unload; we do this for portcnt, because that's what we allocate.
+	 * We acquire lock to be really paranoid that ipath_pd isn't being
+	 * accessed from some interrupt-related code (that should not happen,
+	 * but best to be sure).
 	 */
+	spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
+	tmp = dd->ipath_pd;
+	dd->ipath_pd = NULL;
+	spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
 	for (port = 0; port < dd->ipath_portcnt; port++) {
-		struct ipath_portdata *pd = dd->ipath_pd[port];
-		dd->ipath_pd[port] = NULL;
+		struct ipath_portdata *pd = tmp[port];
+		tmp[port] = NULL; /* debugging paranoia */
 		ipath_free_pddata(dd, pd);
 	}
-	kfree(dd->ipath_pd);
-	/*
-	 * debuggability, in case some cleanup path tries to use it
-	 * after this
-	 */
-	dd->ipath_pd = NULL;
+	kfree(tmp);
 }
 
 static void __devexit ipath_remove_one(struct pci_dev *pdev)
@@ -2586,6 +2589,7 @@
 {
 	int ret, i;
 	struct ipath_devdata *dd = ipath_lookup(unit);
+	unsigned long flags;
 
 	if (!dd) {
 		ret = -ENODEV;
@@ -2611,18 +2615,21 @@
 		goto bail;
 	}
 
+	spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
 	if (dd->ipath_pd)
 		for (i = 1; i < dd->ipath_cfgports; i++) {
-			if (dd->ipath_pd[i] && dd->ipath_pd[i]->port_cnt) {
-				ipath_dbg("unit %u port %d is in use "
-					  "(PID %u cmd %s), can't reset\n",
-					  unit, i,
-					  pid_nr(dd->ipath_pd[i]->port_pid),
-					  dd->ipath_pd[i]->port_comm);
-				ret = -EBUSY;
-				goto bail;
-			}
+			if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
+				continue;
+			spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
+			ipath_dbg("unit %u port %d is in use "
+				  "(PID %u cmd %s), can't reset\n",
+				  unit, i,
+				  pid_nr(dd->ipath_pd[i]->port_pid),
+				  dd->ipath_pd[i]->port_comm);
+			ret = -EBUSY;
+			goto bail;
 		}
+	spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
 
 	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
 		teardown_sdma(dd);
@@ -2656,9 +2663,12 @@
 {
 	int i, sub, any = 0;
 	struct pid *pid;
+	unsigned long flags;
 
 	if (!dd->ipath_pd)
 		return 0;
+
+	spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
 	for (i = 1; i < dd->ipath_cfgports; i++) {
 		if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
 			continue;
@@ -2682,6 +2692,7 @@
 			any++;
 		}
 	}
+	spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
 	return any;
 }
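
cleanup_device() above is converted to a detach-then-free idiom: the shared dd->ipath_pd pointer is snapshotted and cleared under the new ipath_uctxt_lock, and the freeing runs with the lock dropped, so any concurrent reader sees either the intact array or NULL, never a half-freed one. The shape of the idiom, reduced to its essentials:

	struct ipath_portdata **tmp;
	unsigned long flags;

	spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
	tmp = dd->ipath_pd;		/* take ownership of the array */
	dd->ipath_pd = NULL;		/* readers now see "no ports" */
	spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);

	/* nothing can reach tmp any more; free it unlocked */
	for (port = 0; port < dd->ipath_portcnt; port++)
		ipath_free_pddata(dd, tmp[port]);
	kfree(tmp);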
 
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 1af1f3a..239d4e8 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -223,8 +223,13 @@
 			(unsigned long long) kinfo->spi_subport_rcvhdr_base);
 	}
 
-	kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) /
-		dd->ipath_palign;
+	/*
+	 * All user buffers are 2KB buffers.  If we ever support
+	 * giving 4KB buffers to user processes, this will need some
+	 * work.
+	 */
+	kinfo->spi_pioindex = (kinfo->spi_piobufbase -
+		(dd->ipath_piobufbase & 0xffffffff)) / dd->ipath_palign;
 	kinfo->spi_pioalign = dd->ipath_palign;
 
 	kinfo->spi_qpair = IPATH_KD_QP;
@@ -2041,7 +2046,9 @@
 	struct ipath_filedata *fd;
 	struct ipath_portdata *pd;
 	struct ipath_devdata *dd;
+	unsigned long flags;
 	unsigned port;
+	struct pid *pid;
 
 	ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n",
 		   (long)in->i_rdev, fp->private_data);
@@ -2074,14 +2081,13 @@
 		mutex_unlock(&ipath_mutex);
 		goto bail;
 	}
+	/* early; no interrupt users after this */
+	spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
 	port = pd->port_port;
-
-	if (pd->port_hdrqfull) {
-		ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors "
-			   "during run\n", pd->port_comm, pid_nr(pd->port_pid),
-			   pd->port_hdrqfull);
-		pd->port_hdrqfull = 0;
-	}
+	dd->ipath_pd[port] = NULL;
+	pid = pd->port_pid;
+	pd->port_pid = NULL;
+	spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);
 
 	if (pd->port_rcvwait_to || pd->port_piowait_to
 	    || pd->port_rcvnowait || pd->port_pionowait) {
@@ -2138,13 +2144,11 @@
 			unlock_expected_tids(pd);
 		ipath_stats.sps_ports--;
 		ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
-			   pd->port_comm, pid_nr(pd->port_pid),
+			   pd->port_comm, pid_nr(pid),
 			   dd->ipath_unit, port);
 	}
 
-	put_pid(pd->port_pid);
-	pd->port_pid = NULL;
-	dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */
+	put_pid(pid);
 	mutex_unlock(&ipath_mutex);
 	ipath_free_pddata(dd, pd); /* after releasing the mutex */
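
ipath_close() gets the same ownership-transfer treatment: the port slot and the struct pid are detached under ipath_uctxt_lock at the top of the teardown ("early; no interrupt users after this"), and the snapshotted pid is what the later logging and put_pid() use; the old code cleared them only at the bottom, leaving a window for interrupt-side users. The port_hdrqfull report is dropped in the same hunk. Reduced to the essential moves:

	spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
	dd->ipath_pd[port] = NULL;	/* unpublish before tearing down */
	pid = pd->port_pid;		/* snapshot for logging/put_pid() */
	pd->port_pid = NULL;
	spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);

	/* ... rest of the teardown ... */
	put_pid(pid);			/* release the snapshotted ref */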
 
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index 8bb5170..53912c3 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -86,7 +86,7 @@
 	*dentry = NULL;
 	mutex_lock(&parent->d_inode->i_mutex);
 	*dentry = lookup_one_len(name, parent, strlen(name));
-	if (!IS_ERR(dentry))
+	if (!IS_ERR(*dentry))
 		error = ipathfs_mknod(parent->d_inode, *dentry,
 				      mode, fops, data);
 	else
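
The one-character ipathfs fix deserves a note: lookup_one_len() returns its result through *dentry, so the error test must look at the stored value. IS_ERR(dentry) tested the address of the caller-provided pointer slot, which is never in the ERR_PTR range, so the check was always false and lookup failures fell through into ipathfs_mknod(). In miniature:

	*dentry = lookup_one_len(name, parent, strlen(name));
	/* Wrong: IS_ERR(dentry) asks about a valid argument address and
	 * is therefore always false.
	 * Right: ask about the value lookup_one_len() returned: */
	if (IS_ERR(*dentry))
		return PTR_ERR(*dentry);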
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index 421cc2a..fbf8c537 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -721,6 +721,12 @@
 				 INFINIPATH_HWE_SERDESPLLFAILED);
 	}
 
+	dd->ibdeltainprog = 1;
+	dd->ibsymsnap =
+	     ipath_read_creg32(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
+	dd->iblnkerrsnap =
+	     ipath_read_creg32(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
+
 	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
 	config1 = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig1);
 
@@ -810,6 +816,36 @@
 {
 	u64 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
 
+	if (dd->ibsymdelta || dd->iblnkerrdelta ||
+	    dd->ibdeltainprog) {
+		u64 diagc;
+		/* enable counter writes */
+		diagc = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwdiagctrl);
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwdiagctrl,
+				 diagc | INFINIPATH_DC_COUNTERWREN);
+
+		if (dd->ibsymdelta || dd->ibdeltainprog) {
+			val = ipath_read_creg32(dd,
+					dd->ipath_cregs->cr_ibsymbolerrcnt);
+			if (dd->ibdeltainprog)
+				val -= val - dd->ibsymsnap;
+			val -= dd->ibsymdelta;
+			ipath_write_creg(dd,
+				  dd->ipath_cregs->cr_ibsymbolerrcnt, val);
+		}
+		if (dd->iblnkerrdelta || dd->ibdeltainprog) {
+			val = ipath_read_creg32(dd,
+					dd->ipath_cregs->cr_iblinkerrrecovcnt);
+			if (dd->ibdeltainprog)
+				val -= val - dd->iblnkerrsnap;
+			val -= dd->iblnkerrdelta;
+			ipath_write_creg(dd,
+				   dd->ipath_cregs->cr_iblinkerrrecovcnt, val);
+		}
+
+		/* and disable counter writes */
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwdiagctrl, diagc);
+	}
 	val |= INFINIPATH_SERDC0_TXIDLE;
 	ipath_dbg("Setting TxIdleEn on serdes (config0 = %llx)\n",
 		  (unsigned long long) val);
@@ -1749,6 +1785,31 @@
 
 static int ipath_pe_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
 {
+	if (ibup) {
+		if (dd->ibdeltainprog) {
+			dd->ibdeltainprog = 0;
+			dd->ibsymdelta +=
+				ipath_read_creg32(dd,
+				  dd->ipath_cregs->cr_ibsymbolerrcnt) -
+				dd->ibsymsnap;
+			dd->iblnkerrdelta +=
+				ipath_read_creg32(dd,
+				  dd->ipath_cregs->cr_iblinkerrrecovcnt) -
+				dd->iblnkerrsnap;
+		}
+	} else {
+		dd->ipath_lli_counter = 0;
+		if (!dd->ibdeltainprog) {
+			dd->ibdeltainprog = 1;
+			dd->ibsymsnap =
+				ipath_read_creg32(dd,
+				  dd->ipath_cregs->cr_ibsymbolerrcnt);
+			dd->iblnkerrsnap =
+				ipath_read_creg32(dd,
+				  dd->ipath_cregs->cr_iblinkerrrecovcnt);
+		}
+	}
+
 	ipath_setup_pe_setextled(dd, ipath_ib_linkstate(dd, ibcs),
 		ipath_ib_linktrstate(dd, ibcs));
 	return 0;
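
The ibdeltainprog/ibsymsnap/iblnkerrsnap machinery introduced here (fields declared in ipath_kernel.h below) launders the chip's IB symbol-error and link-error-recovery counters: per the field comment, those counters tick during link negotiation prior to INIT and while faking DDR negotiation, so the driver snapshots them when such a window opens, folds the difference into ibsymdelta/iblnkerrdelta when it closes, and subtracts the deltas on every read (ipath_stats.c) and on the write-back at unload above. One idiom worth decoding: "val -= val - snap" is simply "val = snap", written as a subtraction so it composes with the delta subtraction that follows. The read-side correction, spelled out:

	val = ipath_read_creg32(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
	if (dd->ibdeltainprog)
		val -= val - dd->ibsymsnap;	/* == val = ibsymsnap: drop
						 * everything counted since
						 * the window opened */
	val -= dd->ibsymdelta;			/* drop closed windows too */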
diff --git a/drivers/infiniband/hw/ipath/ipath_iba7220.c b/drivers/infiniband/hw/ipath/ipath_iba7220.c
index 9839e20..b2a9d4c 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba7220.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba7220.c
@@ -951,6 +951,12 @@
 				 INFINIPATH_HWE_SERDESPLLFAILED);
 	}
 
+	dd->ibdeltainprog = 1;
+	dd->ibsymsnap =
+	     ipath_read_creg32(dd, dd->ipath_cregs->cr_ibsymbolerrcnt);
+	dd->iblnkerrsnap =
+	     ipath_read_creg32(dd, dd->ipath_cregs->cr_iblinkerrrecovcnt);
+
 	if (!dd->ipath_ibcddrctrl) {
 		/* not on re-init after reset */
 		dd->ipath_ibcddrctrl =
@@ -1084,6 +1090,37 @@
 static void ipath_7220_quiet_serdes(struct ipath_devdata *dd)
 {
 	u64 val;
+	if (dd->ibsymdelta || dd->iblnkerrdelta ||
+	    dd->ibdeltainprog) {
+		u64 diagc;
+		/* enable counter writes */
+		diagc = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwdiagctrl);
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwdiagctrl,
+				 diagc | INFINIPATH_DC_COUNTERWREN);
+
+		if (dd->ibsymdelta || dd->ibdeltainprog) {
+			val = ipath_read_creg32(dd,
+					dd->ipath_cregs->cr_ibsymbolerrcnt);
+			if (dd->ibdeltainprog)
+				val -= val - dd->ibsymsnap;
+			val -= dd->ibsymdelta;
+			ipath_write_creg(dd,
+				  dd->ipath_cregs->cr_ibsymbolerrcnt, val);
+		}
+		if (dd->iblnkerrdelta || dd->ibdeltainprog) {
+			val = ipath_read_creg32(dd,
+					dd->ipath_cregs->cr_iblinkerrrecovcnt);
+			if (dd->ibdeltainprog)
+				val -= val - dd->iblnkerrsnap;
+			val -= dd->iblnkerrdelta;
+			ipath_write_creg(dd,
+				   dd->ipath_cregs->cr_iblinkerrrecovcnt, val);
+		}
+
+		/* and disable counter writes */
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwdiagctrl, diagc);
+	}
+
 	dd->ipath_flags &= ~IPATH_IB_AUTONEG_INPROG;
 	wake_up(&dd->ipath_autoneg_wait);
 	cancel_delayed_work(&dd->ipath_autoneg_work);
@@ -2325,7 +2362,7 @@
 
 static int ipath_7220_ib_updown(struct ipath_devdata *dd, int ibup, u64 ibcs)
 {
-	int ret = 0;
+	int ret = 0, symadj = 0;
 	u32 ltstate = ipath_ib_linkstate(dd, ibcs);
 
 	dd->ipath_link_width_active =
@@ -2368,6 +2405,13 @@
 			ipath_dbg("DDR negotiation try, %u/%u\n",
 				dd->ipath_autoneg_tries,
 				IPATH_AUTONEG_TRIES);
+			if (!dd->ibdeltainprog) {
+				dd->ibdeltainprog = 1;
+				dd->ibsymsnap = ipath_read_creg32(dd,
+					dd->ipath_cregs->cr_ibsymbolerrcnt);
+				dd->iblnkerrsnap = ipath_read_creg32(dd,
+					dd->ipath_cregs->cr_iblinkerrrecovcnt);
+			}
 			try_auto_neg(dd);
 			ret = 1; /* no other IB status change processing */
 		} else if ((dd->ipath_flags & IPATH_IB_AUTONEG_INPROG)
@@ -2388,6 +2432,7 @@
 				set_speed_fast(dd,
 					dd->ipath_link_speed_enabled);
 				wake_up(&dd->ipath_autoneg_wait);
+				symadj = 1;
 			} else if (dd->ipath_flags & IPATH_IB_AUTONEG_FAILED) {
 				/*
 				 * clear autoneg failure flag, and do setup
@@ -2403,22 +2448,28 @@
 					IBA7220_IBC_IBTA_1_2_MASK;
 				ipath_write_kreg(dd,
 					IPATH_KREG_OFFSET(IBNCModeCtrl), 0);
+				symadj = 1;
 			}
 		}
 		/*
-		 * if we are in 1X, and are in autoneg width, it
-		 * could be due to an xgxs problem, so if we haven't
+		 * if we are in 1X on rev1 only, and are in autoneg width,
+		 * it could be due to an xgxs problem, so if we haven't
 		 * already tried, try twice to get to 4X; if we
 		 * tried, and couldn't, report it, since it will
 		 * probably not be what is desired.
 		 */
-		if ((dd->ipath_link_width_enabled & (IB_WIDTH_1X |
+		if (dd->ipath_minrev == 1 &&
+		    (dd->ipath_link_width_enabled & (IB_WIDTH_1X |
 			IB_WIDTH_4X)) == (IB_WIDTH_1X | IB_WIDTH_4X)
 			&& dd->ipath_link_width_active == IB_WIDTH_1X
 			&& dd->ipath_x1_fix_tries < 3) {
-			if (++dd->ipath_x1_fix_tries == 3)
+			if (++dd->ipath_x1_fix_tries == 3) {
 				dev_info(&dd->pcidev->dev,
 					"IB link is in 1X mode\n");
+				if (!(dd->ipath_flags &
+				      IPATH_IB_AUTONEG_INPROG))
+					symadj = 1;
+			}
 			else {
 				ipath_cdbg(VERBOSE, "IB 1X in "
 					"auto-width, try %u to be "
@@ -2429,7 +2480,8 @@
 				dd->ipath_f_xgxs_reset(dd);
 				ret = 1; /* skip other processing */
 			}
-		}
+		} else if (!(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG))
+			symadj = 1;
 
 		if (!ret) {
 			dd->delay_mult = rate_to_delay
@@ -2440,6 +2492,25 @@
 		}
 	}
 
+	if (symadj) {
+		if (dd->ibdeltainprog) {
+			dd->ibdeltainprog = 0;
+			dd->ibsymdelta += ipath_read_creg32(dd,
+				dd->ipath_cregs->cr_ibsymbolerrcnt) -
+				dd->ibsymsnap;
+			dd->iblnkerrdelta += ipath_read_creg32(dd,
+				dd->ipath_cregs->cr_iblinkerrrecovcnt) -
+				dd->iblnkerrsnap;
+		}
+	} else if (!ibup && !dd->ibdeltainprog
+		   && !(dd->ipath_flags & IPATH_IB_AUTONEG_INPROG)) {
+		dd->ibdeltainprog = 1;
+		dd->ibsymsnap =	ipath_read_creg32(dd,
+				     dd->ipath_cregs->cr_ibsymbolerrcnt);
+		dd->iblnkerrsnap = ipath_read_creg32(dd,
+				     dd->ipath_cregs->cr_iblinkerrrecovcnt);
+	}
+
 	if (!ret)
 		ipath_setup_7220_setextled(dd, ipath_ib_linkstate(dd, ibcs),
 			ltstate);
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 3e5baa4..64aeefb 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -229,6 +229,7 @@
 	spin_lock_init(&dd->ipath_kernel_tid_lock);
 	spin_lock_init(&dd->ipath_user_tid_lock);
 	spin_lock_init(&dd->ipath_sendctrl_lock);
+	spin_lock_init(&dd->ipath_uctxt_lock);
 	spin_lock_init(&dd->ipath_sdma_lock);
 	spin_lock_init(&dd->ipath_gpio_lock);
 	spin_lock_init(&dd->ipath_eep_st_lock);
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 0bd8bcb..6ba4861 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -355,6 +355,19 @@
 	/* errors masked because they occur too fast */
 	ipath_err_t ipath_maskederrs;
 	u64 ipath_lastlinkrecov; /* link recoveries at last ACTIVE */
+	/* these 5 fields are used to establish deltas for IB Symbol
+	 * errors and linkrecovery errors. They can be reported on
+	 * some chips during link negotiation prior to INIT, and with
+	 * DDR when faking DDR negotiations with non-IBTA switches.
+	 * The chip counters are adjusted at driver unload if there is
+	 * a non-zero delta.
+	 */
+	u64 ibdeltainprog;
+	u64 ibsymdelta;
+	u64 ibsymsnap;
+	u64 iblnkerrdelta;
+	u64 iblnkerrsnap;
+
 	/* time in jiffies at which to re-enable maskederrs */
 	unsigned long ipath_unmasktime;
 	/* count of egrfull errors, combined for all ports */
@@ -464,6 +477,8 @@
 	spinlock_t ipath_kernel_tid_lock;
 	spinlock_t ipath_user_tid_lock;
 	spinlock_t ipath_sendctrl_lock;
+	/* around ipath_pd and (user ports) port_cnt use (intr vs free) */
+	spinlock_t ipath_uctxt_lock;
 
 	/*
 	 * IPATH_STATUS_*,
diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c
index 8f32b17..c0e933f 100644
--- a/drivers/infiniband/hw/ipath/ipath_keys.c
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -132,6 +132,7 @@
 	 * (see ipath_get_dma_mr and ipath_dma.c).
 	 */
 	if (sge->lkey == 0) {
+		/* always a kernel port, no locking needed */
 		struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
 
 		if (pd->user) {
@@ -211,6 +212,7 @@
 	 * (see ipath_get_dma_mr and ipath_dma.c).
 	 */
 	if (rkey == 0) {
+		/* always a kernel port, no locking needed */
 		struct ipath_pd *pd = to_ipd(qp->ibqp.pd);
 
 		if (pd->user) {
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index be4fc9a..17a1231 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -348,6 +348,7 @@
  */
 static int get_pkeys(struct ipath_devdata *dd, u16 * pkeys)
 {
+	/* always a kernel port, no locking needed */
 	struct ipath_portdata *pd = dd->ipath_pd[0];
 
 	memcpy(pkeys, pd->port_pkeys, sizeof(pd->port_pkeys));
@@ -730,6 +731,7 @@
 	int i;
 	int changed = 0;
 
+	/* always a kernel port, no locking needed */
 	pd = dd->ipath_pd[0];
 
 	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 4715911..3a5a89b 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -745,6 +745,7 @@
 	struct ipath_swqe *swq = NULL;
 	struct ipath_ibdev *dev;
 	size_t sz;
+	size_t sg_list_sz;
 	struct ib_qp *ret;
 
 	if (init_attr->create_flags) {
@@ -789,19 +790,31 @@
 			goto bail;
 		}
 		sz = sizeof(*qp);
+		sg_list_sz = 0;
 		if (init_attr->srq) {
 			struct ipath_srq *srq = to_isrq(init_attr->srq);
 
-			sz += sizeof(*qp->r_sg_list) *
-				srq->rq.max_sge;
-		} else
-			sz += sizeof(*qp->r_sg_list) *
-				init_attr->cap.max_recv_sge;
-		qp = kmalloc(sz, GFP_KERNEL);
+			if (srq->rq.max_sge > 1)
+				sg_list_sz = sizeof(*qp->r_sg_list) *
+					(srq->rq.max_sge - 1);
+		} else if (init_attr->cap.max_recv_sge > 1)
+			sg_list_sz = sizeof(*qp->r_sg_list) *
+				(init_attr->cap.max_recv_sge - 1);
+		qp = kmalloc(sz + sg_list_sz, GFP_KERNEL);
 		if (!qp) {
 			ret = ERR_PTR(-ENOMEM);
 			goto bail_swq;
 		}
+		if (sg_list_sz && (init_attr->qp_type == IB_QPT_UD ||
+		    init_attr->qp_type == IB_QPT_SMI ||
+		    init_attr->qp_type == IB_QPT_GSI)) {
+			qp->r_ud_sg_list = kmalloc(sg_list_sz, GFP_KERNEL);
+			if (!qp->r_ud_sg_list) {
+				ret = ERR_PTR(-ENOMEM);
+				goto bail_qp;
+			}
+		} else
+			qp->r_ud_sg_list = NULL;
 		if (init_attr->srq) {
 			sz = 0;
 			qp->r_rq.size = 0;
@@ -818,7 +831,7 @@
 					      qp->r_rq.size * sz);
 			if (!qp->r_rq.wq) {
 				ret = ERR_PTR(-ENOMEM);
-				goto bail_qp;
+				goto bail_sg_list;
 			}
 		}
 
@@ -848,7 +861,7 @@
 		if (err) {
 			ret = ERR_PTR(err);
 			vfree(qp->r_rq.wq);
-			goto bail_qp;
+			goto bail_sg_list;
 		}
 		qp->ip = NULL;
 		qp->s_tx = NULL;
@@ -925,6 +938,8 @@
 		vfree(qp->r_rq.wq);
 	ipath_free_qp(&dev->qp_table, qp);
 	free_qpn(&dev->qp_table, qp->ibqp.qp_num);
+bail_sg_list:
+	kfree(qp->r_ud_sg_list);
 bail_qp:
 	kfree(qp);
 bail_swq:
@@ -989,6 +1004,7 @@
 		kref_put(&qp->ip->ref, ipath_release_mmap_info);
 	else
 		vfree(qp->r_rq.wq);
+	kfree(qp->r_ud_sg_list);
 	vfree(qp->s_wq);
 	kfree(qp);
 	return 0;
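
For UD-family QPs (UD/SMI/GSI) the receive scatter list is now sized and allocated once at QP creation, hung off the new r_ud_sg_list field, instead of being kmalloc'd with GFP_ATOMIC for every loopback packet in ipath_ud.c (that per-packet allocation, and its drop-on-failure path, are deleted further down). The max_sge - 1 sizing evidently reflects the first SGE living in the embedded SGE state rather than the trailing list. The error path leans on kfree(NULL) being a no-op:

	bail_sg_list:
		kfree(qp->r_ud_sg_list);	/* NULL for non-UD QPs;
						 * kfree(NULL) is a no-op */
	bail_qp:
		kfree(qp);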
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index 7b93cda..9170710 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -573,9 +573,8 @@
 		ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
 		qp->s_state = OP(RDMA_READ_REQUEST);
 		hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
-		bth2 = qp->s_psn++ & IPATH_PSN_MASK;
-		if (ipath_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-			qp->s_next_psn = qp->s_psn;
+		bth2 = qp->s_psn & IPATH_PSN_MASK;
+		qp->s_psn = wqe->lpsn + 1;
 		ss = NULL;
 		len = 0;
 		qp->s_cur++;
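
The RC change fixes PSN accounting across an RDMA READ request. A READ occupies a range of PSNs, one per expected response packet, bounded by wqe->lpsn, which was computed when the WQE was posted; post-incrementing s_psn and patching s_next_psn after the fact under-counted that range. The accounting, annotated:

	/*
	 * bth2      = qp->s_psn & IPATH_PSN_MASK;  PSN of the request packet
	 * qp->s_psn = wqe->lpsn + 1;               next WQE starts just past
	 *                                          the whole READ-response
	 *                                          PSN range
	 */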
diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c
index 284c9bc..8e255ad 100644
--- a/drivers/infiniband/hw/ipath/ipath_sdma.c
+++ b/drivers/infiniband/hw/ipath/ipath_sdma.c
@@ -698,10 +698,8 @@
 
 	addr = dma_map_single(&dd->pcidev->dev, tx->txreq.map_addr,
 			      tx->map_len, DMA_TO_DEVICE);
-	if (dma_mapping_error(&dd->pcidev->dev, addr)) {
-		ret = -EIO;
-		goto unlock;
-	}
+	if (dma_mapping_error(&dd->pcidev->dev, addr))
+		goto ioerr;
 
 	dwoffset = tx->map_len >> 2;
 	make_sdma_desc(dd, sdmadesc, (u64) addr, dwoffset, 0);
@@ -741,6 +739,8 @@
 		dw = (len + 3) >> 2;
 		addr = dma_map_single(&dd->pcidev->dev, sge->vaddr, dw << 2,
 				      DMA_TO_DEVICE);
+		if (dma_mapping_error(&dd->pcidev->dev, addr))
+			goto unmap;
 		make_sdma_desc(dd, sdmadesc, (u64) addr, dw, dwoffset);
 		/* SDmaUseLargeBuf has to be set in every descriptor */
 		if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_USELARGEBUF)
@@ -798,7 +798,18 @@
 	list_add_tail(&tx->txreq.list, &dd->ipath_sdma_activelist);
 	if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_VL15)
 		vl15_watchdog_enq(dd);
+	goto unlock;
 
+unmap:
+	while (tail != dd->ipath_sdma_descq_tail) {
+		if (!tail)
+			tail = dd->ipath_sdma_descq_cnt - 1;
+		else
+			tail--;
+		unmap_desc(dd, tail);
+	}
+ioerr:
+	ret = -EIO;
 unlock:
 	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
 fail:
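
The SDMA submit path gains the previously missing dma_mapping_error() check on the per-SGE mapping, plus an unwind: on failure, walk the descriptor ring backwards from the local tail to the last committed tail (dd->ipath_sdma_descq_tail), unmapping everything this call queued, then fail with -EIO. The walk-back with the wraparound written as one step:

	unmap:
		while (tail != dd->ipath_sdma_descq_tail) {
			/* step back one slot, wrapping at the ring edge */
			tail = tail ? tail - 1 : dd->ipath_sdma_descq_cnt - 1;
			unmap_desc(dd, tail);
		}
	ioerr:
		ret = -EIO;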
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index c8e3d65..f63e143 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -112,6 +112,14 @@
 			dd->ipath_lastrpkts = val;
 		}
 		val64 = dd->ipath_rpkts;
+	} else if (creg == dd->ipath_cregs->cr_ibsymbolerrcnt) {
+		if (dd->ibdeltainprog)
+			val64 -= val64 - dd->ibsymsnap;
+		val64 -= dd->ibsymdelta;
+	} else if (creg == dd->ipath_cregs->cr_iblinkerrrecovcnt) {
+		if (dd->ibdeltainprog)
+			val64 -= val64 - dd->iblnkerrsnap;
+		val64 -= dd->iblnkerrdelta;
 	} else
 		val64 = (u64) val;
 
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 729446f..91c74cc 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -70,8 +70,6 @@
 		goto done;
 	}
 
-	rsge.sg_list = NULL;
-
 	/*
 	 * Check that the qkey matches (except for QP0, see 9.6.1.4.1).
 	 * Qkeys with the high order bit set mean use the
@@ -115,21 +113,6 @@
 		rq = &qp->r_rq;
 	}
 
-	if (rq->max_sge > 1) {
-		/*
-		 * XXX We could use GFP_KERNEL if ipath_do_send()
-		 * was always called from the tasklet instead of
-		 * from ipath_post_send().
-		 */
-		rsge.sg_list = kmalloc((rq->max_sge - 1) *
-					sizeof(struct ipath_sge),
-				       GFP_ATOMIC);
-		if (!rsge.sg_list) {
-			dev->n_pkt_drops++;
-			goto drop;
-		}
-	}
-
 	/*
 	 * Get the next work request entry to find where to put the data.
 	 * Note that it is safe to drop the lock after changing rq->tail
@@ -147,6 +130,7 @@
 		goto drop;
 	}
 	wqe = get_rwqe_ptr(rq, tail);
+	rsge.sg_list = qp->r_ud_sg_list;
 	if (!ipath_init_sge(qp, wqe, &rlen, &rsge)) {
 		spin_unlock_irqrestore(&rq->lock, flags);
 		dev->n_pkt_drops++;
@@ -242,7 +226,6 @@
 	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
 		       swqe->wr.send_flags & IB_SEND_SOLICITED);
 drop:
-	kfree(rsge.sg_list);
 	if (atomic_dec_and_test(&qp->refcount))
 		wake_up(&qp->wait);
 done:;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index eabc424..cdf0e6a 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -1852,7 +1852,7 @@
 }
 
 /**
- * ipath_get_pkey - return the indexed PKEY from the port 0 PKEY table
+ * ipath_get_pkey - return the indexed PKEY from the port PKEY table
  * @dd: the infinipath device
  * @index: the PKEY index
  */
@@ -1860,6 +1860,7 @@
 {
 	unsigned ret;
 
+	/* always a kernel port, no locking needed */
 	if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
 		ret = 0;
 	else
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 9d12ae8..11e3f61 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -431,6 +431,7 @@
 	u32 s_lsn;		/* limit sequence number (credit) */
 	struct ipath_swqe *s_wq;	/* send work queue */
 	struct ipath_swqe *s_wqe;
+	struct ipath_sge *r_ud_sg_list;
 	struct ipath_rq r_rq;		/* receive work queue */
 	struct ipath_sge r_sg_list[0];	/* verified SGEs */
 };
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 1830849..8415ecc 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -222,7 +222,7 @@
 	}
 
 	err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
-			    cq->db.dma, &cq->mcq, 0);
+			    cq->db.dma, &cq->mcq, vector, 0);
 	if (err)
 		goto err_dbmap;
 
@@ -325,15 +325,17 @@
 
 static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
 {
-	struct mlx4_cqe *cqe;
+	struct mlx4_cqe *cqe, *new_cqe;
 	int i;
 
 	i = cq->mcq.cons_index;
 	cqe = get_cqe(cq, i & cq->ibcq.cqe);
 	while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) {
-		memcpy(get_cqe_from_buf(&cq->resize_buf->buf,
-					(i + 1) & cq->resize_buf->cqe),
-			get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe));
+		new_cqe = get_cqe_from_buf(&cq->resize_buf->buf,
+					   (i + 1) & cq->resize_buf->cqe);
+		memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe));
+		new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) |
+			(((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0);
 		cqe = get_cqe(cq, ++i & cq->ibcq.cqe);
 	}
 	++cq->mcq.cons_index;
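
Two mlx4 fixes here: mlx4_cq_alloc() now receives the CQ's completion vector (paired with the num_comp_vectors change in main.c below), and CQE copying during CQ resize recomputes the ownership bit. mlx4 CQE ownership alternates on each pass around the ring, and cq->resize_buf->cqe holds entries - 1, so (i + 1) & (cqe + 1) extracts exactly the pass-parity bit of the destination index in the new ring; a CQE copied between rings of different sizes can otherwise carry the wrong parity for its new slot:

	/* rebuild ownership from the destination slot's pass parity */
	new_cqe->owner_sr_opcode =
		(cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) |
		(((i + 1) & (cq->resize_buf->cqe + 1)) ?
		 MLX4_CQE_OWNER_MASK : 0);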
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 2e80f8f..dcefe1f 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -578,7 +578,7 @@
 	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
 		ibdev->num_ports++;
 	ibdev->ib_dev.phys_port_cnt     = ibdev->num_ports;
-	ibdev->ib_dev.num_comp_vectors	= 1;
+	ibdev->ib_dev.num_comp_vectors	= dev->caps.num_comp_vectors;
 	ibdev->ib_dev.dma_device	= &dev->pdev->dev;
 
 	ibdev->ib_dev.uverbs_abi_ver	= MLX4_IB_UVERBS_ABI_VERSION;
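
With the mlx4 core exposing its completion event queues, the IB device stops advertising a single completion vector and reports dev->caps.num_comp_vectors, letting consumers spread CQs across interrupt vectors. A consumer selects the vector at CQ creation (sketch; any value in [0, num_comp_vectors) is acceptable):

	/* comp_vector picks which completion EQ/IRQ services this CQ */
	cq = ib_create_cq(ibdev, comp_handler, event_handler, ctx,
			  num_cqe, comp_vector);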
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index 1595dc7..13a5bb1 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -137,14 +137,18 @@
 
 #ifdef CONFIG_INFINIBAND_NES_DEBUG
 #define nes_debug(level, fmt, args...) \
+do { \
 	if (level & nes_debug_level) \
-		printk(KERN_ERR PFX "%s[%u]: " fmt, __func__, __LINE__, ##args)
+		printk(KERN_ERR PFX "%s[%u]: " fmt, __func__, __LINE__, ##args); \
+} while (0)
 
-#define assert(expr)                                                \
-if (!(expr)) {                                                       \
-	printk(KERN_ERR PFX "Assertion failed! %s, %s, %s, line %d\n",  \
-		   #expr, __FILE__, __func__, __LINE__);                \
-}
+#define assert(expr) \
+do { \
+	if (!(expr)) { \
+		printk(KERN_ERR PFX "Assertion failed! %s, %s, %s, line %d\n", \
+			   #expr, __FILE__, __func__, __LINE__); \
+	} \
+} while (0)
 
 #define NES_EVENT_TIMEOUT   1200000
 #else
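
The nes_debug()/assert() rework is the standard do { ... } while (0) hardening for statement-like macros. The old nes_debug() expanded to a bare if, so an else following a macro call would silently bind to the macro's hidden if; the old braced assert() would instead fail to compile in the same position. The silent case:

	if (valid)
		nes_debug(NES_DBG_CM, "ok\n");
	else
		return -EINVAL;
	/* Old expansion: the 'else' pairs with the hidden
	 * 'if (level & nes_debug_level)', so -EINVAL fires when the
	 * debug level is off -- never when 'valid' is false. */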
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 2caf9da..cb48041 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -86,15 +86,14 @@
 	struct nes_cm_node *);
 static int mini_cm_reject(struct nes_cm_core *, struct ietf_mpa_frame *,
 	struct nes_cm_node *);
-static void mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *,
+static int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *,
 	struct sk_buff *);
 static int mini_cm_dealloc_core(struct nes_cm_core *);
 static int mini_cm_get(struct nes_cm_core *);
 static int mini_cm_set(struct nes_cm_core *, u32, u32);
 
-static struct sk_buff *form_cm_frame(struct sk_buff *, struct nes_cm_node *,
+static void form_cm_frame(struct sk_buff *, struct nes_cm_node *,
 	void *, u32, void *, u32, u8);
-static struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node);
 static int add_ref_cm_node(struct nes_cm_node *);
 static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *);
 
@@ -251,7 +250,7 @@
  * form_cm_frame - get a free packet and build empty frame Use
  * node info to build.
  */
-static struct sk_buff *form_cm_frame(struct sk_buff *skb,
+static void form_cm_frame(struct sk_buff *skb,
 	struct nes_cm_node *cm_node, void *options, u32 optionsize,
 	void *data, u32 datasize, u8 flags)
 {
@@ -339,7 +338,6 @@
 	skb_shinfo(skb)->nr_frags = 0;
 	cm_packets_created++;
 
-	return skb;
 }
 
 
@@ -356,7 +354,6 @@
 
 	nes_debug(NES_DBG_CM, "State         : %u \n",  core->state);
 
-	nes_debug(NES_DBG_CM, "Tx Free cnt   : %u \n", skb_queue_len(&core->tx_free_list));
 	nes_debug(NES_DBG_CM, "Listen Nodes  : %u \n", atomic_read(&core->listen_node_cnt));
 	nes_debug(NES_DBG_CM, "Active Nodes  : %u \n", atomic_read(&core->node_cnt));
 
@@ -381,8 +378,6 @@
 	int ret = 0;
 	u32 was_timer_set;
 
-	if (!cm_node)
-		return -EINVAL;
 	new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC);
 	if (!new_send)
 		return -1;
@@ -459,13 +454,23 @@
 	int ret = NETDEV_TX_OK;
 	enum nes_cm_node_state last_state;
 
+	struct list_head timer_list;
+	INIT_LIST_HEAD(&timer_list);
 	spin_lock_irqsave(&cm_core->ht_lock, flags);
 
 	list_for_each_safe(list_node, list_core_temp,
-		&cm_core->connected_nodes) {
+				&cm_core->connected_nodes) {
 		cm_node = container_of(list_node, struct nes_cm_node, list);
-		add_ref_cm_node(cm_node);
-		spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+		if (!list_empty(&cm_node->recv_list) || (cm_node->send_entry)) {
+			add_ref_cm_node(cm_node);
+			list_add(&cm_node->timer_entry, &timer_list);
+		}
+	}
+	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+	list_for_each_safe(list_node, list_core_temp, &timer_list) {
+		cm_node = container_of(list_node, struct nes_cm_node,
+					timer_entry);
 		spin_lock_irqsave(&cm_node->recv_list_lock, flags);
 		list_for_each_safe(list_core, list_node_temp,
 			&cm_node->recv_list) {
@@ -519,7 +524,7 @@
 		do {
 			send_entry = cm_node->send_entry;
 			if (!send_entry)
-				continue;
+				break;
 			if (time_after(send_entry->timetosend, jiffies)) {
 				if (cm_node->state != NES_CM_STATE_TSA) {
 					if ((nexttimeout >
@@ -528,18 +533,18 @@
 						nexttimeout =
 							send_entry->timetosend;
 						settimer = 1;
-						continue;
+						break;
 					}
 				} else {
 					free_retrans_entry(cm_node);
-					continue;
+					break;
 				}
 			}
 
 			if ((cm_node->state == NES_CM_STATE_TSA) ||
 				(cm_node->state == NES_CM_STATE_CLOSED)) {
 				free_retrans_entry(cm_node);
-				continue;
+				break;
 			}
 
 			if (!send_entry->retranscount ||
@@ -557,7 +562,7 @@
 						NES_CM_EVENT_ABORTED);
 				spin_lock_irqsave(&cm_node->retrans_list_lock,
 					flags);
-				continue;
+				break;
 			}
 			atomic_inc(&send_entry->skb->users);
 			cm_packets_retrans++;
@@ -583,7 +588,7 @@
 				send_entry->retrycount--;
 				nexttimeout = jiffies + NES_SHORT_TIME;
 				settimer = 1;
-				continue;
+				break;
 			} else {
 				cm_packets_sent++;
 			}
@@ -615,14 +620,12 @@
 
 		spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
 		rem_ref_cm_node(cm_node->cm_core, cm_node);
-		spin_lock_irqsave(&cm_core->ht_lock, flags);
 		if (ret != NETDEV_TX_OK) {
 			nes_debug(NES_DBG_CM, "rexmit failed for cm_node=%p\n",
 				cm_node);
 			break;
 		}
 	}
-	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
 
 	if (settimer) {
 		if (!timer_pending(&cm_core->tcp_timer)) {
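
The nes_cm_timer_tick() rework fixes two things. First, lock discipline: the old loop dropped and re-took ht_lock around each node's work while iterating connected_nodes with list_for_each_safe(), so the list could mutate mid-walk; the new code snapshots the nodes needing service onto a private timer_list under ht_lock (pinning each with a reference) and services them off-lock. Second, the continue-to-break conversions: the body is a do { } while loop, where continue jumps to the loop condition and can re-process the same send_entry, while break correctly moves on. The snapshot pattern in isolation (needs_service() stands in for the recv_list/send_entry test):

	LIST_HEAD(todo);

	spin_lock_irqsave(&cm_core->ht_lock, flags);
	list_for_each_entry(cm_node, &cm_core->connected_nodes, list) {
		if (needs_service(cm_node)) {		/* hypothetical */
			add_ref_cm_node(cm_node);	/* pin the node */
			list_add(&cm_node->timer_entry, &todo);
		}
	}
	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
	/* walk 'todo' without ht_lock; drop the references when done */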
@@ -683,7 +686,7 @@
 	optionssize += 1;
 
 	if (!skb)
-		skb = get_free_pkt(cm_node);
+		skb = dev_alloc_skb(MAX_CM_BUFFER);
 	if (!skb) {
 		nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
 		return -1;
@@ -708,7 +711,7 @@
 	int flags = SET_RST | SET_ACK;
 
 	if (!skb)
-		skb = get_free_pkt(cm_node);
+		skb = dev_alloc_skb(MAX_CM_BUFFER);
 	if (!skb) {
 		nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
 		return -1;
@@ -729,7 +732,7 @@
 	int ret;
 
 	if (!skb)
-		skb = get_free_pkt(cm_node);
+		skb = dev_alloc_skb(MAX_CM_BUFFER);
 
 	if (!skb) {
 		nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
@@ -752,7 +755,7 @@
 
 	/* if we didn't get a frame get one */
 	if (!skb)
-		skb = get_free_pkt(cm_node);
+		skb = dev_alloc_skb(MAX_CM_BUFFER);
 
 	if (!skb) {
 		nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n");
@@ -767,59 +770,15 @@
 
 
 /**
- * get_free_pkt
- */
-static struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node)
-{
-	struct sk_buff *skb, *new_skb;
-
-	/* check to see if we need to repopulate the free tx pkt queue */
-	if (skb_queue_len(&cm_node->cm_core->tx_free_list) < NES_CM_FREE_PKT_LO_WATERMARK) {
-		while (skb_queue_len(&cm_node->cm_core->tx_free_list) <
-				cm_node->cm_core->free_tx_pkt_max) {
-			/* replace the frame we took, we won't get it back */
-			new_skb = dev_alloc_skb(cm_node->cm_core->mtu);
-			BUG_ON(!new_skb);
-			/* add a replacement frame to the free tx list head */
-			skb_queue_head(&cm_node->cm_core->tx_free_list, new_skb);
-		}
-	}
-
-	skb = skb_dequeue(&cm_node->cm_core->tx_free_list);
-
-	return skb;
-}
-
-
-/**
- * make_hashkey - generate hash key from node tuple
- */
-static inline int make_hashkey(u16 loc_port, nes_addr_t loc_addr, u16 rem_port,
-		nes_addr_t rem_addr)
-{
-	u32 hashkey = 0;
-
-	hashkey = loc_addr + rem_addr + loc_port + rem_port;
-	hashkey = (hashkey % NES_CM_HASHTABLE_SIZE);
-
-	return hashkey;
-}
-
-
-/**
  * find_node - find a cm node that matches the reference cm node
  */
 static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
 		u16 rem_port, nes_addr_t rem_addr, u16 loc_port, nes_addr_t loc_addr)
 {
 	unsigned long flags;
-	u32 hashkey;
 	struct list_head *hte;
 	struct nes_cm_node *cm_node;
 
-	/* make a hash index key for this packet */
-	hashkey = make_hashkey(loc_port, loc_addr, rem_port, rem_addr);
-
 	/* get a handle on the hte */
 	hte = &cm_core->connected_nodes;
 
@@ -887,7 +846,6 @@
 static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node)
 {
 	unsigned long flags;
-	u32 hashkey;
 	struct list_head *hte;
 
 	if (!cm_node || !cm_core)
@@ -896,11 +854,6 @@
 	nes_debug(NES_DBG_CM, "Adding Node %p to Active Connection HT\n",
 		cm_node);
 
-	/* first, make an index into our hash table */
-	hashkey = make_hashkey(cm_node->loc_port, cm_node->loc_addr,
-			cm_node->rem_port, cm_node->rem_addr);
-	cm_node->hashkey = hashkey;
-
 	spin_lock_irqsave(&cm_core->ht_lock, flags);
 
 	/* get a handle on the hash table element (list head for this slot) */
@@ -925,28 +878,36 @@
 	struct list_head *list_pos = NULL;
 	struct list_head *list_temp = NULL;
 	struct nes_cm_node *cm_node = NULL;
+	struct list_head reset_list;
 
 	nes_debug(NES_DBG_CM, "attempting listener= %p free_nodes= %d, "
 		"refcnt=%d\n", listener, free_hanging_nodes,
 		atomic_read(&listener->ref_count));
 	/* free non-accelerated child nodes for this listener */
+	INIT_LIST_HEAD(&reset_list);
 	if (free_hanging_nodes) {
 		spin_lock_irqsave(&cm_core->ht_lock, flags);
 		list_for_each_safe(list_pos, list_temp,
-			&g_cm_core->connected_nodes) {
+				   &g_cm_core->connected_nodes) {
 			cm_node = container_of(list_pos, struct nes_cm_node,
 				list);
 			if ((cm_node->listener == listener) &&
-				(!cm_node->accelerated)) {
-				cleanup_retrans_entry(cm_node);
-				spin_unlock_irqrestore(&cm_core->ht_lock,
-					flags);
-				send_reset(cm_node, NULL);
-				spin_lock_irqsave(&cm_core->ht_lock, flags);
+			    (!cm_node->accelerated)) {
+				add_ref_cm_node(cm_node);
+				list_add(&cm_node->reset_entry, &reset_list);
 			}
 		}
 		spin_unlock_irqrestore(&cm_core->ht_lock, flags);
 	}
+
+	list_for_each_safe(list_pos, list_temp, &reset_list) {
+		cm_node = container_of(list_pos, struct nes_cm_node,
+					reset_entry);
+		cleanup_retrans_entry(cm_node);
+		send_reset(cm_node, NULL);
+		rem_ref_cm_node(cm_node->cm_core, cm_node);
+	}
+
 	spin_lock_irqsave(&cm_core->listen_list_lock, flags);
 	if (!atomic_dec_return(&listener->ref_count)) {
 		list_del(&listener->list);
@@ -1126,7 +1087,10 @@
 
 	cm_node->loopbackpartner = NULL;
 	/* get the mac addr for the remote node */
-	arpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
+	if (ipv4_is_loopback(htonl(cm_node->rem_addr)))
+		arpindex = nes_arp_table(nesdev, ntohl(nesvnic->local_ipaddr), NULL, NES_ARP_RESOLVE);
+	else
+		arpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
 	if (arpindex < 0) {
 		arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr);
 		if (arpindex < 0) {
@@ -1306,7 +1270,6 @@
 static void handle_fin_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
 	struct tcphdr *tcph)
 {
-	atomic_inc(&cm_resets_recvd);
 	nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. "
 		"refcnt=%d\n", cm_node, cm_node->state,
 		atomic_read(&cm_node->ref_count));
@@ -1344,6 +1307,7 @@
 {
 
 	int	reset = 0;	/* whether to send reset in case of err.. */
+	int	passive_state;
 	atomic_inc(&cm_resets_recvd);
 	nes_debug(NES_DBG_CM, "Received Reset, cm_node = %p, state = %u."
 			" refcnt=%d\n", cm_node, cm_node->state,
@@ -1357,7 +1321,14 @@
 			cm_node->listener, cm_node->state);
 		active_open_err(cm_node, skb, reset);
 		break;
-	/* For PASSIVE open states, remove the cm_node event */
+	case NES_CM_STATE_MPAREQ_RCVD:
+		passive_state = atomic_add_return(1, &cm_node->passive_state);
+		if (passive_state ==  NES_SEND_RESET_EVENT)
+			create_event(cm_node, NES_CM_EVENT_RESET);
+		cleanup_retrans_entry(cm_node);
+		cm_node->state = NES_CM_STATE_CLOSED;
+		dev_kfree_skb_any(skb);
+		break;
 	case NES_CM_STATE_ESTABLISHED:
 	case NES_CM_STATE_SYN_RCVD:
 	case NES_CM_STATE_LISTENING:
@@ -1365,7 +1336,14 @@
 		passive_open_err(cm_node, skb, reset);
 		break;
 	case NES_CM_STATE_TSA:
+		active_open_err(cm_node, skb, reset);
+		break;
+	case NES_CM_STATE_CLOSED:
+		cleanup_retrans_entry(cm_node);
+		drop_packet(skb);
+		break;
 	default:
+		drop_packet(skb);
 		break;
 	}
 }
@@ -1394,6 +1372,9 @@
 		dev_kfree_skb_any(skb);
 		if (type == NES_CM_EVENT_CONNECTED)
 			cm_node->state = NES_CM_STATE_TSA;
+		else
+			atomic_set(&cm_node->passive_state,
+					NES_PASSIVE_STATE_INDICATED);
 		create_event(cm_node, type);
 
 	}
@@ -1474,7 +1455,7 @@
 	int optionsize;
 
 	optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
-	skb_pull(skb, tcph->doff << 2);
+	skb_trim(skb, 0);
 	inc_sequence = ntohl(tcph->seq);
 
 	switch (cm_node->state) {
@@ -1507,6 +1488,10 @@
 		cm_node->state = NES_CM_STATE_SYN_RCVD;
 		send_syn(cm_node, 1, skb);
 		break;
+	case NES_CM_STATE_CLOSED:
+		cleanup_retrans_entry(cm_node);
+		send_reset(cm_node, skb);
+		break;
 	case NES_CM_STATE_TSA:
 	case NES_CM_STATE_ESTABLISHED:
 	case NES_CM_STATE_FIN_WAIT1:
@@ -1515,7 +1500,6 @@
 	case NES_CM_STATE_LAST_ACK:
 	case NES_CM_STATE_CLOSING:
 	case NES_CM_STATE_UNKNOWN:
-	case NES_CM_STATE_CLOSED:
 	default:
 		drop_packet(skb);
 		break;
@@ -1531,7 +1515,7 @@
 	int optionsize;
 
 	optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
-	skb_pull(skb, tcph->doff << 2);
+	skb_trim(skb, 0);
 	inc_sequence = ntohl(tcph->seq);
 	switch (cm_node->state) {
 	case NES_CM_STATE_SYN_SENT:
@@ -1555,6 +1539,12 @@
 		/* passive open, so should not be here */
 		passive_open_err(cm_node, skb, 1);
 		break;
+	case NES_CM_STATE_LISTENING:
+	case NES_CM_STATE_CLOSED:
+		cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+		cleanup_retrans_entry(cm_node);
+		send_reset(cm_node, skb);
+		break;
 	case NES_CM_STATE_ESTABLISHED:
 	case NES_CM_STATE_FIN_WAIT1:
 	case NES_CM_STATE_FIN_WAIT2:
@@ -1562,7 +1552,6 @@
 	case NES_CM_STATE_TSA:
 	case NES_CM_STATE_CLOSING:
 	case NES_CM_STATE_UNKNOWN:
-	case NES_CM_STATE_CLOSED:
 	case NES_CM_STATE_MPAREQ_SENT:
 	default:
 		drop_packet(skb);
@@ -1577,6 +1566,13 @@
 	u32 inc_sequence;
 	u32 rem_seq_ack;
 	u32 rem_seq;
+	int ret;
+	int optionsize;
+	u32 temp_seq = cm_node->tcp_cntxt.loc_seq_num;
+
+	optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
+	cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+
 	if (check_seq(cm_node, tcph, skb))
 		return;
 
@@ -1589,7 +1585,18 @@
 	switch (cm_node->state) {
 	case NES_CM_STATE_SYN_RCVD:
 		/* Passive OPEN */
+		ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 1);
+		if (ret)
+			break;
 		cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
+		cm_node->tcp_cntxt.loc_seq_num = temp_seq;
+		if (cm_node->tcp_cntxt.rem_ack_num !=
+		    cm_node->tcp_cntxt.loc_seq_num) {
+			nes_debug(NES_DBG_CM, "rem_ack_num != loc_seq_num\n");
+			cleanup_retrans_entry(cm_node);
+			send_reset(cm_node, skb);
+			return;
+		}
 		cm_node->state = NES_CM_STATE_ESTABLISHED;
 		if (datasize) {
 			cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
@@ -1621,11 +1628,15 @@
 			dev_kfree_skb_any(skb);
 		}
 		break;
+	case NES_CM_STATE_LISTENING:
+	case NES_CM_STATE_CLOSED:
+		cleanup_retrans_entry(cm_node);
+		send_reset(cm_node, skb);
+		break;
 	case NES_CM_STATE_FIN_WAIT1:
 	case NES_CM_STATE_SYN_SENT:
 	case NES_CM_STATE_FIN_WAIT2:
 	case NES_CM_STATE_TSA:
-	case NES_CM_STATE_CLOSED:
 	case NES_CM_STATE_MPAREQ_RCVD:
 	case NES_CM_STATE_LAST_ACK:
 	case NES_CM_STATE_CLOSING:
@@ -1648,9 +1659,9 @@
 			nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n",
 				__func__, cm_node);
 			if (passive)
-				passive_open_err(cm_node, skb, 0);
+				passive_open_err(cm_node, skb, 1);
 			else
-				active_open_err(cm_node, skb, 0);
+				active_open_err(cm_node, skb, 1);
 			return 1;
 		}
 	}
@@ -1970,6 +1981,7 @@
 	struct ietf_mpa_frame *mpa_frame, struct nes_cm_node *cm_node)
 {
 	int ret = 0;
+	int passive_state;
 
 	nes_debug(NES_DBG_CM, "%s cm_node=%p type=%d state=%d\n",
 		__func__, cm_node, cm_node->tcp_cntxt.client, cm_node->state);
@@ -1977,9 +1989,13 @@
 	if (cm_node->tcp_cntxt.client)
 		return ret;
 	cleanup_retrans_entry(cm_node);
-	cm_node->state = NES_CM_STATE_CLOSED;
 
-	ret = send_reset(cm_node, NULL);
+	passive_state = atomic_add_return(1, &cm_node->passive_state);
+	cm_node->state = NES_CM_STATE_CLOSED;
+	if (passive_state == NES_SEND_RESET_EVENT)
+		rem_ref_cm_node(cm_core, cm_node);
+	else
+		ret = send_reset(cm_node, NULL);
 	return ret;
 }
 
@@ -2037,7 +2053,7 @@
  * recv_pkt - recv an ETHERNET packet, and process it through CM
  * node state machine
  */
-static void mini_cm_recv_pkt(struct nes_cm_core *cm_core,
+static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
 	struct nes_vnic *nesvnic, struct sk_buff *skb)
 {
 	struct nes_cm_node *cm_node = NULL;
@@ -2045,23 +2061,16 @@
 	struct iphdr *iph;
 	struct tcphdr *tcph;
 	struct nes_cm_info nfo;
+	int skb_handled = 1;
 
 	if (!skb)
-		return;
+		return 0;
 	if (skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) {
-		dev_kfree_skb_any(skb);
-		return;
+		return 0;
 	}
 
 	iph = (struct iphdr *)skb->data;
 	tcph = (struct tcphdr *)(skb->data + sizeof(struct iphdr));
-	skb_reset_network_header(skb);
-	skb_set_transport_header(skb, sizeof(*tcph));
-	if (!tcph) {
-		dev_kfree_skb_any(skb);
-		return;
-	}
-	skb->len = ntohs(iph->tot_len);
 
 	nfo.loc_addr = ntohl(iph->daddr);
 	nfo.loc_port = ntohs(tcph->dest);
@@ -2082,23 +2091,21 @@
 			/* Only type of packet accepted are for */
 			/* the PASSIVE open (syn only) */
 			if ((!tcph->syn) || (tcph->ack)) {
-				cm_packets_dropped++;
+				skb_handled = 0;
 				break;
 			}
 			listener = find_listener(cm_core, nfo.loc_addr,
 				nfo.loc_port,
 				NES_CM_LISTENER_ACTIVE_STATE);
-			if (listener) {
-				nfo.cm_id = listener->cm_id;
-				nfo.conn_type = listener->conn_type;
-			} else {
-				nes_debug(NES_DBG_CM, "Unable to find listener "
-					"for the pkt\n");
-				cm_packets_dropped++;
-				dev_kfree_skb_any(skb);
+			if (!listener) {
+				nfo.cm_id = NULL;
+				nfo.conn_type = 0;
+				nes_debug(NES_DBG_CM, "Unable to find listener for the pkt\n");
+				skb_handled = 0;
 				break;
 			}
-
+			nfo.cm_id = listener->cm_id;
+			nfo.conn_type = listener->conn_type;
 			cm_node = make_cm_node(cm_core, nesvnic, &nfo,
 				listener);
 			if (!cm_node) {
@@ -2124,9 +2131,13 @@
 			dev_kfree_skb_any(skb);
 			break;
 		}
+		skb_reset_network_header(skb);
+		skb_set_transport_header(skb, sizeof(*tcph));
+		skb->len = ntohs(iph->tot_len);
 		process_packet(cm_node, skb, cm_core);
 		rem_ref_cm_node(cm_core, cm_node);
 	} while (0);
+	return skb_handled;
 }
 
 
@@ -2135,10 +2146,7 @@
  */
 static struct nes_cm_core *nes_cm_alloc_core(void)
 {
-	int i;
-
 	struct nes_cm_core *cm_core;
-	struct sk_buff *skb = NULL;
 
 	/* setup the CM core */
 	/* alloc top level core control structure */
@@ -2156,19 +2164,6 @@
 
 	atomic_set(&cm_core->events_posted, 0);
 
-	/* init the packet lists */
-	skb_queue_head_init(&cm_core->tx_free_list);
-
-	for (i = 0; i < NES_CM_DEFAULT_FRAME_CNT; i++) {
-		skb = dev_alloc_skb(cm_core->mtu);
-		if (!skb) {
-			kfree(cm_core);
-			return NULL;
-		}
-		/* add 'raw' skb to free frame list */
-		skb_queue_head(&cm_core->tx_free_list, skb);
-	}
-
 	cm_core->api = &nes_cm_api;
 
 	spin_lock_init(&cm_core->ht_lock);
@@ -2397,7 +2392,6 @@
 			atomic_inc(&cm_disconnects);
 			cm_event.event = IW_CM_EVENT_DISCONNECT;
 			if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET) {
-				issued_disconnect_reset = 1;
 				cm_event.status = IW_CM_EVENT_STATUS_RESET;
 				nes_debug(NES_DBG_CM, "Generating a CM "
 					"Disconnect Event (status reset) for "
@@ -2547,6 +2541,7 @@
 	struct nes_v4_quad nes_quad;
 	u32 crc_value;
 	int ret;
+	int passive_state;
 
 	ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
 	if (!ibqp)
@@ -2714,8 +2709,6 @@
 			conn_param->private_data_len +
 			sizeof(struct ietf_mpa_frame));
 
-	attr.qp_state = IB_QPS_RTS;
-	nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
 
 	/* notify OF layer that accept event was successfull */
 	cm_id->add_ref(cm_id);
@@ -2728,6 +2721,8 @@
 	cm_event.private_data = NULL;
 	cm_event.private_data_len = 0;
 	ret = cm_id->event_handler(cm_id, &cm_event);
+	attr.qp_state = IB_QPS_RTS;
+	nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
 	if (cm_node->loopbackpartner) {
 		cm_node->loopbackpartner->mpa_frame_size =
 			nesqp->private_data_len;
@@ -2740,6 +2735,9 @@
 		printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
 			"ret=%d\n", __func__, __LINE__, ret);
 
+	passive_state = atomic_add_return(1, &cm_node->passive_state);
+	if (passive_state == NES_SEND_RESET_EVENT)
+		create_event(cm_node, NES_CM_EVENT_RESET);
 	return 0;
 }
 
@@ -2943,15 +2941,16 @@
  */
 int nes_cm_recv(struct sk_buff *skb, struct net_device *netdevice)
 {
+	int rc = 0;
 	cm_packets_received++;
 	if ((g_cm_core) && (g_cm_core->api)) {
-		g_cm_core->api->recv_pkt(g_cm_core, netdev_priv(netdevice), skb);
+		rc = g_cm_core->api->recv_pkt(g_cm_core, netdev_priv(netdevice), skb);
 	} else {
 		nes_debug(NES_DBG_CM, "Unable to process packet for CM,"
 				" cm is not setup properly.\n");
 	}
 
-	return 0;
+	return rc;
 }
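
mini_cm_recv_pkt()'s new int return propagates skb ownership: nonzero means CM consumed the packet, zero hands it back to the caller (note the short-packet and no-listener paths no longer free the skb themselves, and the header rewriting moves down so it only happens once a CM node is actually found). nes_cm_recv() forwards the result, and the NIC receive path in nes_hw.c (below) uses it to decide whether normal indication should still happen:

	if ((pkt_type & NES_PKT_TYPE_APBVT_MASK) == NES_PKT_TYPE_APBVT_BITS) {
		if (nes_cm_recv(rx_skb, nesvnic->netdev))
			rx_skb = NULL;		/* consumed by CM */
	}
	if (rx_skb == NULL)
		goto skip_rx_indicate0;		/* nothing left to indicate */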
 
 
@@ -3222,6 +3221,18 @@
 	cm_event.private_data_len = 0;
 
 	ret = cm_id->event_handler(cm_id, &cm_event);
+	cm_id->add_ref(cm_id);
+	atomic_inc(&cm_closes);
+	cm_event.event = IW_CM_EVENT_CLOSE;
+	cm_event.status = IW_CM_EVENT_STATUS_OK;
+	cm_event.provider_data = cm_id->provider_data;
+	cm_event.local_addr = cm_id->local_addr;
+	cm_event.remote_addr = cm_id->remote_addr;
+	cm_event.private_data = NULL;
+	cm_event.private_data_len = 0;
+	nes_debug(NES_DBG_CM, "NODE %p Generating CLOSE\n", event->cm_node);
+	ret = cm_id->event_handler(cm_id, &cm_event);
+
 	nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
 
 
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
index 367b3d2..fafa350 100644
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -76,6 +76,10 @@
 	NES_TIMER_TYPE_CLOSE,
 };
 
+#define NES_PASSIVE_STATE_INDICATED	0
+#define NES_DO_NOT_SEND_RESET_EVENT	1
+#define NES_SEND_RESET_EVENT		2
+
 #define MAX_NES_IFS 4
 
 #define SET_ACK 1
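
The three passive-state values implement a small handshake between two racing passive-side paths (an inbound RST in handle_rst_pkt() versus mini_cm_reject()/nes_accept() completing). Each does the same atomic step; starting from NES_PASSIVE_STATE_INDICATED, the first arrival reads 1 (NES_DO_NOT_SEND_RESET_EVENT) and stands down, the second reads 2 (NES_SEND_RESET_EVENT) and owns the reset handling, so exactly one side acts regardless of interleaving:

	/* both racers execute: */
	passive_state = atomic_add_return(1, &cm_node->passive_state);
	if (passive_state == NES_SEND_RESET_EVENT)
		create_event(cm_node, NES_CM_EVENT_RESET);  /* second arrival
							     * does the work */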
@@ -161,6 +165,8 @@
 
 #define NES_CM_DEF_SEQ2      0x18ed5740
 #define NES_CM_DEF_LOCAL_ID2 0xb807
+#define	MAX_CM_BUFFER	512
+
 
 typedef u32 nes_addr_t;
 
@@ -254,8 +260,6 @@
 
 /* per connection node and node state information */
 struct nes_cm_node {
-	u32                       hashkey;
-
 	nes_addr_t                loc_addr, rem_addr;
 	u16                       loc_port, rem_port;
 
@@ -292,7 +296,10 @@
 	int                       apbvt_set;
 	int                       accept_pend;
 	int			freed;
+	struct list_head	timer_entry;
+	struct list_head	reset_entry;
 	struct nes_qp		*nesqp;
+	atomic_t 		passive_state;
 };
 
 /* structure for client or CM to fill when making CM api calls. */
@@ -350,7 +357,6 @@
 	u32                     mtu;
 	u32                     free_tx_pkt_max;
 	u32                     rx_pkt_posted;
-	struct sk_buff_head     tx_free_list;
 	atomic_t                ht_node_cnt;
 	struct list_head        connected_nodes;
 	/* struct list_head hashtable[NES_CM_HASHTABLE_SIZE]; */
@@ -390,7 +396,7 @@
 			struct nes_cm_node *);
 	int (*reject)(struct nes_cm_core *, struct ietf_mpa_frame *,
 			struct nes_cm_node *);
-	void (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *,
+	int (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *,
 			struct sk_buff *);
 	int (*destroy_cm_core)(struct nes_cm_core *);
 	int (*get)(struct nes_cm_core *);
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 7c49cc8..8f70ff2 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -2700,27 +2700,33 @@
 							pkt_type, (pkt_type & NES_PKT_TYPE_APBVT_MASK)); */
 
 				if ((pkt_type & NES_PKT_TYPE_APBVT_MASK) == NES_PKT_TYPE_APBVT_BITS) {
-					nes_cm_recv(rx_skb, nesvnic->netdev);
+					if (nes_cm_recv(rx_skb, nesvnic->netdev))
+						rx_skb = NULL;
+				}
+				if (rx_skb == NULL)
+					goto skip_rx_indicate0;
+
+
+				if ((cqe_misc & NES_NIC_CQE_TAG_VALID) &&
+				    (nesvnic->vlan_grp != NULL)) {
+					vlan_tag = (u16)(le32_to_cpu(
+							cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX])
+							>> 16);
+					nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. Tag = 0x%04X\n",
+							nesvnic->netdev->name, vlan_tag);
+					if (nes_use_lro)
+						lro_vlan_hwaccel_receive_skb(&nesvnic->lro_mgr, rx_skb,
+								nesvnic->vlan_grp, vlan_tag, NULL);
+					else
+						nes_vlan_rx(rx_skb, nesvnic->vlan_grp, vlan_tag);
 				} else {
-					if ((cqe_misc & NES_NIC_CQE_TAG_VALID) && (nesvnic->vlan_grp != NULL)) {
-						vlan_tag = (u16)(le32_to_cpu(
-								cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX])
-								>> 16);
-						nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. Tag = 0x%04X\n",
-								nesvnic->netdev->name, vlan_tag);
-						if (nes_use_lro)
-							lro_vlan_hwaccel_receive_skb(&nesvnic->lro_mgr, rx_skb,
-									nesvnic->vlan_grp, vlan_tag, NULL);
-						else
-							nes_vlan_rx(rx_skb, nesvnic->vlan_grp, vlan_tag);
-					} else {
-						if (nes_use_lro)
-							lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL);
-						else
-							nes_netif_rx(rx_skb);
-					}
+					if (nes_use_lro)
+						lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL);
+					else
+						nes_netif_rx(rx_skb);
 				}
 
+skip_rx_indicate0:
 				nesvnic->netdev->last_rx = jiffies;
 				/* nesvnic->netstats.rx_packets++; */
 				/* nesvnic->netstats.rx_bytes += rx_pkt_size; */
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
index fb8cbd7..5611a73 100644
--- a/drivers/infiniband/hw/nes/nes_utils.c
+++ b/drivers/infiniband/hw/nes/nes_utils.c
@@ -540,11 +540,14 @@
 
 	if (!list_empty(&nesdev->cqp_avail_reqs)) {
 		spin_lock_irqsave(&nesdev->cqp.lock, flags);
-		cqp_request = list_entry(nesdev->cqp_avail_reqs.next,
+		if (!list_empty(&nesdev->cqp_avail_reqs)) {
+			cqp_request = list_entry(nesdev->cqp_avail_reqs.next,
 				struct nes_cqp_request, list);
-		list_del_init(&cqp_request->list);
+			list_del_init(&cqp_request->list);
+		}
 		spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
-	} else {
+	}
+	if (cqp_request == NULL) {
 		cqp_request = kzalloc(sizeof(struct nes_cqp_request), GFP_KERNEL);
 		if (cqp_request) {
 			cqp_request->dynamic = 1;
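The rework above closes a race in get_cqp_request(): the first list_empty() test runs without cqp.lock held, so another CPU can drain the list before the lock is taken; the list is therefore re-checked under the lock, and the kzalloc() fallback now also covers a failed re-check. A minimal sketch of the check-lock-recheck pattern, assuming a user-space free list guarded by a pthread mutex:

	#include <pthread.h>
	#include <stdlib.h>

	struct req { struct req *next; };

	static struct req *free_list;
	static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

	static struct req *get_request(void)
	{
		struct req *r = NULL;

		if (free_list) {		/* unlocked test: only a hint */
			pthread_mutex_lock(&list_lock);
			if (free_list) {	/* authoritative re-check */
				r = free_list;
				free_list = r->next;
			}
			pthread_mutex_unlock(&list_lock);
		}
		if (r == NULL)			/* list raced empty or was empty */
			r = calloc(1, sizeof(*r));
		return r;
	}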
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index d36c9a0..4fdb7245 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -1695,13 +1695,8 @@
 			/* use 4k pbl */
 			nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 4k PBL\n", pbl_entries);
 			if (nesadapter->free_4kpbl == 0) {
-				if (cqp_request->dynamic) {
-					spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-					kfree(cqp_request);
-				} else {
-					list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-					spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-				}
+				spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+				nes_free_cqp_request(nesdev, cqp_request);
 				if (!context)
 					pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
 							nescq->hw_cq.cq_pbase);
@@ -1717,13 +1712,8 @@
 			/* use 256 byte pbl */
 			nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 256 byte PBL\n", pbl_entries);
 			if (nesadapter->free_256pbl == 0) {
-				if (cqp_request->dynamic) {
-					spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-					kfree(cqp_request);
-				} else {
-					list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-					spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-				}
+				spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+				nes_free_cqp_request(nesdev, cqp_request);
 				if (!context)
 					pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem,
 							nescq->hw_cq.cq_pbase);
@@ -1928,13 +1918,8 @@
 			/* Two level PBL */
 			if ((pbl_count+1) > nesadapter->free_4kpbl) {
 				nes_debug(NES_DBG_MR, "Out of 4KB Pbls for two level request.\n");
-				if (cqp_request->dynamic) {
-					spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-					kfree(cqp_request);
-				} else {
-					list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-					spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-				}
+				spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+				nes_free_cqp_request(nesdev, cqp_request);
 				return -ENOMEM;
 			} else {
 				nesadapter->free_4kpbl -= pbl_count+1;
@@ -1942,13 +1927,8 @@
 		} else if (residual_page_count > 32) {
 			if (pbl_count > nesadapter->free_4kpbl) {
 				nes_debug(NES_DBG_MR, "Out of 4KB Pbls.\n");
-				if (cqp_request->dynamic) {
-					spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-					kfree(cqp_request);
-				} else {
-					list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-					spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-				}
+				spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+				nes_free_cqp_request(nesdev, cqp_request);
 				return -ENOMEM;
 			} else {
 				nesadapter->free_4kpbl -= pbl_count;
@@ -1956,13 +1936,8 @@
 		} else {
 			if (pbl_count > nesadapter->free_256pbl) {
 				nes_debug(NES_DBG_MR, "Out of 256B Pbls.\n");
-				if (cqp_request->dynamic) {
-					spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-					kfree(cqp_request);
-				} else {
-					list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs);
-					spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-				}
+				spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+				nes_free_cqp_request(nesdev, cqp_request);
 				return -ENOMEM;
 			} else {
 				nesadapter->free_256pbl -= pbl_count;
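All four error paths above collapse the same open-coded cleanup into nes_free_cqp_request(), after dropping pbl_lock (the old code released the request while still juggling that lock). Judging only from the code being removed, the helper presumably looks roughly like the sketch below; this is a hedged reconstruction, and the real helper may do more (debug output, waking waiters):

	static void nes_free_cqp_request(struct nes_device *nesdev,
					 struct nes_cqp_request *cqp_request)
	{
		unsigned long flags;

		if (cqp_request->dynamic) {
			/* dynamically allocated fallback request */
			kfree(cqp_request);
		} else {
			/* pooled request: return it to the free list */
			spin_lock_irqsave(&nesdev->cqp.lock, flags);
			list_add_tail(&cqp_request->list,
				      &nesdev->cqp_avail_reqs);
			spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
		}
	}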
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 81a8262..8611195 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -252,6 +252,9 @@
 	wait_queue_head_t	     wait;          /* waitq for conn/disconn  */
 	atomic_t                     post_recv_buf_count; /* posted rx count   */
 	atomic_t                     post_send_buf_count; /* posted tx count   */
+	atomic_t                     unexpected_pdu_count;/* count of received *
+							   * unexpected pdus   *
+							   * not yet retired   */
 	char 			     name[ISER_OBJECT_NAME_SIZE];
 	struct iser_page_vec         *page_vec;     /* represents SG to fmr maps*
 						     * maps serialized as tx is*/
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index cdd2831..ed1aff2 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -183,14 +183,8 @@
 	struct iser_regd_buf *regd_data;
 	struct iser_dto      *recv_dto = NULL;
 	struct iser_device  *device = iser_conn->ib_conn->device;
-	int rx_data_size, err = 0;
-
-	rx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO);
-	if (rx_desc == NULL) {
-		iser_err("Failed to alloc desc for post recv\n");
-		return -ENOMEM;
-	}
-	rx_desc->type = ISCSI_RX;
+	int rx_data_size, err;
+	int posts, outstanding_unexp_pdus;
 
 	/* for the login sequence we must support rx of upto 8K; login is done
 	 * after conn create/bind (connect) and conn stop/bind (reconnect),
@@ -201,46 +195,80 @@
 	else /* FIXME till user space sets conn->max_recv_dlength correctly */
 		rx_data_size = 128;
 
-	rx_desc->data = kmalloc(rx_data_size, GFP_NOIO);
-	if (rx_desc->data == NULL) {
-		iser_err("Failed to alloc data buf for post recv\n");
-		err = -ENOMEM;
-		goto post_rx_kmalloc_failure;
+	outstanding_unexp_pdus =
+		atomic_xchg(&iser_conn->ib_conn->unexpected_pdu_count, 0);
+
+	/*
+	 * in addition to the response buffer, replace those consumed by
+	 * unexpected pdus.
+	 */
+	for (posts = 0; posts < 1 + outstanding_unexp_pdus; posts++) {
+		rx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO);
+		if (rx_desc == NULL) {
+			iser_err("Failed to alloc desc for post recv %d\n",
+				 posts);
+			err = -ENOMEM;
+			goto post_rx_cache_alloc_failure;
+		}
+		rx_desc->type = ISCSI_RX;
+		rx_desc->data = kmalloc(rx_data_size, GFP_NOIO);
+		if (rx_desc->data == NULL) {
+			iser_err("Failed to alloc data buf for post recv %d\n",
+				 posts);
+			err = -ENOMEM;
+			goto post_rx_kmalloc_failure;
+		}
+
+		recv_dto = &rx_desc->dto;
+		recv_dto->ib_conn = iser_conn->ib_conn;
+		recv_dto->regd_vector_len = 0;
+
+		regd_hdr = &rx_desc->hdr_regd_buf;
+		memset(regd_hdr, 0, sizeof(struct iser_regd_buf));
+		regd_hdr->device  = device;
+		regd_hdr->virt_addr  = rx_desc; /* == &rx_desc->iser_header */
+		regd_hdr->data_size  = ISER_TOTAL_HEADERS_LEN;
+
+		iser_reg_single(device, regd_hdr, DMA_FROM_DEVICE);
+
+		iser_dto_add_regd_buff(recv_dto, regd_hdr, 0, 0);
+
+		regd_data = &rx_desc->data_regd_buf;
+		memset(regd_data, 0, sizeof(struct iser_regd_buf));
+		regd_data->device  = device;
+		regd_data->virt_addr  = rx_desc->data;
+		regd_data->data_size  = rx_data_size;
+
+		iser_reg_single(device, regd_data, DMA_FROM_DEVICE);
+
+		iser_dto_add_regd_buff(recv_dto, regd_data, 0, 0);
+
+		err = iser_post_recv(rx_desc);
+		if (err) {
+			iser_err("Failed iser_post_recv for post %d\n", posts);
+			goto post_rx_post_recv_failure;
+		}
 	}
+	/* all posts successful */
+	return 0;
 
-	recv_dto = &rx_desc->dto;
-	recv_dto->ib_conn = iser_conn->ib_conn;
-	recv_dto->regd_vector_len = 0;
-
-	regd_hdr = &rx_desc->hdr_regd_buf;
-	memset(regd_hdr, 0, sizeof(struct iser_regd_buf));
-	regd_hdr->device  = device;
-	regd_hdr->virt_addr  = rx_desc; /* == &rx_desc->iser_header */
-	regd_hdr->data_size  = ISER_TOTAL_HEADERS_LEN;
-
-	iser_reg_single(device, regd_hdr, DMA_FROM_DEVICE);
-
-	iser_dto_add_regd_buff(recv_dto, regd_hdr, 0, 0);
-
-	regd_data = &rx_desc->data_regd_buf;
-	memset(regd_data, 0, sizeof(struct iser_regd_buf));
-	regd_data->device  = device;
-	regd_data->virt_addr  = rx_desc->data;
-	regd_data->data_size  = rx_data_size;
-
-	iser_reg_single(device, regd_data, DMA_FROM_DEVICE);
-
-	iser_dto_add_regd_buff(recv_dto, regd_data, 0, 0);
-
-	err = iser_post_recv(rx_desc);
-	if (!err)
-		return 0;
-
-	/* iser_post_recv failed */
+post_rx_post_recv_failure:
 	iser_dto_buffs_release(recv_dto);
 	kfree(rx_desc->data);
 post_rx_kmalloc_failure:
 	kmem_cache_free(ig.desc_cache, rx_desc);
+post_rx_cache_alloc_failure:
+	if (posts > 0) {
+		/*
+		 * The response buffer was posted, but not all unexpected pdu
+		 * recv bufs were replaced; ignore the error, retry on next send
+		 */
+		outstanding_unexp_pdus -= (posts - 1);
+		err = 0;
+	}
+	atomic_add(outstanding_unexp_pdus,
+		   &iser_conn->ib_conn->unexpected_pdu_count);
+
 	return err;
 }
 
@@ -274,8 +302,10 @@
 	struct iscsi_iser_conn *iser_conn = conn->dd_data;
 
 	int i;
-	/* no need to keep it in a var, we are after login so if this should
-	 * be negotiated, by now the result should be available here */
+	/*
+	 * FIXME this value should be declared to the target during login with
+	 * the MaxOutstandingUnexpectedPDUs key when supported
+	 */
 	int initial_post_recv_bufs_num = ISER_MAX_RX_MISC_PDUS;
 
 	iser_dbg("Initially post: %d\n", initial_post_recv_bufs_num);
@@ -478,6 +508,7 @@
 	int err = 0;
 	struct iser_regd_buf *regd_buf;
 	struct iser_device *device;
+	unsigned char opcode;
 
 	if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) {
 		iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn);
@@ -512,10 +543,15 @@
 				       data_seg_len);
 	}
 
-	if (iser_post_receive_control(conn) != 0) {
-		iser_err("post_rcv_buff failed!\n");
-		err = -ENOMEM;
-		goto send_control_error;
+	opcode = task->hdr->opcode & ISCSI_OPCODE_MASK;
+
+	/* post recv buffer for response if one is expected */
+	if (!(opcode == ISCSI_OP_NOOP_OUT && task->hdr->itt == RESERVED_ITT)) {
+		if (iser_post_receive_control(conn) != 0) {
+			iser_err("post_rcv_buff failed!\n");
+			err = -ENOMEM;
+			goto send_control_error;
+		}
 	}
 
 	err = iser_post_send(mdesc);
@@ -586,6 +622,20 @@
 	 * parallel to the execution of iser_conn_term. So the code that waits *
 	 * for the posted rx bufs refcount to become zero handles everything   */
 	atomic_dec(&conn->ib_conn->post_recv_buf_count);
+
+	/*
+	 * if an unexpected PDU was received then the recv wr consumed must
+	 * if an unexpected PDU was received, the recv wr it consumed must
+	 * be replaced; this is done in the next send of a control-type PDU
+	if (opcode == ISCSI_OP_NOOP_IN && hdr->itt == RESERVED_ITT) {
+		/* nop-in with itt = 0xffffffff */
+		atomic_inc(&conn->ib_conn->unexpected_pdu_count);
+	}
+	else if (opcode == ISCSI_OP_ASYNC_EVENT) {
+		/* asynchronous message */
+		atomic_inc(&conn->ib_conn->unexpected_pdu_count);
+	}
+	/* a reject PDU consumes the recv buf posted for the response */
 }
 
 void iser_snd_completion(struct iser_desc *tx_desc)
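The replenish logic above is a debt-accounting scheme: unexpected_pdu_count records recv buffers consumed by unsolicited nop-ins and async events, iser_post_receive_control() drains it with atomic_xchg() and posts one response buffer plus one buffer per drained unit, and on partial failure the unreplaced remainder is credited back (once the response buffer itself has gone out, the error is ignored and the retry happens on the next send). A compact user-space model of just the counter arithmetic; post_one() is an assumed stand-in for the alloc-and-post step:

	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_int unexpected_pdu_count;

	/* stand-in for alloc + iser_post_recv(); fails from the 4th post */
	static int post_one(int i) { return i < 3 ? 0 : -1; }

	static int post_receive_control(void)
	{
		int debt = atomic_exchange(&unexpected_pdu_count, 0);
		int posts, err = 0;

		for (posts = 0; posts < 1 + debt; posts++) {
			err = post_one(posts);
			if (err)
				goto fail;
		}
		return 0;	/* all posts successful, debt fully repaid */

	fail:
		if (posts > 0) {
			/* response buffer went out; only the unreplaced
			 * remainder is owed to the next send */
			debt -= posts - 1;
			err = 0;
		}
		atomic_fetch_add(&unexpected_pdu_count, debt);
		return err;
	}

	int main(void)
	{
		atomic_store(&unexpected_pdu_count, 5);
		post_receive_control();	/* 3 of 6 posts succeed */
		printf("owed: %d\n", atomic_load(&unexpected_pdu_count)); /* 3 */
		return 0;
	}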
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 26ff621..6dc6b17 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -498,6 +498,7 @@
 	init_waitqueue_head(&ib_conn->wait);
 	atomic_set(&ib_conn->post_recv_buf_count, 0);
 	atomic_set(&ib_conn->post_send_buf_count, 0);
+	atomic_set(&ib_conn->unexpected_pdu_count, 0);
 	atomic_set(&ib_conn->refcount, 1);
 	INIT_LIST_HEAD(&ib_conn->conn_list);
 	spin_lock_init(&ib_conn->lock);
diff --git a/drivers/net/mlx4/cq.c b/drivers/net/mlx4/cq.c
index b7ad282..ac57b6a 100644
--- a/drivers/net/mlx4/cq.c
+++ b/drivers/net/mlx4/cq.c
@@ -189,7 +189,7 @@
 
 int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
 		  struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq,
-		  int collapsed)
+		  unsigned vector, int collapsed)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_cq_table *cq_table = &priv->cq_table;
@@ -198,6 +198,11 @@
 	u64 mtt_addr;
 	int err;
 
+	if (vector >= dev->caps.num_comp_vectors)
+		return -EINVAL;
+
+	cq->vector = vector;
+
 	cq->cqn = mlx4_bitmap_alloc(&cq_table->bitmap);
 	if (cq->cqn == -1)
 		return -ENOMEM;
@@ -227,7 +232,7 @@
 
 	cq_context->flags	    = cpu_to_be32(!!collapsed << 18);
 	cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index);
-	cq_context->comp_eqn        = priv->eq_table.eq[MLX4_EQ_COMP].eqn;
+	cq_context->comp_eqn	    = priv->eq_table.eq[vector].eqn;
 	cq_context->log_page_size   = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
 
 	mtt_addr = mlx4_mtt_addr(dev, mtt);
@@ -276,7 +281,7 @@
 	if (err)
 		mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn);
 
-	synchronize_irq(priv->eq_table.eq[MLX4_EQ_COMP].irq);
+	synchronize_irq(priv->eq_table.eq[cq->vector].irq);
 
 	spin_lock_irq(&cq_table->lock);
 	radix_tree_delete(&cq_table->tree, cq->cqn);
diff --git a/drivers/net/mlx4/en_cq.c b/drivers/net/mlx4/en_cq.c
index 1368a80..674f836 100644
--- a/drivers/net/mlx4/en_cq.c
+++ b/drivers/net/mlx4/en_cq.c
@@ -51,10 +51,13 @@
 	int err;
 
 	cq->size = entries;
-	if (mode == RX)
+	if (mode == RX) {
 		cq->buf_size = cq->size * sizeof(struct mlx4_cqe);
-	else
+		cq->vector   = ring % mdev->dev->caps.num_comp_vectors;
+	} else {
 		cq->buf_size = sizeof(struct mlx4_cqe);
+		cq->vector   = 0;
+	}
 
 	cq->ring = ring;
 	cq->is_tx = mode;
@@ -86,7 +89,7 @@
 	memset(cq->buf, 0, cq->buf_size);
 
 	err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, &mdev->priv_uar,
-			    cq->wqres.db.dma, &cq->mcq, cq->is_tx);
+			    cq->wqres.db.dma, &cq->mcq, cq->vector, cq->is_tx);
 	if (err)
 		return err;
 
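The RX branch above spreads completion work across the newly available vectors with a simple modulo, while TX CQs all stay on vector 0. For illustration only (both counts are assumptions), 8 RX rings over 3 completion vectors map round-robin:

	#include <stdio.h>

	int main(void)
	{
		int num_comp_vectors = 3;	/* assumed device capability */

		for (int ring = 0; ring < 8; ring++)
			printf("rx ring %d -> comp vector %d\n",
			       ring, ring % num_comp_vectors);
		return 0;	/* vectors cycle 0,1,2,0,1,2,0,1 */
	}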
diff --git a/drivers/net/mlx4/en_main.c b/drivers/net/mlx4/en_main.c
index 4b9794e..c1c05852 100644
--- a/drivers/net/mlx4/en_main.c
+++ b/drivers/net/mlx4/en_main.c
@@ -170,9 +170,9 @@
 		mlx4_info(mdev, "Using %d tx rings for port:%d\n",
 			  mdev->profile.prof[i].tx_ring_num, i);
 		if (!mdev->profile.prof[i].rx_ring_num) {
-			mdev->profile.prof[i].rx_ring_num = 1;
+			mdev->profile.prof[i].rx_ring_num = dev->caps.num_comp_vectors;
 			mlx4_info(mdev, "Defaulting to %d rx rings for port:%d\n",
-				  1, i);
+				  mdev->profile.prof[i].rx_ring_num, i);
 		} else
 			mlx4_info(mdev, "Using %d rx rings for port:%d\n",
 				  mdev->profile.prof[i].rx_ring_num, i);
diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
index de16933..2c19bff7c 100644
--- a/drivers/net/mlx4/eq.c
+++ b/drivers/net/mlx4/eq.c
@@ -243,10 +243,6 @@
 		 * least that often.
 		 */
 		if (unlikely(set_ci >= MLX4_NUM_SPARE_EQE)) {
-			/*
-			 * Conditional on hca_type is OK here because
-			 * this is a rare case, not the fast path.
-			 */
 			eq_set_ci(eq, 0);
 			set_ci = 0;
 		}
@@ -266,7 +262,7 @@
 
 	writel(priv->eq_table.clr_mask, priv->eq_table.clr_int);
 
-	for (i = 0; i < MLX4_NUM_EQ; ++i)
+	for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
 		work |= mlx4_eq_int(dev, &priv->eq_table.eq[i]);
 
 	return IRQ_RETVAL(work);
@@ -304,6 +300,17 @@
 			    MLX4_CMD_TIME_CLASS_A);
 }
 
+static int mlx4_num_eq_uar(struct mlx4_dev *dev)
+{
+	/*
+	 * Each UAR holds 4 EQ doorbells.  To figure out how many UARs
+	 * we need to map, take the difference of highest index and
+	 * the lowest index we'll use and add 1.
+	 */
+	return (dev->caps.num_comp_vectors + 1 + dev->caps.reserved_eqs) / 4 -
+		dev->caps.reserved_eqs / 4 + 1;
+}
+
 static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -483,9 +490,11 @@
 
 	if (eq_table->have_irq)
 		free_irq(dev->pdev->irq, dev);
-	for (i = 0; i < MLX4_NUM_EQ; ++i)
+	for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
 		if (eq_table->eq[i].have_irq)
 			free_irq(eq_table->eq[i].irq, eq_table->eq + i);
+
+	kfree(eq_table->irq_names);
 }
 
 static int mlx4_map_clr_int(struct mlx4_dev *dev)
@@ -551,57 +560,93 @@
 	__free_page(priv->eq_table.icm_page);
 }
 
+int mlx4_alloc_eq_table(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+
+	priv->eq_table.eq = kcalloc(dev->caps.num_eqs - dev->caps.reserved_eqs,
+				    sizeof *priv->eq_table.eq, GFP_KERNEL);
+	if (!priv->eq_table.eq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void mlx4_free_eq_table(struct mlx4_dev *dev)
+{
+	kfree(mlx4_priv(dev)->eq_table.eq);
+}
+
 int mlx4_init_eq_table(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	int err;
 	int i;
 
+	priv->eq_table.uar_map = kcalloc(mlx4_num_eq_uar(dev),
+					 sizeof *priv->eq_table.uar_map, GFP_KERNEL);
+	if (!priv->eq_table.uar_map) {
+		err = -ENOMEM;
+		goto err_out_free;
+	}
+
 	err = mlx4_bitmap_init(&priv->eq_table.bitmap, dev->caps.num_eqs,
 			       dev->caps.num_eqs - 1, dev->caps.reserved_eqs, 0);
 	if (err)
-		return err;
+		goto err_out_free;
 
-	for (i = 0; i < ARRAY_SIZE(priv->eq_table.uar_map); ++i)
+	for (i = 0; i < mlx4_num_eq_uar(dev); ++i)
 		priv->eq_table.uar_map[i] = NULL;
 
 	err = mlx4_map_clr_int(dev);
 	if (err)
-		goto err_out_free;
+		goto err_out_bitmap;
 
 	priv->eq_table.clr_mask =
 		swab32(1 << (priv->eq_table.inta_pin & 31));
 	priv->eq_table.clr_int  = priv->clr_base +
 		(priv->eq_table.inta_pin < 32 ? 4 : 0);
 
-	err = mlx4_create_eq(dev, dev->caps.num_cqs + MLX4_NUM_SPARE_EQE,
-			     (dev->flags & MLX4_FLAG_MSI_X) ? MLX4_EQ_COMP : 0,
-			     &priv->eq_table.eq[MLX4_EQ_COMP]);
-	if (err)
-		goto err_out_unmap;
+	priv->eq_table.irq_names = kmalloc(16 * dev->caps.num_comp_vectors, GFP_KERNEL);
+	if (!priv->eq_table.irq_names) {
+		err = -ENOMEM;
+		goto err_out_bitmap;
+	}
+
+	for (i = 0; i < dev->caps.num_comp_vectors; ++i) {
+		err = mlx4_create_eq(dev, dev->caps.num_cqs + MLX4_NUM_SPARE_EQE,
+				     (dev->flags & MLX4_FLAG_MSI_X) ? i : 0,
+				     &priv->eq_table.eq[i]);
+		if (err)
+			goto err_out_unmap;
+	}
 
 	err = mlx4_create_eq(dev, MLX4_NUM_ASYNC_EQE + MLX4_NUM_SPARE_EQE,
-			     (dev->flags & MLX4_FLAG_MSI_X) ? MLX4_EQ_ASYNC : 0,
-			     &priv->eq_table.eq[MLX4_EQ_ASYNC]);
+			     (dev->flags & MLX4_FLAG_MSI_X) ? dev->caps.num_comp_vectors : 0,
+			     &priv->eq_table.eq[dev->caps.num_comp_vectors]);
 	if (err)
 		goto err_out_comp;
 
 	if (dev->flags & MLX4_FLAG_MSI_X) {
-		static const char *eq_name[] = {
-			[MLX4_EQ_COMP]  = DRV_NAME " (comp)",
-			[MLX4_EQ_ASYNC] = DRV_NAME " (async)"
-		};
+		static const char async_eq_name[] = "mlx4-async";
+		const char *eq_name;
 
-		for (i = 0; i < MLX4_NUM_EQ; ++i) {
+		for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) {
+			if (i < dev->caps.num_comp_vectors) {
+				snprintf(priv->eq_table.irq_names + i * 16, 16,
+					 "mlx4-comp-%d", i);
+				eq_name = priv->eq_table.irq_names + i * 16;
+			} else
+				eq_name = async_eq_name;
+
 			err = request_irq(priv->eq_table.eq[i].irq,
-					  mlx4_msi_x_interrupt,
-					  0, eq_name[i], priv->eq_table.eq + i);
+					  mlx4_msi_x_interrupt, 0, eq_name,
+					  priv->eq_table.eq + i);
 			if (err)
 				goto err_out_async;
 
 			priv->eq_table.eq[i].have_irq = 1;
 		}
-
 	} else {
 		err = request_irq(dev->pdev->irq, mlx4_interrupt,
 				  IRQF_SHARED, DRV_NAME, dev);
@@ -612,28 +657,36 @@
 	}
 
 	err = mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 0,
-			  priv->eq_table.eq[MLX4_EQ_ASYNC].eqn);
+			  priv->eq_table.eq[dev->caps.num_comp_vectors].eqn);
 	if (err)
 		mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
-			   priv->eq_table.eq[MLX4_EQ_ASYNC].eqn, err);
+			   priv->eq_table.eq[dev->caps.num_comp_vectors].eqn, err);
 
-	for (i = 0; i < MLX4_NUM_EQ; ++i)
+	for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
 		eq_set_ci(&priv->eq_table.eq[i], 1);
 
 	return 0;
 
 err_out_async:
-	mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_ASYNC]);
+	mlx4_free_eq(dev, &priv->eq_table.eq[dev->caps.num_comp_vectors]);
 
 err_out_comp:
-	mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_COMP]);
+	i = dev->caps.num_comp_vectors - 1;
 
 err_out_unmap:
+	while (i >= 0) {
+		mlx4_free_eq(dev, &priv->eq_table.eq[i]);
+		--i;
+	}
 	mlx4_unmap_clr_int(dev);
 	mlx4_free_irqs(dev);
 
-err_out_free:
+err_out_bitmap:
 	mlx4_bitmap_cleanup(&priv->eq_table.bitmap);
+
+err_out_free:
+	kfree(priv->eq_table.uar_map);
+
 	return err;
 }
 
@@ -643,18 +696,20 @@
 	int i;
 
 	mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 1,
-		    priv->eq_table.eq[MLX4_EQ_ASYNC].eqn);
+		    priv->eq_table.eq[dev->caps.num_comp_vectors].eqn);
 
 	mlx4_free_irqs(dev);
 
-	for (i = 0; i < MLX4_NUM_EQ; ++i)
+	for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
 		mlx4_free_eq(dev, &priv->eq_table.eq[i]);
 
 	mlx4_unmap_clr_int(dev);
 
-	for (i = 0; i < ARRAY_SIZE(priv->eq_table.uar_map); ++i)
+	for (i = 0; i < mlx4_num_eq_uar(dev); ++i)
 		if (priv->eq_table.uar_map[i])
 			iounmap(priv->eq_table.uar_map[i]);
 
 	mlx4_bitmap_cleanup(&priv->eq_table.bitmap);
+
+	kfree(priv->eq_table.uar_map);
 }
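mlx4_num_eq_uar() above converts EQ indexes into a count of doorbell pages: each UAR holds 4 EQ doorbells, and the driver's EQs start immediately after the reserved ones, so it maps every page between the first and last index it will use. A worked example with assumed capabilities (reserved_eqs = 32, num_comp_vectors = 4, i.e. EQ indexes 32..36 spanning pages 8 and 9):

	#include <stdio.h>

	static int num_eq_uar(int num_comp_vectors, int reserved_eqs)
	{
		/* same arithmetic as mlx4_num_eq_uar() in the hunk above */
		return (num_comp_vectors + 1 + reserved_eqs) / 4
			- reserved_eqs / 4 + 1;
	}

	int main(void)
	{
		/* (4 + 1 + 32) / 4 - 32 / 4 + 1 = 9 - 8 + 1 = 2 UAR pages */
		printf("%d\n", num_eq_uar(4, 32));
		return 0;
	}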
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index 90a0281..710c79e 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -421,9 +421,7 @@
 				  ((u64) (MLX4_CMPT_TYPE_EQ *
 					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
 				  cmpt_entry_sz,
-				  roundup_pow_of_two(MLX4_NUM_EQ +
-						     dev->caps.reserved_eqs),
-				  MLX4_NUM_EQ + dev->caps.reserved_eqs, 0, 0);
+				  dev->caps.num_eqs, dev->caps.num_eqs, 0, 0);
 	if (err)
 		goto err_cq;
 
@@ -810,12 +808,12 @@
 		if (dev->flags & MLX4_FLAG_MSI_X) {
 			mlx4_warn(dev, "NOP command failed to generate MSI-X "
 				  "interrupt IRQ %d).\n",
-				  priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
+				  priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
 			mlx4_warn(dev, "Trying again without MSI-X.\n");
 		} else {
 			mlx4_err(dev, "NOP command failed to generate interrupt "
 				 "(IRQ %d), aborting.\n",
-				 priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
+				 priv->eq_table.eq[dev->caps.num_comp_vectors].irq);
 			mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
 		}
 
@@ -908,31 +906,50 @@
 static void mlx4_enable_msi_x(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
-	struct msix_entry entries[MLX4_NUM_EQ];
+	struct msix_entry *entries;
+	int nreq;
 	int err;
 	int i;
 
 	if (msi_x) {
-		for (i = 0; i < MLX4_NUM_EQ; ++i)
+		nreq = min(dev->caps.num_eqs - dev->caps.reserved_eqs,
+			   num_possible_cpus() + 1);
+		entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
+		if (!entries)
+			goto no_msi;
+
+		for (i = 0; i < nreq; ++i)
 			entries[i].entry = i;
 
-		err = pci_enable_msix(dev->pdev, entries, ARRAY_SIZE(entries));
+	retry:
+		err = pci_enable_msix(dev->pdev, entries, nreq);
 		if (err) {
-			if (err > 0)
-				mlx4_info(dev, "Only %d MSI-X vectors available, "
-					  "not using MSI-X\n", err);
+			/* Try again if at least 2 vectors are available */
+			if (err > 1) {
+				mlx4_info(dev, "Requested %d vectors, "
+					  "but only %d MSI-X vectors available, "
+					  "trying again\n", nreq, err);
+				nreq = err;
+				goto retry;
+			}
+
 			goto no_msi;
 		}
 
-		for (i = 0; i < MLX4_NUM_EQ; ++i)
+		dev->caps.num_comp_vectors = nreq - 1;
+		for (i = 0; i < nreq; ++i)
 			priv->eq_table.eq[i].irq = entries[i].vector;
 
 		dev->flags |= MLX4_FLAG_MSI_X;
+
+		kfree(entries);
 		return;
 	}
 
 no_msi:
-	for (i = 0; i < MLX4_NUM_EQ; ++i)
+	dev->caps.num_comp_vectors = 1;
+
+	for (i = 0; i < 2; ++i)
 		priv->eq_table.eq[i].irq = dev->pdev->irq;
 }
 
@@ -1074,6 +1091,10 @@
 	if (err)
 		goto err_cmd;
 
+	err = mlx4_alloc_eq_table(dev);
+	if (err)
+		goto err_close;
+
 	mlx4_enable_msi_x(dev);
 
 	err = mlx4_setup_hca(dev);
@@ -1084,7 +1105,7 @@
 	}
 
 	if (err)
-		goto err_close;
+		goto err_free_eq;
 
 	for (port = 1; port <= dev->caps.num_ports; port++) {
 		err = mlx4_init_port_info(dev, port);
@@ -1114,6 +1135,9 @@
 	mlx4_cleanup_pd_table(dev);
 	mlx4_cleanup_uar_table(dev);
 
+err_free_eq:
+	mlx4_free_eq_table(dev);
+
 err_close:
 	if (dev->flags & MLX4_FLAG_MSI_X)
 		pci_disable_msix(pdev);
@@ -1177,6 +1201,7 @@
 		iounmap(priv->kar);
 		mlx4_uar_free(dev, &priv->driver_uar);
 		mlx4_cleanup_uar_table(dev);
+		mlx4_free_eq_table(dev);
 		mlx4_close_hca(dev);
 		mlx4_cmd_cleanup(dev);
 
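The retry loop added to mlx4_enable_msi_x() leans on the pci_enable_msix() contract of this era: 0 on success, a positive count when only that many vectors are available (so the caller may retry with the smaller request), negative on hard failure. A minimal model of the loop; enable_msix() here is a hypothetical stand-in that grants at most 4 vectors:

	#include <stdio.h>

	static int enable_msix(int nreq)
	{
		return nreq <= 4 ? 0 : 4;	/* assumed platform limit */
	}

	/* returns the number of completion vectors, or -1 for no MSI-X */
	static int enable_vectors(int nreq)
	{
		int err;

	retry:
		err = enable_msix(nreq);
		if (err) {
			if (err > 1) {	/* need >= 2: comp + async */
				nreq = err;
				goto retry;
			}
			return -1;	/* fall back to a shared legacy IRQ */
		}
		return nreq - 1;	/* one vector is kept for the async EQ */
	}

	int main(void)
	{
		printf("comp vectors: %d\n", enable_vectors(9));
		return 0;	/* prints 3: granted 4, minus the async one */
	}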
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index 34c909d..e0213bad 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -63,12 +63,6 @@
 };
 
 enum {
-	MLX4_EQ_ASYNC,
-	MLX4_EQ_COMP,
-	MLX4_NUM_EQ
-};
-
-enum {
 	MLX4_NUM_PDS		= 1 << 15
 };
 
@@ -205,10 +199,11 @@
 
 struct mlx4_eq_table {
 	struct mlx4_bitmap	bitmap;
+	char		       *irq_names;
 	void __iomem	       *clr_int;
-	void __iomem	       *uar_map[(MLX4_NUM_EQ + 6) / 4];
+	void __iomem	      **uar_map;
 	u32			clr_mask;
-	struct mlx4_eq		eq[MLX4_NUM_EQ];
+	struct mlx4_eq	       *eq;
 	u64			icm_virt;
 	struct page	       *icm_page;
 	dma_addr_t		icm_dma;
@@ -328,6 +323,9 @@
 
 int mlx4_reset(struct mlx4_dev *dev);
 
+int mlx4_alloc_eq_table(struct mlx4_dev *dev);
+void mlx4_free_eq_table(struct mlx4_dev *dev);
+
 int mlx4_init_pd_table(struct mlx4_dev *dev);
 int mlx4_init_uar_table(struct mlx4_dev *dev);
 int mlx4_init_mr_table(struct mlx4_dev *dev);
diff --git a/drivers/net/mlx4/profile.c b/drivers/net/mlx4/profile.c
index 9ca42b2..919fb9e 100644
--- a/drivers/net/mlx4/profile.c
+++ b/drivers/net/mlx4/profile.c
@@ -107,7 +107,9 @@
 	profile[MLX4_RES_AUXC].num    = request->num_qp;
 	profile[MLX4_RES_SRQ].num     = request->num_srq;
 	profile[MLX4_RES_CQ].num      = request->num_cq;
-	profile[MLX4_RES_EQ].num      = MLX4_NUM_EQ + dev_cap->reserved_eqs;
+	profile[MLX4_RES_EQ].num      = min(dev_cap->max_eqs,
+					    dev_cap->reserved_eqs +
+					    num_possible_cpus() + 1);
 	profile[MLX4_RES_DMPT].num    = request->num_mpt;
 	profile[MLX4_RES_CMPT].num    = MLX4_NUM_CMPTS;
 	profile[MLX4_RES_MTT].num     = request->num_mtt;
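Sizing MLX4_RES_EQ as min(max_eqs, reserved_eqs + num_possible_cpus() + 1) reserves ICM for one completion EQ per possible CPU plus the async EQ, capped by the device limit. With assumed numbers (max_eqs = 512, reserved_eqs = 32, 16 possible CPUs):

	#include <stdio.h>

	static int min_int(int a, int b) { return a < b ? a : b; }

	int main(void)
	{
		int max_eqs = 512, reserved_eqs = 32, possible_cpus = 16;

		/* min(512, 32 + 16 + 1) = 49 EQs' worth of ICM */
		printf("%d\n", min_int(max_eqs, reserved_eqs + possible_cpus + 1));
		return 0;
	}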
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 371086f..8f659cc 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -206,6 +206,7 @@
 	int			reserved_cqs;
 	int			num_eqs;
 	int			reserved_eqs;
+	int			num_comp_vectors;
 	int			num_mpts;
 	int			num_mtt_segs;
 	int			fmr_reserved_mtts;
@@ -328,6 +329,7 @@
 	int			arm_sn;
 
 	int			cqn;
+	unsigned		vector;
 
 	atomic_t		refcount;
 	struct completion	free;
@@ -437,7 +439,7 @@
 
 int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
 		  struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq,
-		  int collapsed);
+		  unsigned vector, int collapsed);
 void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq);
 
 int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base);