Merge branch 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband: (33 commits)
  IB/ipath: Fix lockdep error upon "ifconfig ibN down"
  IB/ipath: Fix races with ib_resize_cq()
  IB/ipath: Support new PCIE device, QLE7142
  IB/ipath: Set CPU affinity early
  IB/ipath: Fix EEPROM read when driver is compiled with -Os
  IB/ipath: Fix and recover TXE piobuf and PBC parity errors
  IB/ipath: Change HT CRC message to indicate how to resolve problem
  IB/ipath: Clean up module exit code
  IB/ipath: Call mtrr_del with correct arguments
  IB/ipath: Flush RWQEs if access error or invalid error seen
  IB/ipath: Improved support for PowerPC
  IB/ipath: Drop unnecessary "(void *)" casts
  IB/ipath: Support multiple simultaneous devices of different types
  IB/ipath: Fix mismatch in shifts and masks for printing debug info
  IB/ipath: Fix compiler warnings and errors on non-x86_64 systems
  IB/ipath: Print more informative parity error messages
  IB/ipath: Ensure that PD of MR matches PD of QP checking the Rkey
  IB/ipath: RC and UC should validate SLID and DLID
  IB/ipath: Only allow complete writes to flash
  IB/ipath: Count SRQs properly
  ...
diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c
index 08f46c8..3aae497 100644
--- a/drivers/infiniband/hw/amso1100/c2_ae.c
+++ b/drivers/infiniband/hw/amso1100/c2_ae.c
@@ -197,7 +197,7 @@
 			"resource=%x, qp_state=%s\n",
 			__FUNCTION__,
 			to_event_str(event_id),
-			be64_to_cpu(wr->ae.ae_generic.user_context),
+			(unsigned long long) be64_to_cpu(wr->ae.ae_generic.user_context),
 			be32_to_cpu(wr->ae.ae_generic.resource_type),
 			be32_to_cpu(wr->ae.ae_generic.resource),
 			to_qp_state_str(be32_to_cpu(wr->ae.ae_generic.qp_state)));
diff --git a/drivers/infiniband/hw/amso1100/c2_alloc.c b/drivers/infiniband/hw/amso1100/c2_alloc.c
index 1d25299..028a60b 100644
--- a/drivers/infiniband/hw/amso1100/c2_alloc.c
+++ b/drivers/infiniband/hw/amso1100/c2_alloc.c
@@ -115,7 +115,7 @@
 			    ((unsigned long) &(head->shared_ptr[mqsp]) -
 			     (unsigned long) head);
 		pr_debug("%s addr %p dma_addr %llx\n", __FUNCTION__,
-			 &(head->shared_ptr[mqsp]), (u64)*dma_addr);
+			 &(head->shared_ptr[mqsp]), (unsigned long long) *dma_addr);
 		return &(head->shared_ptr[mqsp]);
 	}
 	return NULL;
diff --git a/drivers/infiniband/hw/amso1100/c2_cm.c b/drivers/infiniband/hw/amso1100/c2_cm.c
index 485254e..75b93e9 100644
--- a/drivers/infiniband/hw/amso1100/c2_cm.c
+++ b/drivers/infiniband/hw/amso1100/c2_cm.c
@@ -302,7 +302,7 @@
 	vq_req = vq_req_alloc(c2dev);
 	if (!vq_req) {
 		err = -ENOMEM;
-		goto bail1;
+		goto bail0;
 	}
 	vq_req->qp = qp;
 	vq_req->cm_id = cm_id;
@@ -311,7 +311,7 @@
 	wr = kmalloc(c2dev->req_vq.msg_size, GFP_KERNEL);
 	if (!wr) {
 		err = -ENOMEM;
-		goto bail2;
+		goto bail1;
 	}
 
 	/* Build the WR */
@@ -331,7 +331,7 @@
 	/* Validate private_data length */
 	if (iw_param->private_data_len > C2_MAX_PRIVATE_DATA_SIZE) {
 		err = -EINVAL;
-		goto bail2;
+		goto bail1;
 	}
 
 	if (iw_param->private_data) {
@@ -348,19 +348,19 @@
 	err = vq_send_wr(c2dev, (union c2wr *) wr);
 	if (err) {
 		vq_req_put(c2dev, vq_req);
-		goto bail2;
+		goto bail1;
 	}
 
 	/* Wait for reply from adapter */
 	err = vq_wait_for_reply(c2dev, vq_req);
 	if (err)
-		goto bail2;
+		goto bail1;
 
 	/* Check that reply is present */
 	reply = (struct c2wr_cr_accept_rep *) (unsigned long) vq_req->reply_msg;
 	if (!reply) {
 		err = -ENOMEM;
-		goto bail2;
+		goto bail1;
 	}
 
 	err = c2_errno(reply);
@@ -368,9 +368,8 @@
 
 	if (!err)
 		c2_set_qp_state(qp, C2_QP_STATE_RTS);
- bail2:
-	kfree(wr);
  bail1:
+	kfree(wr);
 	vq_req_free(c2dev, vq_req);
  bail0:
 	if (err) {
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index dd6af55..da98d9f 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -390,14 +390,18 @@
 	}
 
 	mr = kmalloc(sizeof(*mr), GFP_KERNEL);
-	if (!mr)
+	if (!mr) {
+		vfree(page_list);
 		return ERR_PTR(-ENOMEM);
+	}
 
 	mr->pd = to_c2pd(ib_pd);
 	pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
 		"*iova_start %llx, first pa %llx, last pa %llx\n",
 		__FUNCTION__, page_shift, pbl_depth, total_len,
-		*iova_start, page_list[0], page_list[pbl_depth-1]);
+		(unsigned long long) *iova_start,
+	       	(unsigned long long) page_list[0],
+	       	(unsigned long long) page_list[pbl_depth-1]);
   	err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list,
  					 (1 << page_shift), pbl_depth,
 					 total_len, 0, iova_start,
diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c
index f49a32b..e37c568 100644
--- a/drivers/infiniband/hw/amso1100/c2_rnic.c
+++ b/drivers/infiniband/hw/amso1100/c2_rnic.c
@@ -527,7 +527,7 @@
 				      		DMA_FROM_DEVICE);
 	pci_unmap_addr_set(&c2dev->rep_vq, mapping, c2dev->rep_vq.host_dma);
 	pr_debug("%s rep_vq va %p dma %llx\n", __FUNCTION__, q1_pages,
-		 (u64)c2dev->rep_vq.host_dma);
+		 (unsigned long long) c2dev->rep_vq.host_dma);
 	c2_mq_rep_init(&c2dev->rep_vq,
 		   1,
 		   qsize,
@@ -550,7 +550,7 @@
 				      		DMA_FROM_DEVICE);
 	pci_unmap_addr_set(&c2dev->aeq, mapping, c2dev->aeq.host_dma);
 	pr_debug("%s aeq va %p dma %llx\n", __FUNCTION__, q1_pages,
-		 (u64)c2dev->rep_vq.host_dma);
+		 (unsigned long long) c2dev->rep_vq.host_dma);
 	c2_mq_rep_init(&c2dev->aeq,
 		       2,
 		       qsize,
diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index f577905..54139d3 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -141,8 +141,9 @@
 	 * packets if ipath not configured, etc.)
 	 */
 	__u64 sps_krdrops;
+	__u64 sps_txeparity; /* PIO buffer parity error, recovered */
 	/* pad for future growth */
-	__u64 __sps_pad[46];
+	__u64 __sps_pad[45];
 };
 
 /*
@@ -185,6 +186,9 @@
 #define IPATH_RUNTIME_PCIE	0x2
 #define IPATH_RUNTIME_FORCE_WC_ORDER	0x4
 #define IPATH_RUNTIME_RCVHDR_COPY	0x8
+#define IPATH_RUNTIME_MASTER	0x10
+#define IPATH_RUNTIME_PBC_REWRITE 0x20
+#define IPATH_RUNTIME_LOOSE_DMA_ALIGN 0x40
 
 /*
  * This structure is returned by ipath_userinit() immediately after
@@ -202,7 +206,8 @@
 	/* version of software, for feature checking. */
 	__u32 spi_sw_version;
 	/* InfiniPath port assigned, goes into sent packets */
-	__u32 spi_port;
+	__u16 spi_port;
+	__u16 spi_subport;
 	/*
 	 * IB MTU, packets IB data must be less than this.
 	 * The MTU is in bytes, and will be a multiple of 4 bytes.
@@ -218,7 +223,7 @@
 	__u32 spi_tidcnt;
 	/* size of the TID Eager list in infinipath, in entries */
 	__u32 spi_tidegrcnt;
-	/* size of a single receive header queue entry. */
+	/* size of a single receive header queue entry in words. */
 	__u32 spi_rcvhdrent_size;
 	/*
 	 * Count of receive header queue entries allocated.
@@ -310,6 +315,12 @@
 	__u32 spi_filler_for_align;
 	/* address of readonly memory copy of the rcvhdrq tail register. */
 	__u64 spi_rcvhdr_tailaddr;
+
+	/* shared memory pages for subports if IPATH_RUNTIME_MASTER is set */
+	__u64 spi_subport_uregbase;
+	__u64 spi_subport_rcvegrbuf;
+	__u64 spi_subport_rcvhdr_base;
+
 } __attribute__ ((aligned(8)));
 
 
@@ -328,12 +339,12 @@
 
 /*
  * Minor version differences are always compatible
- * a within a major version, however if if user software is larger
+ * a within a major version, however if user software is larger
  * than driver software, some new features and/or structure fields
  * may not be implemented; the user code must deal with this if it
- * cares, or it must abort after initialization reports the difference
+ * cares, or it must abort after initialization reports the difference.
  */
-#define IPATH_USER_SWMINOR 2
+#define IPATH_USER_SWMINOR 3
 
 #define IPATH_USER_SWVERSION ((IPATH_USER_SWMAJOR<<16) | IPATH_USER_SWMINOR)
 
@@ -379,7 +390,16 @@
 	 */
 	__u32 spu_rcvhdrsize;
 
-	__u64 spu_unused; /* kept for compatible layout */
+	/*
+	 * If two or more processes wish to share a port, each process
+	 * must set the spu_subport_cnt and spu_subport_id to the same
+	 * values.  The only restriction on the spu_subport_id is that
+	 * it be unique for a given node.
+	 */
+	__u16 spu_subport_cnt;
+	__u16 spu_subport_id;
+
+	__u32 spu_unused; /* kept for compatible layout */
 
 	/*
 	 * address of struct base_info to write to
@@ -392,19 +412,25 @@
 
 #define IPATH_CMD_MIN		16
 
-#define IPATH_CMD_USER_INIT	16	/* set up userspace */
+#define __IPATH_CMD_USER_INIT	16	/* old set up userspace (for old user code) */
 #define IPATH_CMD_PORT_INFO	17	/* find out what resources we got */
 #define IPATH_CMD_RECV_CTRL	18	/* control receipt of packets */
 #define IPATH_CMD_TID_UPDATE	19	/* update expected TID entries */
 #define IPATH_CMD_TID_FREE	20	/* free expected TID entries */
 #define IPATH_CMD_SET_PART_KEY	21	/* add partition key */
+#define IPATH_CMD_SLAVE_INFO	22	/* return info on slave processes */
+#define IPATH_CMD_ASSIGN_PORT	23	/* allocate HCA and port */
+#define IPATH_CMD_USER_INIT 	24	/* set up userspace */
 
-#define IPATH_CMD_MAX		21
+#define IPATH_CMD_MAX		24
 
 struct ipath_port_info {
 	__u32 num_active;	/* number of active units */
 	__u32 unit;		/* unit (chip) assigned to caller */
-	__u32 port;		/* port on unit assigned to caller */
+	__u16 port;		/* port on unit assigned to caller */
+	__u16 subport;		/* subport on unit assigned to caller */
+	__u16 num_ports;	/* number of ports available on unit */
+	__u16 num_subports;	/* number of subport slaves opened on port */
 };
 
 struct ipath_tid_info {
@@ -435,6 +461,8 @@
 		__u32 recv_ctrl;
 		/* partition key to set */
 		__u16 part_key;
+		/* user address of __u32 bitmask of active slaves */
+		__u64 slave_mask_addr;
 	} cmd;
 };
 
@@ -596,6 +624,10 @@
 
 /* K_PktFlags bits */
 #define INFINIPATH_KPF_INTR 0x1
+#define INFINIPATH_KPF_SUBPORT_MASK 0x3
+#define INFINIPATH_KPF_SUBPORT_SHIFT 1
+
+#define INFINIPATH_MAX_SUBPORT	4
 
 /* SendPIO per-buffer control */
 #define INFINIPATH_SP_TEST    0x40
@@ -610,7 +642,7 @@
 	/*
 	 * Version - 4 bits, Port - 4 bits, TID - 10 bits and Offset -
 	 * 14 bits before ECO change ~28 Dec 03.  After that, Vers 4,
-	 * Port 3, TID 11, offset 14.
+	 * Port 4, TID 11, offset 13.
 	 */
 	__le32 ver_port_tid_offset;
 	__le16 chksum;
diff --git a/drivers/infiniband/hw/ipath/ipath_cq.c b/drivers/infiniband/hw/ipath/ipath_cq.c
index 049221b..87462e0 100644
--- a/drivers/infiniband/hw/ipath/ipath_cq.c
+++ b/drivers/infiniband/hw/ipath/ipath_cq.c
@@ -46,7 +46,7 @@
  */
 void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
 {
-	struct ipath_cq_wc *wc = cq->queue;
+	struct ipath_cq_wc *wc;
 	unsigned long flags;
 	u32 head;
 	u32 next;
@@ -57,6 +57,7 @@
 	 * Note that the head pointer might be writable by user processes.
 	 * Take care to verify it is a sane value.
 	 */
+	wc = cq->queue;
 	head = wc->head;
 	if (head >= (unsigned) cq->ibcq.cqe) {
 		head = cq->ibcq.cqe;
@@ -109,21 +110,27 @@
 int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
 {
 	struct ipath_cq *cq = to_icq(ibcq);
-	struct ipath_cq_wc *wc = cq->queue;
+	struct ipath_cq_wc *wc;
 	unsigned long flags;
 	int npolled;
+	u32 tail;
 
 	spin_lock_irqsave(&cq->lock, flags);
 
+	wc = cq->queue;
+	tail = wc->tail;
+	if (tail > (u32) cq->ibcq.cqe)
+		tail = (u32) cq->ibcq.cqe;
 	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
-		if (wc->tail == wc->head)
+		if (tail == wc->head)
 			break;
-		*entry = wc->queue[wc->tail];
-		if (wc->tail >= cq->ibcq.cqe)
-			wc->tail = 0;
+		*entry = wc->queue[tail];
+		if (tail >= cq->ibcq.cqe)
+			tail = 0;
 		else
-			wc->tail++;
+			tail++;
 	}
+	wc->tail = tail;
 
 	spin_unlock_irqrestore(&cq->lock, flags);
 
@@ -177,11 +184,6 @@
 		goto done;
 	}
 
-	if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
-		ret = ERR_PTR(-ENOMEM);
-		goto done;
-	}
-
 	/* Allocate the completion queue structure. */
 	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
 	if (!cq) {
@@ -237,6 +239,16 @@
 	} else
 		cq->ip = NULL;
 
+	spin_lock(&dev->n_cqs_lock);
+	if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
+		spin_unlock(&dev->n_cqs_lock);
+		ret = ERR_PTR(-ENOMEM);
+		goto bail_wc;
+	}
+
+	dev->n_cqs_allocated++;
+	spin_unlock(&dev->n_cqs_lock);
+
 	/*
 	 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
 	 * The number of entries should be >= the number requested or return
@@ -253,7 +265,6 @@
 
 	ret = &cq->ibcq;
 
-	dev->n_cqs_allocated++;
 	goto done;
 
 bail_wc:
@@ -280,7 +291,9 @@
 	struct ipath_cq *cq = to_icq(ibcq);
 
 	tasklet_kill(&cq->comptask);
+	spin_lock(&dev->n_cqs_lock);
 	dev->n_cqs_allocated--;
+	spin_unlock(&dev->n_cqs_lock);
 	if (cq->ip)
 		kref_put(&cq->ip->ref, ipath_release_mmap_info);
 	else
@@ -316,10 +329,16 @@
 	return 0;
 }
 
+/**
+ * ipath_resize_cq - change the size of the CQ
+ * @ibcq: the completion queue
+ *
+ * Returns 0 for success.
+ */
 int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 {
 	struct ipath_cq *cq = to_icq(ibcq);
-	struct ipath_cq_wc *old_wc = cq->queue;
+	struct ipath_cq_wc *old_wc;
 	struct ipath_cq_wc *wc;
 	u32 head, tail, n;
 	int ret;
@@ -355,6 +374,7 @@
 	 * Make sure head and tail are sane since they
 	 * might be user writable.
 	 */
+	old_wc = cq->queue;
 	head = old_wc->head;
 	if (head > (u32) cq->ibcq.cqe)
 		head = (u32) cq->ibcq.cqe;
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 2108466..12cefa6 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -95,16 +95,6 @@
 	"RecovIdle",
 };
 
-/*
- * These variables are initialized in the chip-specific files
- * but are defined here.
- */
-u16 ipath_gpio_sda_num, ipath_gpio_scl_num;
-u64 ipath_gpio_sda, ipath_gpio_scl;
-u64 infinipath_i_bitsextant;
-ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
-u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;
-
 static void __devexit ipath_remove_one(struct pci_dev *);
 static int __devinit ipath_init_one(struct pci_dev *,
 				    const struct pci_device_id *);
@@ -527,28 +517,146 @@
 	return ret;
 }
 
+static void __devexit cleanup_device(struct ipath_devdata *dd)
+{
+	int port;
+
+	ipath_shutdown_device(dd);
+
+	if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
+		/* can't do anything more with chip; needs re-init */
+		*dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
+		if (dd->ipath_kregbase) {
+			/*
+			 * if we haven't already cleaned up before these are
+			 * to ensure any register reads/writes "fail" until
+			 * re-init
+			 */
+			dd->ipath_kregbase = NULL;
+			dd->ipath_uregbase = 0;
+			dd->ipath_sregbase = 0;
+			dd->ipath_cregbase = 0;
+			dd->ipath_kregsize = 0;
+		}
+		ipath_disable_wc(dd);
+	}
+
+	if (dd->ipath_pioavailregs_dma) {
+		dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
+				  (void *) dd->ipath_pioavailregs_dma,
+				  dd->ipath_pioavailregs_phys);
+		dd->ipath_pioavailregs_dma = NULL;
+	}
+	if (dd->ipath_dummy_hdrq) {
+		dma_free_coherent(&dd->pcidev->dev,
+			dd->ipath_pd[0]->port_rcvhdrq_size,
+			dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
+		dd->ipath_dummy_hdrq = NULL;
+	}
+
+	if (dd->ipath_pageshadow) {
+		struct page **tmpp = dd->ipath_pageshadow;
+		dma_addr_t *tmpd = dd->ipath_physshadow;
+		int i, cnt = 0;
+
+		ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
+			   "locked\n");
+		for (port = 0; port < dd->ipath_cfgports; port++) {
+			int port_tidbase = port * dd->ipath_rcvtidcnt;
+			int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
+			for (i = port_tidbase; i < maxtid; i++) {
+				if (!tmpp[i])
+					continue;
+				pci_unmap_page(dd->pcidev, tmpd[i],
+					PAGE_SIZE, PCI_DMA_FROMDEVICE);
+				ipath_release_user_pages(&tmpp[i], 1);
+				tmpp[i] = NULL;
+				cnt++;
+			}
+		}
+		if (cnt) {
+			ipath_stats.sps_pageunlocks += cnt;
+			ipath_cdbg(VERBOSE, "There were still %u expTID "
+				   "entries locked\n", cnt);
+		}
+		if (ipath_stats.sps_pagelocks ||
+		    ipath_stats.sps_pageunlocks)
+			ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
+				   "unlocked via ipath_m{un}lock\n",
+				   (unsigned long long)
+				   ipath_stats.sps_pagelocks,
+				   (unsigned long long)
+				   ipath_stats.sps_pageunlocks);
+
+		ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
+			   dd->ipath_pageshadow);
+		vfree(dd->ipath_pageshadow);
+		dd->ipath_pageshadow = NULL;
+	}
+
+	/*
+	 * free any resources still in use (usually just kernel ports)
+	 * at unload; we do for portcnt, not cfgports, because cfgports
+	 * could have changed while we were loaded.
+	 */
+	for (port = 0; port < dd->ipath_portcnt; port++) {
+		struct ipath_portdata *pd = dd->ipath_pd[port];
+		dd->ipath_pd[port] = NULL;
+		ipath_free_pddata(dd, pd);
+	}
+	kfree(dd->ipath_pd);
+	/*
+	 * debuggability, in case some cleanup path tries to use it
+	 * after this
+	 */
+	dd->ipath_pd = NULL;
+}
+
 static void __devexit ipath_remove_one(struct pci_dev *pdev)
 {
-	struct ipath_devdata *dd;
+	struct ipath_devdata *dd = pci_get_drvdata(pdev);
 
-	ipath_cdbg(VERBOSE, "removing, pdev=%p\n", pdev);
-	if (!pdev)
-		return;
+	ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);
 
-	dd = pci_get_drvdata(pdev);
-	ipath_unregister_ib_device(dd->verbs_dev);
+	if (dd->verbs_dev)
+		ipath_unregister_ib_device(dd->verbs_dev);
+
 	ipath_diag_remove(dd);
 	ipath_user_remove(dd);
 	ipathfs_remove_device(dd);
 	ipath_device_remove_group(&pdev->dev, dd);
+
 	ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, "
 		   "unit %u\n", dd, (u32) dd->ipath_unit);
-	if (dd->ipath_kregbase) {
-		ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n",
-			   dd->ipath_kregbase);
-		iounmap((volatile void __iomem *) dd->ipath_kregbase);
-		dd->ipath_kregbase = NULL;
-	}
+
+	cleanup_device(dd);
+
+	/*
+	 * turn off rcv, send, and interrupts for all ports, all drivers
+	 * should also hard reset the chip here?
+	 * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
+	 * for all versions of the driver, if they were allocated
+	 */
+	if (pdev->irq) {
+		ipath_cdbg(VERBOSE,
+			   "unit %u free_irq of irq %x\n",
+			   dd->ipath_unit, pdev->irq);
+		free_irq(pdev->irq, dd);
+	} else
+		ipath_dbg("irq is 0, not doing free_irq "
+			  "for unit %u\n", dd->ipath_unit);
+	/*
+	 * we check for NULL here, because it's outside
+	 * the kregbase check, and we need to call it
+	 * after the free_irq.	Thus it's possible that
+	 * the function pointers were never initialized.
+	 */
+	if (dd->ipath_f_cleanup)
+		/* clean up chip-specific stuff */
+		dd->ipath_f_cleanup(dd);
+
+	ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase);
+	iounmap((volatile void __iomem *) dd->ipath_kregbase);
 	pci_release_regions(pdev);
 	ipath_cdbg(VERBOSE, "calling pci_disable_device\n");
 	pci_disable_device(pdev);
@@ -760,8 +868,8 @@
 static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum,
 				     int err)
 {
-	return dd->ipath_port0_skbs ?
-		(void *)dd->ipath_port0_skbs[bufnum]->data : NULL;
+	return dd->ipath_port0_skbinfo ?
+		(void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL;
 }
 
 /**
@@ -783,31 +891,34 @@
 	 */
 
 	/*
-	 * We need 4 extra bytes for unaligned transfer copying
+	 * We need 2 extra bytes for ipath_ether data sent in the
+	 * key header.  In order to keep everything dword aligned,
+	 * we'll reserve 4 bytes.
 	 */
+	len = dd->ipath_ibmaxlen + 4;
+
 	if (dd->ipath_flags & IPATH_4BYTE_TID) {
-		/* we need a 4KB multiple alignment, and there is no way
+		/* We need a 2KB multiple alignment, and there is no way
 		 * to do it except to allocate extra and then skb_reserve
 		 * enough to bring it up to the right alignment.
 		 */
-		len = dd->ipath_ibmaxlen + 4 + (1 << 11) - 1;
+		len += 2047;
 	}
-	else
-		len = dd->ipath_ibmaxlen + 4;
+
 	skb = __dev_alloc_skb(len, gfp_mask);
 	if (!skb) {
 		ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n",
 			      len);
 		goto bail;
 	}
+
+	skb_reserve(skb, 4);
+
 	if (dd->ipath_flags & IPATH_4BYTE_TID) {
-		u32 una = ((1 << 11) - 1) & (unsigned long)(skb->data + 4);
+		u32 una = (unsigned long)skb->data & 2047;
 		if (una)
-			skb_reserve(skb, 4 + (1 << 11) - una);
-		else
-			skb_reserve(skb, 4);
-	} else
-		skb_reserve(skb, 4);
+			skb_reserve(skb, 2048 - una);
+	}
 
 bail:
 	return skb;
@@ -1326,6 +1437,9 @@
 				      "for port %u rcvhdrqtailaddr failed\n",
 				      pd->port_port);
 			ret = -ENOMEM;
+			dma_free_coherent(&dd->pcidev->dev, amt,
+					  pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
+			pd->port_rcvhdrq = NULL;
 			goto bail;
 		}
 		pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
@@ -1347,12 +1461,13 @@
 		ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; "
 			   "hdrtailaddr@%p %llx physical\n",
 			   pd->port_port, pd->port_rcvhdrq,
-			   pd->port_rcvhdrq_phys, pd->port_rcvhdrtail_kvaddr,
-			   (unsigned long long)pd->port_rcvhdrqtailaddr_phys);
+			   (unsigned long long) pd->port_rcvhdrq_phys,
+			   pd->port_rcvhdrtail_kvaddr, (unsigned long long)
+			   pd->port_rcvhdrqtailaddr_phys);
 
 	/* clear for security and sanity on each use */
 	memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size);
-	memset((void *)pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
+	memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
 
 	/*
 	 * tell chip each time we init it, even if we are re-using previous
@@ -1805,7 +1920,7 @@
 		pd->port_rcvhdrq = NULL;
 		if (pd->port_rcvhdrtail_kvaddr) {
 			dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
-					 (void *)pd->port_rcvhdrtail_kvaddr,
+					 pd->port_rcvhdrtail_kvaddr,
 					 pd->port_rcvhdrqtailaddr_phys);
 			pd->port_rcvhdrtail_kvaddr = NULL;
 		}
@@ -1824,24 +1939,32 @@
 			dma_free_coherent(&dd->pcidev->dev, size,
 				base, pd->port_rcvegrbuf_phys[e]);
 		}
-		vfree(pd->port_rcvegrbuf);
+		kfree(pd->port_rcvegrbuf);
 		pd->port_rcvegrbuf = NULL;
-		vfree(pd->port_rcvegrbuf_phys);
+		kfree(pd->port_rcvegrbuf_phys);
 		pd->port_rcvegrbuf_phys = NULL;
 		pd->port_rcvegrbuf_chunks = 0;
-	} else if (pd->port_port == 0 && dd->ipath_port0_skbs) {
+	} else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) {
 		unsigned e;
-		struct sk_buff **skbs = dd->ipath_port0_skbs;
+		struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo;
 
-		dd->ipath_port0_skbs = NULL;
-		ipath_cdbg(VERBOSE, "free closed port %d ipath_port0_skbs "
-			   "@ %p\n", pd->port_port, skbs);
+		dd->ipath_port0_skbinfo = NULL;
+		ipath_cdbg(VERBOSE, "free closed port %d "
+			   "ipath_port0_skbinfo @ %p\n", pd->port_port,
+			   skbinfo);
 		for (e = 0; e < dd->ipath_rcvegrcnt; e++)
-			if (skbs[e])
-				dev_kfree_skb(skbs[e]);
-		vfree(skbs);
+		if (skbinfo[e].skb) {
+			pci_unmap_single(dd->pcidev, skbinfo[e].phys,
+					 dd->ipath_ibmaxlen,
+					 PCI_DMA_FROMDEVICE);
+			dev_kfree_skb(skbinfo[e].skb);
+		}
+		vfree(skbinfo);
 	}
 	kfree(pd->port_tid_pg_list);
+	vfree(pd->subport_uregbase);
+	vfree(pd->subport_rcvegrbuf);
+	vfree(pd->subport_rcvhdr_base);
 	kfree(pd);
 }
 
@@ -1907,150 +2030,12 @@
 	return ret;
 }
 
-static void cleanup_device(struct ipath_devdata *dd)
-{
-	int port;
-
-	ipath_shutdown_device(dd);
-
-	if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
-		/* can't do anything more with chip; needs re-init */
-		*dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
-		if (dd->ipath_kregbase) {
-			/*
-			 * if we haven't already cleaned up before these are
-			 * to ensure any register reads/writes "fail" until
-			 * re-init
-			 */
-			dd->ipath_kregbase = NULL;
-			dd->ipath_uregbase = 0;
-			dd->ipath_sregbase = 0;
-			dd->ipath_cregbase = 0;
-			dd->ipath_kregsize = 0;
-		}
-		ipath_disable_wc(dd);
-	}
-
-	if (dd->ipath_pioavailregs_dma) {
-		dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
-				  (void *) dd->ipath_pioavailregs_dma,
-				  dd->ipath_pioavailregs_phys);
-		dd->ipath_pioavailregs_dma = NULL;
-	}
-	if (dd->ipath_dummy_hdrq) {
-		dma_free_coherent(&dd->pcidev->dev,
-			dd->ipath_pd[0]->port_rcvhdrq_size,
-			dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
-		dd->ipath_dummy_hdrq = NULL;
-	}
-
-	if (dd->ipath_pageshadow) {
-		struct page **tmpp = dd->ipath_pageshadow;
-		int i, cnt = 0;
-
-		ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
-			   "locked\n");
-		for (port = 0; port < dd->ipath_cfgports; port++) {
-			int port_tidbase = port * dd->ipath_rcvtidcnt;
-			int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
-			for (i = port_tidbase; i < maxtid; i++) {
-				if (!tmpp[i])
-					continue;
-				ipath_release_user_pages(&tmpp[i], 1);
-				tmpp[i] = NULL;
-				cnt++;
-			}
-		}
-		if (cnt) {
-			ipath_stats.sps_pageunlocks += cnt;
-			ipath_cdbg(VERBOSE, "There were still %u expTID "
-				   "entries locked\n", cnt);
-		}
-		if (ipath_stats.sps_pagelocks ||
-		    ipath_stats.sps_pageunlocks)
-			ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
-				   "unlocked via ipath_m{un}lock\n",
-				   (unsigned long long)
-				   ipath_stats.sps_pagelocks,
-				   (unsigned long long)
-				   ipath_stats.sps_pageunlocks);
-
-		ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
-			   dd->ipath_pageshadow);
-		vfree(dd->ipath_pageshadow);
-		dd->ipath_pageshadow = NULL;
-	}
-
-	/*
-	 * free any resources still in use (usually just kernel ports)
-	 * at unload; we do for portcnt, not cfgports, because cfgports
-	 * could have changed while we were loaded.
-	 */
-	for (port = 0; port < dd->ipath_portcnt; port++) {
-		struct ipath_portdata *pd = dd->ipath_pd[port];
-		dd->ipath_pd[port] = NULL;
-		ipath_free_pddata(dd, pd);
-	}
-	kfree(dd->ipath_pd);
-	/*
-	 * debuggability, in case some cleanup path tries to use it
-	 * after this
-	 */
-	dd->ipath_pd = NULL;
-}
-
 static void __exit infinipath_cleanup(void)
 {
-	struct ipath_devdata *dd, *tmp;
-	unsigned long flags;
-
-	ipath_diagpkt_remove();
-
 	ipath_exit_ipathfs();
 
 	ipath_driver_remove_group(&ipath_driver.driver);
 
-	spin_lock_irqsave(&ipath_devs_lock, flags);
-
-	/*
-	 * turn off rcv, send, and interrupts for all ports, all drivers
-	 * should also hard reset the chip here?
-	 * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
-	 * for all versions of the driver, if they were allocated
-	 */
-	list_for_each_entry_safe(dd, tmp, &ipath_dev_list, ipath_list) {
-		spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
-		if (dd->ipath_kregbase)
-			cleanup_device(dd);
-
-		if (dd->pcidev) {
-			if (dd->pcidev->irq) {
-				ipath_cdbg(VERBOSE,
-					   "unit %u free_irq of irq %x\n",
-					   dd->ipath_unit, dd->pcidev->irq);
-				free_irq(dd->pcidev->irq, dd);
-			} else
-				ipath_dbg("irq is 0, not doing free_irq "
-					  "for unit %u\n", dd->ipath_unit);
-
-			/*
-			 * we check for NULL here, because it's outside
-			 * the kregbase check, and we need to call it
-			 * after the free_irq.  Thus it's possible that
-			 * the function pointers were never initialized.
-			 */
-			if (dd->ipath_f_cleanup)
-				/* clean up chip-specific stuff */
-				dd->ipath_f_cleanup(dd);
-
-			dd->pcidev = NULL;
-		}
-		spin_lock_irqsave(&ipath_devs_lock, flags);
-	}
-
-	spin_unlock_irqrestore(&ipath_devs_lock, flags);
-
 	ipath_cdbg(VERBOSE, "Unregistering pci driver\n");
 	pci_unregister_driver(&ipath_driver);
 
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c
index 3313356..a4019a6 100644
--- a/drivers/infiniband/hw/ipath/ipath_eeprom.c
+++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c
@@ -100,9 +100,9 @@
 	gpioval = &dd->ipath_gpio_out;
 	read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
 	if (line == i2c_line_scl)
-		mask = ipath_gpio_scl;
+		mask = dd->ipath_gpio_scl;
 	else
-		mask = ipath_gpio_sda;
+		mask = dd->ipath_gpio_sda;
 
 	if (new_line_state == i2c_line_high)
 		/* tri-state the output rather than force high */
@@ -119,12 +119,12 @@
 		write_val = 0x0UL;
 
 	if (line == i2c_line_scl) {
-		write_val <<= ipath_gpio_scl_num;
-		*gpioval = *gpioval & ~(1UL << ipath_gpio_scl_num);
+		write_val <<= dd->ipath_gpio_scl_num;
+		*gpioval = *gpioval & ~(1UL << dd->ipath_gpio_scl_num);
 		*gpioval |= write_val;
 	} else {
-		write_val <<= ipath_gpio_sda_num;
-		*gpioval = *gpioval & ~(1UL << ipath_gpio_sda_num);
+		write_val <<= dd->ipath_gpio_sda_num;
+		*gpioval = *gpioval & ~(1UL << dd->ipath_gpio_sda_num);
 		*gpioval |= write_val;
 	}
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval);
@@ -157,9 +157,9 @@
 	read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
 	/* config line to be an input */
 	if (line == i2c_line_scl)
-		mask = ipath_gpio_scl;
+		mask = dd->ipath_gpio_scl;
 	else
-		mask = ipath_gpio_sda;
+		mask = dd->ipath_gpio_sda;
 	write_val = read_val & ~mask;
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val);
 	read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
@@ -187,6 +187,7 @@
 static void i2c_wait_for_writes(struct ipath_devdata *dd)
 {
 	(void)ipath_read_kreg32(dd, dd->ipath_kregs->kr_scratch);
+	rmb();
 }
 
 static void scl_out(struct ipath_devdata *dd, u8 bit)
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 29930e2..a9ddc69 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -41,6 +41,12 @@
 #include "ipath_kernel.h"
 #include "ipath_common.h"
 
+/*
+ * mmap64 doesn't allow all 64 bits for 32-bit applications
+ * so only use the low 43 bits.
+ */
+#define MMAP64_MASK	0x7FFFFFFFFFFUL
+
 static int ipath_open(struct inode *, struct file *);
 static int ipath_close(struct inode *, struct file *);
 static ssize_t ipath_write(struct file *, const char __user *, size_t,
@@ -57,18 +63,35 @@
 	.mmap = ipath_mmap
 };
 
-static int ipath_get_base_info(struct ipath_portdata *pd,
+static int ipath_get_base_info(struct file *fp,
 			       void __user *ubase, size_t ubase_size)
 {
+	struct ipath_portdata *pd = port_fp(fp);
 	int ret = 0;
 	struct ipath_base_info *kinfo = NULL;
 	struct ipath_devdata *dd = pd->port_dd;
+	unsigned subport_cnt;
+	int shared, master;
+	size_t sz;
 
-	if (ubase_size < sizeof(*kinfo)) {
+	subport_cnt = pd->port_subport_cnt;
+	if (!subport_cnt) {
+		shared = 0;
+		master = 0;
+		subport_cnt = 1;
+	} else {
+		shared = 1;
+		master = !subport_fp(fp);
+	}
+
+	sz = sizeof(*kinfo);
+	/* If port sharing is not requested, allow the old size structure */
+	if (!shared)
+		sz -= 3 * sizeof(u64);
+	if (ubase_size < sz) {
 		ipath_cdbg(PROC,
-			   "Base size %lu, need %lu (version mismatch?)\n",
-			   (unsigned long) ubase_size,
-			   (unsigned long) sizeof(*kinfo));
+			   "Base size %zu, need %zu (version mismatch?)\n",
+			   ubase_size, sz);
 		ret = -EINVAL;
 		goto bail;
 	}
@@ -95,7 +118,9 @@
 	kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk;
 	kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen /
 		pd->port_rcvegrbuf_chunks;
-	kinfo->spi_tidcnt = dd->ipath_rcvtidcnt;
+	kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt;
+	if (master)
+		kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt;
 	/*
 	 * for this use, may be ipath_cfgports summed over all chips that
 	 * are are configured and present
@@ -118,31 +143,75 @@
 	 * page_address() macro worked, but in 2.6.11, even that returns the
 	 * full 64 bit address (upper bits all 1's).  So far, using the
 	 * physical addresses (or chip offsets, for chip mapping) works, but
-	 * no doubt some future kernel release will chang that, and we'll be
-	 * on to yet another method of dealing with this
+	 * no doubt some future kernel release will change that, and we'll be
+	 * on to yet another method of dealing with this.
 	 */
 	kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys;
-	kinfo->spi_rcvhdr_tailaddr = (u64)pd->port_rcvhdrqtailaddr_phys;
+	kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys;
 	kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys;
 	kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys;
 	kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
 		(void *) dd->ipath_statusp -
 		(void *) dd->ipath_pioavailregs_dma;
-	kinfo->spi_piobufbase = (u64) pd->port_piobufs;
-	kinfo->__spi_uregbase =
-		dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
+	if (!shared) {
+		kinfo->spi_piocnt = dd->ipath_pbufsport;
+		kinfo->spi_piobufbase = (u64) pd->port_piobufs;
+		kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
+			dd->ipath_palign * pd->port_port;
+	} else if (master) {
+		kinfo->spi_piocnt = (dd->ipath_pbufsport / subport_cnt) +
+				    (dd->ipath_pbufsport % subport_cnt);
+		/* Master's PIO buffers are after all the slave's */
+		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
+			dd->ipath_palign *
+			(dd->ipath_pbufsport - kinfo->spi_piocnt);
+		kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
+			dd->ipath_palign * pd->port_port;
+	} else {
+		unsigned slave = subport_fp(fp) - 1;
 
-	kinfo->spi_pioindex = dd->ipath_pbufsport * (pd->port_port - 1);
-	kinfo->spi_piocnt = dd->ipath_pbufsport;
+		kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt;
+		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
+			dd->ipath_palign * kinfo->spi_piocnt * slave;
+		kinfo->__spi_uregbase = ((u64) pd->subport_uregbase +
+			PAGE_SIZE * slave) & MMAP64_MASK;
+
+		kinfo->spi_rcvhdr_base = ((u64) pd->subport_rcvhdr_base +
+			pd->port_rcvhdrq_size * slave) & MMAP64_MASK;
+		kinfo->spi_rcvhdr_tailaddr =
+			(u64) pd->port_rcvhdrqtailaddr_phys & MMAP64_MASK;
+		kinfo->spi_rcv_egrbufs = ((u64) pd->subport_rcvegrbuf +
+			dd->ipath_rcvegrcnt * dd->ipath_rcvegrbufsize * slave) &
+			MMAP64_MASK;
+	}
+
+	kinfo->spi_pioindex = (kinfo->spi_piobufbase - dd->ipath_piobufbase) /
+		dd->ipath_palign;
 	kinfo->spi_pioalign = dd->ipath_palign;
 
 	kinfo->spi_qpair = IPATH_KD_QP;
 	kinfo->spi_piosize = dd->ipath_ibmaxlen;
 	kinfo->spi_mtu = dd->ipath_ibmaxlen;	/* maxlen, not ibmtu */
 	kinfo->spi_port = pd->port_port;
+	kinfo->spi_subport = subport_fp(fp);
 	kinfo->spi_sw_version = IPATH_KERN_SWVERSION;
 	kinfo->spi_hw_version = dd->ipath_revision;
 
+	if (master) {
+		kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER;
+		kinfo->spi_subport_uregbase =
+			(u64) pd->subport_uregbase & MMAP64_MASK;
+		kinfo->spi_subport_rcvegrbuf =
+			(u64) pd->subport_rcvegrbuf & MMAP64_MASK;
+		kinfo->spi_subport_rcvhdr_base =
+			(u64) pd->subport_rcvhdr_base & MMAP64_MASK;
+		ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
+			kinfo->spi_port, kinfo->spi_runtime_flags,
+			(unsigned long long) kinfo->spi_subport_uregbase,
+			(unsigned long long) kinfo->spi_subport_rcvegrbuf,
+			(unsigned long long) kinfo->spi_subport_rcvhdr_base);
+	}
+
 	if (copy_to_user(ubase, kinfo, sizeof(*kinfo)))
 		ret = -EFAULT;
 
@@ -154,6 +223,7 @@
 /**
  * ipath_tid_update - update a port TID
  * @pd: the port
+ * @fp: the ipath device file
  * @ti: the TID information
  *
  * The new implementation as of Oct 2004 is that the driver assigns
@@ -176,11 +246,11 @@
  * virtually contiguous pages, that should change to improve
  * performance.
  */
-static int ipath_tid_update(struct ipath_portdata *pd,
+static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
 			    const struct ipath_tid_info *ti)
 {
 	int ret = 0, ntids;
-	u32 tid, porttid, cnt, i, tidcnt;
+	u32 tid, porttid, cnt, i, tidcnt, tidoff;
 	u16 *tidlist;
 	struct ipath_devdata *dd = pd->port_dd;
 	u64 physaddr;
@@ -188,6 +258,7 @@
 	u64 __iomem *tidbase;
 	unsigned long tidmap[8];
 	struct page **pagep = NULL;
+	unsigned subport = subport_fp(fp);
 
 	if (!dd->ipath_pageshadow) {
 		ret = -ENOMEM;
@@ -204,20 +275,34 @@
 		ret = -EFAULT;
 		goto done;
 	}
-	tidcnt = dd->ipath_rcvtidcnt;
-	if (cnt >= tidcnt) {
+	porttid = pd->port_port * dd->ipath_rcvtidcnt;
+	if (!pd->port_subport_cnt) {
+		tidcnt = dd->ipath_rcvtidcnt;
+		tid = pd->port_tidcursor;
+		tidoff = 0;
+	} else if (!subport) {
+		tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
+			 (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
+		tidoff = dd->ipath_rcvtidcnt - tidcnt;
+		porttid += tidoff;
+		tid = tidcursor_fp(fp);
+	} else {
+		tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
+		tidoff = tidcnt * (subport - 1);
+		porttid += tidoff;
+		tid = tidcursor_fp(fp);
+	}
+	if (cnt > tidcnt) {
 		/* make sure it all fits in port_tid_pg_list */
 		dev_info(&dd->pcidev->dev, "Process tried to allocate %u "
 			 "TIDs, only trying max (%u)\n", cnt, tidcnt);
 		cnt = tidcnt;
 	}
-	pagep = (struct page **)pd->port_tid_pg_list;
-	tidlist = (u16 *) (&pagep[cnt]);
+	pagep = &((struct page **) pd->port_tid_pg_list)[tidoff];
+	tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff];
 
 	memset(tidmap, 0, sizeof(tidmap));
-	tid = pd->port_tidcursor;
 	/* before decrement; chip actual # */
-	porttid = pd->port_port * tidcnt;
 	ntids = tidcnt;
 	tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
 				   dd->ipath_rcvtidbase +
@@ -274,16 +359,19 @@
 			ret = -ENOMEM;
 			break;
 		}
-		tidlist[i] = tid;
+		tidlist[i] = tid + tidoff;
 		ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, "
-			   "vaddr %lx\n", i, tid, vaddr);
+			   "vaddr %lx\n", i, tid + tidoff, vaddr);
 		/* we "know" system pages and TID pages are same size */
 		dd->ipath_pageshadow[porttid + tid] = pagep[i];
+		dd->ipath_physshadow[porttid + tid] = ipath_map_page(
+			dd->pcidev, pagep[i], 0, PAGE_SIZE,
+			PCI_DMA_FROMDEVICE);
 		/*
 		 * don't need atomic or it's overhead
 		 */
 		__set_bit(tid, tidmap);
-		physaddr = page_to_phys(pagep[i]);
+		physaddr = dd->ipath_physshadow[porttid + tid];
 		ipath_stats.sps_pagelocks++;
 		ipath_cdbg(VERBOSE,
 			   "TID %u, vaddr %lx, physaddr %llx pgp %p\n",
@@ -317,6 +405,9 @@
 					   tid);
 				dd->ipath_f_put_tid(dd, &tidbase[tid], 1,
 						    dd->ipath_tidinvalid);
+				pci_unmap_page(dd->pcidev,
+					dd->ipath_physshadow[porttid + tid],
+					PAGE_SIZE, PCI_DMA_FROMDEVICE);
 				dd->ipath_pageshadow[porttid + tid] = NULL;
 				ipath_stats.sps_pageunlocks++;
 			}
@@ -341,7 +432,10 @@
 		}
 		if (tid == tidcnt)
 			tid = 0;
-		pd->port_tidcursor = tid;
+		if (!pd->port_subport_cnt)
+			pd->port_tidcursor = tid;
+		else
+			tidcursor_fp(fp) = tid;
 	}
 
 done:
@@ -354,6 +448,7 @@
 /**
  * ipath_tid_free - free a port TID
  * @pd: the port
+ * @subport: the subport
  * @ti: the TID info
  *
  * right now we are unlocking one page at a time, but since
@@ -367,7 +462,7 @@
  * they pass in to us.
  */
 
-static int ipath_tid_free(struct ipath_portdata *pd,
+static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
 			  const struct ipath_tid_info *ti)
 {
 	int ret = 0;
@@ -388,11 +483,20 @@
 	}
 
 	porttid = pd->port_port * dd->ipath_rcvtidcnt;
+	if (!pd->port_subport_cnt)
+		tidcnt = dd->ipath_rcvtidcnt;
+	else if (!subport) {
+		tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
+			 (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
+		porttid += dd->ipath_rcvtidcnt - tidcnt;
+	} else {
+		tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
+		porttid += tidcnt * (subport - 1);
+	}
 	tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
 				   dd->ipath_rcvtidbase +
 				   porttid * sizeof(*tidbase));
 
-	tidcnt = dd->ipath_rcvtidcnt;
 	limit = sizeof(tidmap) * BITS_PER_BYTE;
 	if (limit > tidcnt)
 		/* just in case size changes in future */
@@ -417,6 +521,9 @@
 				   pd->port_pid, tid);
 			dd->ipath_f_put_tid(dd, &tidbase[tid], 1,
 					    dd->ipath_tidinvalid);
+			pci_unmap_page(dd->pcidev,
+				dd->ipath_physshadow[porttid + tid],
+				PAGE_SIZE, PCI_DMA_FROMDEVICE);
 			ipath_release_user_pages(
 				&dd->ipath_pageshadow[porttid + tid], 1);
 			dd->ipath_pageshadow[porttid + tid] = NULL;
@@ -581,20 +688,24 @@
 /**
  * ipath_manage_rcvq - manage a port's receive queue
  * @pd: the port
+ * @subport: the subport
  * @start_stop: action to carry out
  *
  * start_stop == 0 disables receive on the port, for use in queue
  * overflow conditions.  start_stop==1 re-enables, to be used to
  * re-init the software copy of the head register
  */
-static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop)
+static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport,
+			     int start_stop)
 {
 	struct ipath_devdata *dd = pd->port_dd;
 	u64 tval;
 
-	ipath_cdbg(PROC, "%sabling rcv for unit %u port %u\n",
+	ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n",
 		   start_stop ? "en" : "dis", dd->ipath_unit,
-		   pd->port_port);
+		   pd->port_port, subport);
+	if (subport)
+		goto bail;
 	/* atomically clear receive enable port. */
 	if (start_stop) {
 		/*
@@ -609,7 +720,7 @@
 		 * updated and correct itself, even in the face of software
 		 * bugs.
 		 */
-		*pd->port_rcvhdrtail_kvaddr = 0;
+		*(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0;
 		set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
 			&dd->ipath_rcvctrl);
 	} else
@@ -630,6 +741,7 @@
 		tval = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
 	}
 	/* always; new head should be equal to new tail; see above */
+bail:
 	return 0;
 }
 
@@ -687,6 +799,36 @@
 	}
 }
 
+/*
+ * Initialize the port data with the receive buffer sizes
+ * so this can be done while the master port is locked.
+ * Otherwise, there is a race with a slave opening the port
+ * and seeing these fields uninitialized.
+ */
+static void init_user_egr_sizes(struct ipath_portdata *pd)
+{
+	struct ipath_devdata *dd = pd->port_dd;
+	unsigned egrperchunk, egrcnt, size;
+
+	/*
+	 * to avoid wasting a lot of memory, we allocate 32KB chunks of
+	 * physically contiguous memory, advance through it until used up
+	 * and then allocate more.  Of course, we need memory to store those
+	 * extra pointers, now.  Started out with 256KB, but under heavy
+	 * memory pressure (creating large files and then copying them over
+	 * NFS while doing lots of MPI jobs), we hit some allocation
+	 * failures, even though we can sleep...  (2.6.10) Still get
+	 * failures at 64K.  32K is the lowest we can go without wasting
+	 * additional memory.
+	 */
+	size = 0x8000;
+	egrperchunk = size / dd->ipath_rcvegrbufsize;
+	egrcnt = dd->ipath_rcvegrcnt;
+	pd->port_rcvegrbuf_chunks = (egrcnt + egrperchunk - 1) / egrperchunk;
+	pd->port_rcvegrbufs_perchunk = egrperchunk;
+	pd->port_rcvegrbuf_size = size;
+}
+
 /**
  * ipath_create_user_egr - allocate eager TID buffers
  * @pd: the port to allocate TID buffers for
@@ -702,7 +844,7 @@
 static int ipath_create_user_egr(struct ipath_portdata *pd)
 {
 	struct ipath_devdata *dd = pd->port_dd;
-	unsigned e, egrcnt, alloced, egrperchunk, chunk, egrsize, egroff;
+	unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
 	size_t size;
 	int ret;
 	gfp_t gfp_flags;
@@ -722,31 +864,18 @@
 	ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid "
 		   "offset %x, egrsize %u\n", egrcnt, egroff, egrsize);
 
-	/*
-	 * to avoid wasting a lot of memory, we allocate 32KB chunks of
-	 * physically contiguous memory, advance through it until used up
-	 * and then allocate more.  Of course, we need memory to store those
-	 * extra pointers, now.  Started out with 256KB, but under heavy
-	 * memory pressure (creating large files and then copying them over
-	 * NFS while doing lots of MPI jobs), we hit some allocation
-	 * failures, even though we can sleep...  (2.6.10) Still get
-	 * failures at 64K.  32K is the lowest we can go without wasting
-	 * additional memory.
-	 */
-	size = 0x8000;
-	alloced = ALIGN(egrsize * egrcnt, size);
-	egrperchunk = size / egrsize;
-	chunk = (egrcnt + egrperchunk - 1) / egrperchunk;
-	pd->port_rcvegrbuf_chunks = chunk;
-	pd->port_rcvegrbufs_perchunk = egrperchunk;
-	pd->port_rcvegrbuf_size = size;
-	pd->port_rcvegrbuf = vmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]));
+	chunk = pd->port_rcvegrbuf_chunks;
+	egrperchunk = pd->port_rcvegrbufs_perchunk;
+	size = pd->port_rcvegrbuf_size;
+	pd->port_rcvegrbuf = kmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]),
+				     GFP_KERNEL);
 	if (!pd->port_rcvegrbuf) {
 		ret = -ENOMEM;
 		goto bail;
 	}
 	pd->port_rcvegrbuf_phys =
-		vmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]));
+		kmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]),
+			GFP_KERNEL);
 	if (!pd->port_rcvegrbuf_phys) {
 		ret = -ENOMEM;
 		goto bail_rcvegrbuf;
@@ -791,105 +920,23 @@
 				  pd->port_rcvegrbuf_phys[e]);
 
 	}
-	vfree(pd->port_rcvegrbuf_phys);
+	kfree(pd->port_rcvegrbuf_phys);
 	pd->port_rcvegrbuf_phys = NULL;
 bail_rcvegrbuf:
-	vfree(pd->port_rcvegrbuf);
+	kfree(pd->port_rcvegrbuf);
 	pd->port_rcvegrbuf = NULL;
 bail:
 	return ret;
 }
 
-static int ipath_do_user_init(struct ipath_portdata *pd,
-			      const struct ipath_user_info *uinfo)
-{
-	int ret = 0;
-	struct ipath_devdata *dd = pd->port_dd;
-	u32 head32;
-
-	/* for now, if major version is different, bail */
-	if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) {
-		dev_info(&dd->pcidev->dev,
-			 "User major version %d not same as driver "
-			 "major %d\n", uinfo->spu_userversion >> 16,
-			 IPATH_USER_SWMAJOR);
-		ret = -ENODEV;
-		goto done;
-	}
-
-	if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR)
-		ipath_dbg("User minor version %d not same as driver "
-			  "minor %d\n", uinfo->spu_userversion & 0xffff,
-			  IPATH_USER_SWMINOR);
-
-	if (uinfo->spu_rcvhdrsize) {
-		ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
-		if (ret)
-			goto done;
-	}
-
-	/* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
-
-	/* for right now, kernel piobufs are at end, so port 1 is at 0 */
-	pd->port_piobufs = dd->ipath_piobufbase +
-		dd->ipath_pbufsport * (pd->port_port -
-				       1) * dd->ipath_palign;
-	ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n",
-		   pd->port_port, pd->port_piobufs);
-
-	/*
-	 * Now allocate the rcvhdr Q and eager TIDs; skip the TID
-	 * array for time being.  If pd->port_port > chip-supported,
-	 * we need to do extra stuff here to handle by handling overflow
-	 * through port 0, someday
-	 */
-	ret = ipath_create_rcvhdrq(dd, pd);
-	if (!ret)
-		ret = ipath_create_user_egr(pd);
-	if (ret)
-		goto done;
-
-	/*
-	 * set the eager head register for this port to the current values
-	 * of the tail pointers, since we don't know if they were
-	 * updated on last use of the port.
-	 */
-	head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
-	ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
-	dd->ipath_lastegrheads[pd->port_port] = -1;
-	dd->ipath_lastrcvhdrqtails[pd->port_port] = -1;
-	ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
-		pd->port_port, head32);
-	pd->port_tidcursor = 0;	/* start at beginning after open */
-	/*
-	 * now enable the port; the tail registers will be written to memory
-	 * by the chip as soon as it sees the write to
-	 * dd->ipath_kregs->kr_rcvctrl.  The update only happens on
-	 * transition from 0 to 1, so clear it first, then set it as part of
-	 * enabling the port.  This will (very briefly) affect any other
-	 * open ports, but it shouldn't be long enough to be an issue.
-	 * We explictly set the in-memory copy to 0 beforehand, so we don't
-	 * have to wait to be sure the DMA update has happened.
-	 */
-	*pd->port_rcvhdrtail_kvaddr = 0ULL;
-	set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
-		&dd->ipath_rcvctrl);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-			 dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-			 dd->ipath_rcvctrl);
-done:
-	return ret;
-}
-
 
 /* common code for the mappings on dma_alloc_coherent mem */
 static int ipath_mmap_mem(struct vm_area_struct *vma,
-			     struct ipath_portdata *pd, unsigned len,
-			     int write_ok, dma_addr_t addr, char *what)
+	struct ipath_portdata *pd, unsigned len, int write_ok,
+	void *kvaddr, char *what)
 {
 	struct ipath_devdata *dd = pd->port_dd;
-	unsigned pfn = (unsigned long)addr >> PAGE_SHIFT;
+	unsigned long pfn;
 	int ret;
 
 	if ((vma->vm_end - vma->vm_start) > len) {
@@ -912,17 +959,17 @@
 		vma->vm_flags &= ~VM_MAYWRITE;
 	}
 
+	pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT;
 	ret = remap_pfn_range(vma, vma->vm_start, pfn,
 			      len, vma->vm_page_prot);
 	if (ret)
-		dev_info(&dd->pcidev->dev,
-			 "%s port%u mmap of %lx, %x bytes r%c failed: %d\n",
-			 what, pd->port_port, (unsigned long)addr, len,
-			 write_ok?'w':'o', ret);
+		dev_info(&dd->pcidev->dev, "%s port%u mmap of %lx, %x "
+			 "bytes r%c failed: %d\n", what, pd->port_port,
+			 pfn, len, write_ok?'w':'o', ret);
 	else
-		ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes r%c\n",
-			what, pd->port_port, (unsigned long)addr, len,
-			 write_ok?'w':'o');
+		ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes "
+			   "r%c\n", what, pd->port_port, pfn, len,
+			   write_ok?'w':'o');
 bail:
 	return ret;
 }
@@ -957,7 +1004,8 @@
 
 static int mmap_piobufs(struct vm_area_struct *vma,
 			struct ipath_devdata *dd,
-			struct ipath_portdata *pd)
+			struct ipath_portdata *pd,
+			unsigned piobufs, unsigned piocnt)
 {
 	unsigned long phys;
 	int ret;
@@ -968,16 +1016,15 @@
 	 * process data, and catches users who might try to read the i/o
 	 * space due to a bug.
 	 */
-	if ((vma->vm_end - vma->vm_start) >
-	    (dd->ipath_pbufsport * dd->ipath_palign)) {
+	if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) {
 		dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: "
 			 "reqlen %lx > PAGE\n",
 			 vma->vm_end - vma->vm_start);
-		ret = -EFAULT;
+		ret = -EINVAL;
 		goto bail;
 	}
 
-	phys = dd->ipath_physaddr + pd->port_piobufs;
+	phys = dd->ipath_physaddr + piobufs;
 
 	/*
 	 * Don't mark this as non-cached, or we don't get the
@@ -1011,7 +1058,7 @@
 	struct ipath_devdata *dd = pd->port_dd;
 	unsigned long start, size;
 	size_t total_size, i;
-	dma_addr_t *phys;
+	unsigned long pfn;
 	int ret;
 
 	size = pd->port_rcvegrbuf_size;
@@ -1021,7 +1068,7 @@
 			 "reqlen %lx > actual %lx\n",
 			 vma->vm_end - vma->vm_start,
 			 (unsigned long) total_size);
-		ret = -EFAULT;
+		ret = -EINVAL;
 		goto bail;
 	}
 
@@ -1035,11 +1082,11 @@
 	vma->vm_flags &= ~VM_MAYWRITE;
 
 	start = vma->vm_start;
-	phys = pd->port_rcvegrbuf_phys;
 
 	for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) {
-		ret = remap_pfn_range(vma, start, phys[i] >> PAGE_SHIFT,
-				      size, vma->vm_page_prot);
+		pfn = virt_to_phys(pd->port_rcvegrbuf[i]) >> PAGE_SHIFT;
+		ret = remap_pfn_range(vma, start, pfn, size,
+				      vma->vm_page_prot);
 		if (ret < 0)
 			goto bail;
 	}
@@ -1049,6 +1096,122 @@
 	return ret;
 }
 
+/*
+ * ipath_file_vma_nopage - handle a VMA page fault.
+ */
+static struct page *ipath_file_vma_nopage(struct vm_area_struct *vma,
+					  unsigned long address, int *type)
+{
+	unsigned long offset = address - vma->vm_start;
+	struct page *page = NOPAGE_SIGBUS;
+	void *pageptr;
+
+	/*
+	 * Convert the vmalloc address into a struct page.
+	 */
+	pageptr = (void *)(offset + (vma->vm_pgoff << PAGE_SHIFT));
+	page = vmalloc_to_page(pageptr);
+	if (!page)
+		goto out;
+
+	/* Increment the reference count. */
+	get_page(page);
+	if (type)
+		*type = VM_FAULT_MINOR;
+out:
+	return page;
+}
+
+static struct vm_operations_struct ipath_file_vm_ops = {
+	.nopage = ipath_file_vma_nopage,
+};
+
+static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
+		       struct ipath_portdata *pd, unsigned subport)
+{
+	unsigned long len;
+	struct ipath_devdata *dd;
+	void *addr;
+	size_t size;
+	int ret;
+
+	/* If the port is not shared, all addresses should be physical */
+	if (!pd->port_subport_cnt) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	dd = pd->port_dd;
+	size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
+
+	/*
+	 * Master has all the slave uregbase, rcvhdrq, and
+	 * rcvegrbufs mmapped.
+	 */
+	if (subport == 0) {
+		unsigned num_slaves = pd->port_subport_cnt - 1;
+
+		if (pgaddr == ((u64) pd->subport_uregbase & MMAP64_MASK)) {
+			addr = pd->subport_uregbase;
+			size = PAGE_SIZE * num_slaves;
+		} else if (pgaddr == ((u64) pd->subport_rcvhdr_base &
+				      MMAP64_MASK)) {
+			addr = pd->subport_rcvhdr_base;
+			size = pd->port_rcvhdrq_size * num_slaves;
+		} else if (pgaddr == ((u64) pd->subport_rcvegrbuf &
+				      MMAP64_MASK)) {
+			addr = pd->subport_rcvegrbuf;
+			size *= num_slaves;
+		} else {
+			ret = -EINVAL;
+			goto bail;
+		}
+	} else if (pgaddr == (((u64) pd->subport_uregbase +
+			       PAGE_SIZE * (subport - 1)) & MMAP64_MASK)) {
+		addr = pd->subport_uregbase + PAGE_SIZE * (subport - 1);
+		size = PAGE_SIZE;
+	} else if (pgaddr == (((u64) pd->subport_rcvhdr_base +
+			       pd->port_rcvhdrq_size * (subport - 1)) &
+			      MMAP64_MASK)) {
+		addr = pd->subport_rcvhdr_base +
+			pd->port_rcvhdrq_size * (subport - 1);
+		size = pd->port_rcvhdrq_size;
+	} else if (pgaddr == (((u64) pd->subport_rcvegrbuf +
+			       size * (subport - 1)) & MMAP64_MASK)) {
+		addr = pd->subport_rcvegrbuf + size * (subport - 1);
+		/* rcvegrbufs are read-only on the slave */
+		if (vma->vm_flags & VM_WRITE) {
+			dev_info(&dd->pcidev->dev,
+				 "Can't map eager buffers as "
+				 "writable (flags=%lx)\n", vma->vm_flags);
+			ret = -EPERM;
+			goto bail;
+		}
+		/*
+		 * Don't allow permission to later change to writeable
+		 * with mprotect.
+		 */
+		vma->vm_flags &= ~VM_MAYWRITE;
+	} else {
+		ret = -EINVAL;
+		goto bail;
+	}
+	len = vma->vm_end - vma->vm_start;
+	if (len > size) {
+		ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size);
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
+	vma->vm_ops = &ipath_file_vm_ops;
+	vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
+	ret = 0;
+
+bail:
+	return ret;
+}
+
 /**
  * ipath_mmap - mmap various structures into user space
  * @fp: the file pointer
@@ -1064,73 +1227,99 @@
 	struct ipath_portdata *pd;
 	struct ipath_devdata *dd;
 	u64 pgaddr, ureg;
+	unsigned piobufs, piocnt;
 	int ret;
 
 	pd = port_fp(fp);
+	if (!pd) {
+		ret = -EINVAL;
+		goto bail;
+	}
 	dd = pd->port_dd;
 
 	/*
 	 * This is the ipath_do_user_init() code, mapping the shared buffers
 	 * into the user process. The address referred to by vm_pgoff is the
-	 * virtual, not physical, address; we only do one mmap for each
-	 * space mapped.
+	 * file offset passed via mmap().  For shared ports, this is the
+	 * kernel vmalloc() address of the pages to share with the master.
+	 * For non-shared or master ports, this is a physical address.
+	 * We only do one mmap for each space mapped.
 	 */
 	pgaddr = vma->vm_pgoff << PAGE_SHIFT;
 
 	/*
-	 * Must fit in 40 bits for our hardware; some checked elsewhere,
-	 * but we'll be paranoid.  Check for 0 is mostly in case one of the
-	 * allocations failed, but user called mmap anyway.   We want to catch
-	 * that before it can match.
+	 * Check for 0 in case one of the allocations failed, but user
+	 * called mmap anyway.
 	 */
-	if (!pgaddr || pgaddr >= (1ULL<<40))  {
-		ipath_dev_err(dd, "Bad phys addr %llx, start %lx, end %lx\n",
-			(unsigned long long)pgaddr, vma->vm_start, vma->vm_end);
-		return -EINVAL;
-	}
-
-	/* just the offset of the port user registers, not physical addr */
-	ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
-
-	ipath_cdbg(MM, "ushare: pgaddr %llx vm_start=%lx, vmlen %lx\n",
-		   (unsigned long long) pgaddr, vma->vm_start,
-		   vma->vm_end - vma->vm_start);
-
-	if (vma->vm_start & (PAGE_SIZE-1)) {
-		ipath_dev_err(dd,
-			"vm_start not aligned: %lx, end=%lx phys %lx\n",
-			vma->vm_start, vma->vm_end, (unsigned long)pgaddr);
+	if (!pgaddr)  {
 		ret = -EINVAL;
+		goto bail;
 	}
-	else if (pgaddr == ureg)
+
+	ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n",
+		   (unsigned long long) pgaddr, vma->vm_start,
+		   vma->vm_end - vma->vm_start, dd->ipath_unit,
+		   pd->port_port, subport_fp(fp));
+
+	/*
+	 * Physical addresses must fit in 40 bits for our hardware.
+	 * Check for kernel virtual addresses first, anything else must
+	 * match a HW or memory address.
+	 */
+	if (pgaddr >= (1ULL<<40)) {
+		ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
+		goto bail;
+	}
+
+	if (!pd->port_subport_cnt) {
+		/* port is not shared */
+		ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
+		piocnt = dd->ipath_pbufsport;
+		piobufs = pd->port_piobufs;
+	} else if (!subport_fp(fp)) {
+		/* caller is the master */
+		ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
+		piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) +
+			 (dd->ipath_pbufsport % pd->port_subport_cnt);
+		piobufs = pd->port_piobufs +
+			dd->ipath_palign * (dd->ipath_pbufsport - piocnt);
+	} else {
+		unsigned slave = subport_fp(fp) - 1;
+
+		/* caller is a slave */
+		ureg = 0;
+		piocnt = dd->ipath_pbufsport / pd->port_subport_cnt;
+		piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
+	}
+
+	if (pgaddr == ureg)
 		ret = mmap_ureg(vma, dd, ureg);
-	else if (pgaddr == pd->port_piobufs)
-		ret = mmap_piobufs(vma, dd, pd);
-	else if (pgaddr == (u64) pd->port_rcvegr_phys)
+	else if (pgaddr == piobufs)
+		ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt);
+	else if (pgaddr == dd->ipath_pioavailregs_phys)
+		/* in-memory copy of pioavail registers */
+		ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
+			      	     (void *) dd->ipath_pioavailregs_dma,
+				     "pioavail registers");
+	else if (subport_fp(fp))
+		/* Subports don't mmap the physical receive buffers */
+		ret = -EINVAL;
+	else if (pgaddr == pd->port_rcvegr_phys)
 		ret = mmap_rcvegrbufs(vma, pd);
-	else if (pgaddr == (u64) pd->port_rcvhdrq_phys) {
+	else if (pgaddr == (u64) pd->port_rcvhdrq_phys)
 		/*
 		 * The rcvhdrq itself; readonly except on HT (so have
 		 * to allow writable mapping), multiple pages, contiguous
 		 * from an i/o perspective.
 		 */
-		unsigned total_size =
-			ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize
-			   * sizeof(u32), PAGE_SIZE);
-		ret = ipath_mmap_mem(vma, pd, total_size, 1,
-				     pd->port_rcvhdrq_phys,
+		ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1,
+				     pd->port_rcvhdrq,
 				     "rcvhdrq");
-	}
-	else if (pgaddr == (u64)pd->port_rcvhdrqtailaddr_phys)
+	else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys)
 		/* in-memory copy of rcvhdrq tail register */
 		ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
-				     pd->port_rcvhdrqtailaddr_phys,
+				     pd->port_rcvhdrtail_kvaddr,
 				     "rcvhdrq tail");
-	else if (pgaddr == dd->ipath_pioavailregs_phys)
-		/* in-memory copy of pioavail registers */
-		ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
-				     dd->ipath_pioavailregs_phys,
-				     "pioavail registers");
 	else
 		ret = -EINVAL;
 
@@ -1138,9 +1327,10 @@
 
 	if (ret < 0)
 		dev_info(&dd->pcidev->dev,
-			 "Failure %d on addr %lx, off %lx\n",
-			 -ret, vma->vm_start, vma->vm_pgoff);
-
+			 "Failure %d on off %llx len %lx\n",
+			 -ret, (unsigned long long)pgaddr,
+			 vma->vm_end - vma->vm_start);
+bail:
 	return ret;
 }
 
@@ -1154,6 +1344,8 @@
 	struct ipath_devdata *dd;
 
 	pd = port_fp(fp);
+	if (!pd)
+		goto bail;
 	dd = pd->port_dd;
 
 	bit = pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT;
@@ -1176,7 +1368,7 @@
 
 	if (tail == head) {
 		set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
-		if(dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
+		if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
 			(void)ipath_write_ureg(dd, ur_rcvhdrhead,
 					       dd->ipath_rhdrhead_intr_off
 					       | head, pd->port_port);
@@ -1200,18 +1392,80 @@
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
 			 dd->ipath_rcvctrl);
 
+bail:
 	return pollflag;
 }
 
-static int try_alloc_port(struct ipath_devdata *dd, int port,
-			  struct file *fp)
+static int init_subports(struct ipath_devdata *dd,
+			 struct ipath_portdata *pd,
+			 const struct ipath_user_info *uinfo)
 {
+	int ret = 0;
+	unsigned num_slaves;
+	size_t size;
+
+	/* Old user binaries don't know about subports */
+	if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR)
+		goto bail;
+	/*
+	 * If the user is requesting zero or one port,
+	 * skip the subport allocation.
+	 */
+	if (uinfo->spu_subport_cnt <= 1)
+		goto bail;
+	if (uinfo->spu_subport_cnt > 4) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	num_slaves = uinfo->spu_subport_cnt - 1;
+	pd->subport_uregbase = vmalloc(PAGE_SIZE * num_slaves);
+	if (!pd->subport_uregbase) {
+		ret = -ENOMEM;
+		goto bail;
+	}
+	/* Note: pd->port_rcvhdrq_size isn't initialized yet. */
+	size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
+		     sizeof(u32), PAGE_SIZE) * num_slaves;
+	pd->subport_rcvhdr_base = vmalloc(size);
+	if (!pd->subport_rcvhdr_base) {
+		ret = -ENOMEM;
+		goto bail_ureg;
+	}
+
+	pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks *
+					pd->port_rcvegrbuf_size *
+					num_slaves);
+	if (!pd->subport_rcvegrbuf) {
+		ret = -ENOMEM;
+		goto bail_rhdr;
+	}
+
+	pd->port_subport_cnt = uinfo->spu_subport_cnt;
+	pd->port_subport_id = uinfo->spu_subport_id;
+	pd->active_slaves = 1;
+	goto bail;
+
+bail_rhdr:
+	vfree(pd->subport_rcvhdr_base);
+bail_ureg:
+	vfree(pd->subport_uregbase);
+	pd->subport_uregbase = NULL;
+bail:
+	return ret;
+}
+
+static int try_alloc_port(struct ipath_devdata *dd, int port,
+			  struct file *fp,
+			  const struct ipath_user_info *uinfo)
+{
+	struct ipath_portdata *pd;
 	int ret;
 
-	if (!dd->ipath_pd[port]) {
-		void *p, *ptmp;
+	if (!(pd = dd->ipath_pd[port])) {
+		void *ptmp;
 
-		p = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);
+		pd = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);
 
 		/*
 		 * Allocate memory for use in ipath_tid_update() just once
@@ -1221,34 +1475,36 @@
 		ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) +
 			       dd->ipath_rcvtidcnt * sizeof(struct page **),
 			       GFP_KERNEL);
-		if (!p || !ptmp) {
+		if (!pd || !ptmp) {
 			ipath_dev_err(dd, "Unable to allocate portdata "
 				      "memory, failing open\n");
 			ret = -ENOMEM;
-			kfree(p);
+			kfree(pd);
 			kfree(ptmp);
 			goto bail;
 		}
-		dd->ipath_pd[port] = p;
+		dd->ipath_pd[port] = pd;
 		dd->ipath_pd[port]->port_port = port;
 		dd->ipath_pd[port]->port_dd = dd;
 		dd->ipath_pd[port]->port_tid_pg_list = ptmp;
 		init_waitqueue_head(&dd->ipath_pd[port]->port_wait);
 	}
-	if (!dd->ipath_pd[port]->port_cnt) {
-		dd->ipath_pd[port]->port_cnt = 1;
-		fp->private_data = (void *) dd->ipath_pd[port];
+	if (!pd->port_cnt) {
+		pd->userversion = uinfo->spu_userversion;
+		init_user_egr_sizes(pd);
+		if ((ret = init_subports(dd, pd, uinfo)) != 0)
+			goto bail;
 		ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n",
 			   current->comm, current->pid, dd->ipath_unit,
 			   port);
-		dd->ipath_pd[port]->port_pid = current->pid;
-		strncpy(dd->ipath_pd[port]->port_comm, current->comm,
-			sizeof(dd->ipath_pd[port]->port_comm));
+		pd->port_cnt = 1;
+		port_fp(fp) = pd;
+		pd->port_pid = current->pid;
+		strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
 		ipath_stats.sps_ports++;
 		ret = 0;
-		goto bail;
-	}
-	ret = -EBUSY;
+	} else
+		ret = -EBUSY;
 
 bail:
 	return ret;
@@ -1264,7 +1520,8 @@
 				     | IPATH_LINKUNK));
 }
 
-static int find_free_port(int unit, struct file *fp)
+static int find_free_port(int unit, struct file *fp,
+			  const struct ipath_user_info *uinfo)
 {
 	struct ipath_devdata *dd = ipath_lookup(unit);
 	int ret, i;
@@ -1279,8 +1536,8 @@
 		goto bail;
 	}
 
-	for (i = 0; i < dd->ipath_cfgports; i++) {
-		ret = try_alloc_port(dd, i, fp);
+	for (i = 1; i < dd->ipath_cfgports; i++) {
+		ret = try_alloc_port(dd, i, fp, uinfo);
 		if (ret != -EBUSY)
 			goto bail;
 	}
@@ -1290,13 +1547,14 @@
 	return ret;
 }
 
-static int find_best_unit(struct file *fp)
+static int find_best_unit(struct file *fp,
+			  const struct ipath_user_info *uinfo)
 {
 	int ret = 0, i, prefunit = -1, devmax;
 	int maxofallports, npresent, nup;
 	int ndev;
 
-	(void) ipath_count_units(&npresent, &nup, &maxofallports);
+	devmax = ipath_count_units(&npresent, &nup, &maxofallports);
 
 	/*
 	 * This code is present to allow a knowledgeable person to
@@ -1343,8 +1601,6 @@
 
 	if (prefunit != -1)
 		devmax = prefunit + 1;
-	else
-		devmax = ipath_count_units(NULL, NULL, NULL);
 recheck:
 	for (i = 1; i < maxofallports; i++) {
 		for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax;
@@ -1359,7 +1615,7 @@
 				 * next.
 				 */
 				continue;
-			ret = try_alloc_port(dd, i, fp);
+			ret = try_alloc_port(dd, i, fp, uinfo);
 			if (!ret)
 				goto done;
 		}
@@ -1395,22 +1651,183 @@
 	return ret;
 }
 
+static int find_shared_port(struct file *fp,
+			    const struct ipath_user_info *uinfo)
+{
+	int devmax, ndev, i;
+	int ret = 0;
+
+	devmax = ipath_count_units(NULL, NULL, NULL);
+
+	for (ndev = 0; ndev < devmax; ndev++) {
+		struct ipath_devdata *dd = ipath_lookup(ndev);
+
+		if (!dd)
+			continue;
+		for (i = 1; i < dd->ipath_cfgports; i++) {
+			struct ipath_portdata *pd = dd->ipath_pd[i];
+
+			/* Skip ports which are not yet open */
+			if (!pd || !pd->port_cnt)
+				continue;
+			/* Skip port if it doesn't match the requested one */
+			if (pd->port_subport_id != uinfo->spu_subport_id)
+				continue;
+			/* Verify the sharing process matches the master */
+			if (pd->port_subport_cnt != uinfo->spu_subport_cnt ||
+			    pd->userversion != uinfo->spu_userversion ||
+			    pd->port_cnt >= pd->port_subport_cnt) {
+				ret = -EINVAL;
+				goto done;
+			}
+			port_fp(fp) = pd;
+			subport_fp(fp) = pd->port_cnt++;
+			tidcursor_fp(fp) = 0;
+			pd->active_slaves |= 1 << subport_fp(fp);
+			ipath_cdbg(PROC,
+				   "%s[%u] %u sharing %s[%u] unit:port %u:%u\n",
+				   current->comm, current->pid,
+				   subport_fp(fp),
+				   pd->port_comm, pd->port_pid,
+				   dd->ipath_unit, pd->port_port);
+			ret = 1;
+			goto done;
+		}
+	}
+
+done:
+	return ret;
+}
+
 static int ipath_open(struct inode *in, struct file *fp)
 {
-	int ret, user_minor;
+	/* The real work is performed later in ipath_assign_port() */
+	fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL);
+	return fp->private_data ? 0 : -ENOMEM;
+}
+
+
+/* Get port early, so can set affinity prior to memory allocation */
+static int ipath_assign_port(struct file *fp,
+			      const struct ipath_user_info *uinfo)
+{
+	int ret;
+	int i_minor;
+	unsigned swminor;
+
+	/* Check to be sure we haven't already initialized this file */
+	if (port_fp(fp)) {
+		ret = -EINVAL;
+		goto done;
+	}
+
+	/* for now, if major version is different, bail */
+	if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) {
+		ipath_dbg("User major version %d not same as driver "
+			  "major %d\n", uinfo->spu_userversion >> 16,
+			  IPATH_USER_SWMAJOR);
+		ret = -ENODEV;
+		goto done;
+	}
+
+	swminor = uinfo->spu_userversion & 0xffff;
+	if (swminor != IPATH_USER_SWMINOR)
+		ipath_dbg("User minor version %d not same as driver "
+			  "minor %d\n", swminor, IPATH_USER_SWMINOR);
 
 	mutex_lock(&ipath_mutex);
 
-	user_minor = iminor(in) - IPATH_USER_MINOR_BASE;
-	ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n",
-		   (long)in->i_rdev, user_minor);
+	if (swminor == IPATH_USER_SWMINOR && uinfo->spu_subport_cnt &&
+	    (ret = find_shared_port(fp, uinfo))) {
+		mutex_unlock(&ipath_mutex);
+		if (ret > 0)
+			ret = 0;
+		goto done;
+	}
 
-	if (user_minor)
-		ret = find_free_port(user_minor - 1, fp);
+	i_minor = iminor(fp->f_dentry->d_inode) - IPATH_USER_MINOR_BASE;
+	ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n",
+		   (long)fp->f_dentry->d_inode->i_rdev, i_minor);
+
+	if (i_minor)
+		ret = find_free_port(i_minor - 1, fp, uinfo);
 	else
-		ret = find_best_unit(fp);
+		ret = find_best_unit(fp, uinfo);
 
 	mutex_unlock(&ipath_mutex);
+
+done:
+	return ret;
+}
+
+
+static int ipath_do_user_init(struct file *fp,
+			      const struct ipath_user_info *uinfo)
+{
+	int ret;
+	struct ipath_portdata *pd;
+	struct ipath_devdata *dd;
+	u32 head32;
+
+	pd = port_fp(fp);
+	dd = pd->port_dd;
+
+	if (uinfo->spu_rcvhdrsize) {
+		ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
+		if (ret)
+			goto done;
+	}
+
+	/* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
+
+	/* for right now, kernel piobufs are at end, so port 1 is at 0 */
+	pd->port_piobufs = dd->ipath_piobufbase +
+		dd->ipath_pbufsport * (pd->port_port - 1) * dd->ipath_palign;
+	ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n",
+		   pd->port_port, pd->port_piobufs);
+
+	/*
+	 * Now allocate the rcvhdr Q and eager TIDs; skip the TID
+	 * array for time being.  If pd->port_port > chip-supported,
+	 * we need to do extra stuff here to handle by handling overflow
+	 * through port 0, someday
+	 */
+	ret = ipath_create_rcvhdrq(dd, pd);
+	if (!ret)
+		ret = ipath_create_user_egr(pd);
+	if (ret)
+		goto done;
+
+	/*
+	 * set the eager head register for this port to the current values
+	 * of the tail pointers, since we don't know if they were
+	 * updated on last use of the port.
+	 */
+	head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
+	ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
+	dd->ipath_lastegrheads[pd->port_port] = -1;
+	dd->ipath_lastrcvhdrqtails[pd->port_port] = -1;
+	ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
+		pd->port_port, head32);
+	pd->port_tidcursor = 0;	/* start at beginning after open */
+	/*
+	 * now enable the port; the tail registers will be written to memory
+	 * by the chip as soon as it sees the write to
+	 * dd->ipath_kregs->kr_rcvctrl.  The update only happens on
+	 * transition from 0 to 1, so clear it first, then set it as part of
+	 * enabling the port.  This will (very briefly) affect any other
+	 * open ports, but it shouldn't be long enough to be an issue.
+	 * We explictly set the in-memory copy to 0 beforehand, so we don't
+	 * have to wait to be sure the DMA update has happened.
+	 */
+	*(volatile u64 *)pd->port_rcvhdrtail_kvaddr = 0ULL;
+	set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
+		&dd->ipath_rcvctrl);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+			 dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+			 dd->ipath_rcvctrl);
+done:
 	return ret;
 }
 
@@ -1433,6 +1850,8 @@
 		if (!dd->ipath_pageshadow[i])
 			continue;
 
+		pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i],
+			PAGE_SIZE, PCI_DMA_FROMDEVICE);
 		ipath_release_user_pages_on_close(&dd->ipath_pageshadow[i],
 						  1);
 		dd->ipath_pageshadow[i] = NULL;
@@ -1453,6 +1872,7 @@
 static int ipath_close(struct inode *in, struct file *fp)
 {
 	int ret = 0;
+	struct ipath_filedata *fd;
 	struct ipath_portdata *pd;
 	struct ipath_devdata *dd;
 	unsigned port;
@@ -1462,9 +1882,24 @@
 
 	mutex_lock(&ipath_mutex);
 
-	pd = port_fp(fp);
-	port = pd->port_port;
+	fd = (struct ipath_filedata *) fp->private_data;
 	fp->private_data = NULL;
+	pd = fd->pd;
+	if (!pd) {
+		mutex_unlock(&ipath_mutex);
+		goto bail;
+	}
+	if (--pd->port_cnt) {
+		/*
+		 * XXX If the master closes the port before the slave(s),
+		 * revoke the mmap for the eager receive queue so
+		 * the slave(s) don't wait for receive data forever.
+		 */
+		pd->active_slaves &= ~(1 << fd->subport);
+		mutex_unlock(&ipath_mutex);
+		goto bail;
+	}
+	port = pd->port_port;
 	dd = pd->port_dd;
 
 	if (pd->port_hdrqfull) {
@@ -1503,8 +1938,6 @@
 
 		/* clean up the pkeys for this port user */
 		ipath_clean_part_key(pd, dd);
-
-
 		/*
 		 * be paranoid, and never write 0's to these, just use an
 		 * unused part of the port 0 tail page.  Of course,
@@ -1523,39 +1956,49 @@
 		i = dd->ipath_pbufsport * (port - 1);
 		ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport);
 
+		dd->ipath_f_clear_tids(dd, pd->port_port);
+
 		if (dd->ipath_pageshadow)
 			unlock_expected_tids(pd);
 		ipath_stats.sps_ports--;
 		ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
 			   pd->port_comm, pd->port_pid,
 			   dd->ipath_unit, port);
-
-		dd->ipath_f_clear_tids(dd, pd->port_port);
 	}
 
-	pd->port_cnt = 0;
 	pd->port_pid = 0;
-
 	dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */
 	mutex_unlock(&ipath_mutex);
 	ipath_free_pddata(dd, pd); /* after releasing the mutex */
 
+bail:
+	kfree(fd);
 	return ret;
 }
 
-static int ipath_port_info(struct ipath_portdata *pd,
+static int ipath_port_info(struct ipath_portdata *pd, u16 subport,
 			   struct ipath_port_info __user *uinfo)
 {
 	struct ipath_port_info info;
 	int nup;
 	int ret;
+	size_t sz;
 
 	(void) ipath_count_units(NULL, &nup, NULL);
 	info.num_active = nup;
 	info.unit = pd->port_dd->ipath_unit;
 	info.port = pd->port_port;
+	info.subport = subport;
+	/* Don't return new fields if old library opened the port. */
+	if ((pd->userversion & 0xffff) == IPATH_USER_SWMINOR) {
+		/* Number of user ports available for this device. */
+		info.num_ports = pd->port_dd->ipath_cfgports - 1;
+		info.num_subports = pd->port_subport_cnt;
+		sz = sizeof(info);
+	} else
+		sz = sizeof(info) - 2 * sizeof(u16);
 
-	if (copy_to_user(uinfo, &info, sizeof(info))) {
+	if (copy_to_user(uinfo, &info, sz)) {
 		ret = -EFAULT;
 		goto bail;
 	}
@@ -1565,6 +2008,16 @@
 	return ret;
 }
 
+static int ipath_get_slave_info(struct ipath_portdata *pd,
+				void __user *slave_mask_addr)
+{
+	int ret = 0;
+
+	if (copy_to_user(slave_mask_addr, &pd->active_slaves, sizeof(u32)))
+		ret = -EFAULT;
+	return ret;
+}
+
 static ssize_t ipath_write(struct file *fp, const char __user *data,
 			   size_t count, loff_t *off)
 {
@@ -1591,6 +2044,8 @@
 	consumed = sizeof(cmd.type);
 
 	switch (cmd.type) {
+	case IPATH_CMD_ASSIGN_PORT:
+	case __IPATH_CMD_USER_INIT:
 	case IPATH_CMD_USER_INIT:
 		copy = sizeof(cmd.cmd.user_info);
 		dest = &cmd.cmd.user_info;
@@ -1617,6 +2072,11 @@
 		dest = &cmd.cmd.part_key;
 		src = &ucmd->cmd.part_key;
 		break;
+	case IPATH_CMD_SLAVE_INFO:
+		copy = sizeof(cmd.cmd.slave_mask_addr);
+		dest = &cmd.cmd.slave_mask_addr;
+		src = &ucmd->cmd.slave_mask_addr;
+		break;
 	default:
 		ret = -EINVAL;
 		goto bail;
@@ -1634,34 +2094,55 @@
 
 	consumed += copy;
 	pd = port_fp(fp);
+	if (!pd && cmd.type != __IPATH_CMD_USER_INIT &&
+		cmd.type != IPATH_CMD_ASSIGN_PORT) {
+		ret = -EINVAL;
+		goto bail;
+	}
 
 	switch (cmd.type) {
+	case IPATH_CMD_ASSIGN_PORT:
+		ret = ipath_assign_port(fp, &cmd.cmd.user_info);
+		if (ret)
+			goto bail;
+		break;
+	case __IPATH_CMD_USER_INIT:
+		/* backwards compatibility, get port first */
+		ret = ipath_assign_port(fp, &cmd.cmd.user_info);
+		if (ret)
+			goto bail;
+		/* and fall through to current version. */
 	case IPATH_CMD_USER_INIT:
-		ret = ipath_do_user_init(pd, &cmd.cmd.user_info);
-		if (ret < 0)
+		ret = ipath_do_user_init(fp, &cmd.cmd.user_info);
+		if (ret)
 			goto bail;
 		ret = ipath_get_base_info(
-			pd, (void __user *) (unsigned long)
+			fp, (void __user *) (unsigned long)
 			cmd.cmd.user_info.spu_base_info,
 			cmd.cmd.user_info.spu_base_info_size);
 		break;
 	case IPATH_CMD_RECV_CTRL:
-		ret = ipath_manage_rcvq(pd, cmd.cmd.recv_ctrl);
+		ret = ipath_manage_rcvq(pd, subport_fp(fp), cmd.cmd.recv_ctrl);
 		break;
 	case IPATH_CMD_PORT_INFO:
-		ret = ipath_port_info(pd,
+		ret = ipath_port_info(pd, subport_fp(fp),
 				      (struct ipath_port_info __user *)
 				      (unsigned long) cmd.cmd.port_info);
 		break;
 	case IPATH_CMD_TID_UPDATE:
-		ret = ipath_tid_update(pd, &cmd.cmd.tid_info);
+		ret = ipath_tid_update(pd, fp, &cmd.cmd.tid_info);
 		break;
 	case IPATH_CMD_TID_FREE:
-		ret = ipath_tid_free(pd, &cmd.cmd.tid_info);
+		ret = ipath_tid_free(pd, subport_fp(fp), &cmd.cmd.tid_info);
 		break;
 	case IPATH_CMD_SET_PART_KEY:
 		ret = ipath_set_part_key(pd, cmd.cmd.part_key);
 		break;
+	case IPATH_CMD_SLAVE_INFO:
+		ret = ipath_get_slave_info(pd,
+					   (void __user *) (unsigned long)
+					   cmd.cmd.slave_mask_addr);
+		break;
 	}
 
 	if (ret >= 0)
@@ -1858,4 +2339,3 @@
 bail:
 	return;
 }
-
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index c8a8af0..a507d0b 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -356,19 +356,16 @@
 
 	pos = *ppos;
 
-	if ( pos < 0) {
+	if (pos != 0) {
 		ret = -EINVAL;
 		goto bail;
 	}
 
-	if (pos >= sizeof(struct ipath_flash)) {
-		ret = 0;
+	if (count != sizeof(struct ipath_flash)) {
+		ret = -EINVAL;
 		goto bail;
 	}
 
-	if (count > sizeof(struct ipath_flash) - pos)
-		count = sizeof(struct ipath_flash) - pos;
-
 	tmp = kmalloc(count, GFP_KERNEL);
 	if (!tmp) {
 		ret = -ENOMEM;
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 5c9b509..9e4e8d4 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -252,8 +252,8 @@
 };
 
 /* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_RCVURG_MASK 0x1FF
-#define INFINIPATH_I_RCVAVAIL_MASK 0x1FF
+#define INFINIPATH_I_RCVURG_MASK ((1U<<9)-1)
+#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<9)-1)
 
 /* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
 #define INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT 0
@@ -338,7 +338,7 @@
 	if (crcbits) {
 		u16 ctrl0, ctrl1;
 		snprintf(bitsmsg, sizeof bitsmsg,
-			 "[HT%s lane %s CRC (%llx); ignore till reload]",
+			 "[HT%s lane %s CRC (%llx); powercycle to completely clear]",
 			 !(crcbits & _IPATH_HTLINK1_CRCBITS) ?
 			 "0 (A)" : (!(crcbits & _IPATH_HTLINK0_CRCBITS)
 				    ? "1 (B)" : "0+1 (A+B)"),
@@ -389,17 +389,28 @@
 				     _IPATH_HTLINK1_CRCBITS)));
 }
 
+/* 6110 specific hardware errors... */
+static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = {
+	INFINIPATH_HWE_MSG(HTCBUSIREQPARITYERR, "HTC Ireq Parity"),
+	INFINIPATH_HWE_MSG(HTCBUSTREQPARITYERR, "HTC Treq Parity"),
+	INFINIPATH_HWE_MSG(HTCBUSTRESPPARITYERR, "HTC Tresp Parity"),
+	INFINIPATH_HWE_MSG(HTCMISCERR5, "HT core Misc5"),
+	INFINIPATH_HWE_MSG(HTCMISCERR6, "HT core Misc6"),
+	INFINIPATH_HWE_MSG(HTCMISCERR7, "HT core Misc7"),
+	INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"),
+	INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
+};
+
 /**
- * ipath_ht_handle_hwerrors - display hardware errors
+ * ipath_ht_handle_hwerrors - display hardware errors.
  * @dd: the infinipath device
  * @msg: the output buffer
  * @msgl: the size of the output buffer
  *
- * Use same msg buffer as regular errors to avoid
- * excessive stack use.  Most hardware errors are catastrophic, but for
- * right now, we'll print them and continue.
- * We reuse the same message buffer as ipath_handle_errors() to avoid
- * excessive stack usage.
+ * Use same msg buffer as regular errors to avoid excessive stack
+ * use.  Most hardware errors are catastrophic, but for right now,
+ * we'll print them and continue.  We reuse the same message buffer as
+ * ipath_handle_errors() to avoid excessive stack usage.
  */
 static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 				     size_t msgl)
@@ -440,19 +451,49 @@
 	 * make sure we get this much out, unless told to be quiet,
 	 * or it's occurred within the last 5 seconds
 	 */
-	if ((hwerrs & ~dd->ipath_lasthwerror) ||
+	if ((hwerrs & ~(dd->ipath_lasthwerror |
+			((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+			  INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+			<< INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
 	    (ipath_debug & __IPATH_VERBDBG))
 		dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
 			 "(cleared)\n", (unsigned long long) hwerrs);
 	dd->ipath_lasthwerror |= hwerrs;
 
-	if (hwerrs & ~infinipath_hwe_bitsextant)
+	if (hwerrs & ~dd->ipath_hwe_bitsextant)
 		ipath_dev_err(dd, "hwerror interrupt with unknown errors "
 			      "%llx set\n", (unsigned long long)
-			      (hwerrs & ~infinipath_hwe_bitsextant));
+			      (hwerrs & ~dd->ipath_hwe_bitsextant));
 
 	ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
 	if (ctrl & INFINIPATH_C_FREEZEMODE) {
+		/*
+		 * parity errors in send memory are recoverable,
+		 * just cancel the send (if indicated in * sendbuffererror),
+		 * count the occurrence, unfreeze (if no other handled
+		 * hardware error bits are set), and continue. They can
+		 * occur if a processor speculative read is done to the PIO
+		 * buffer while we are sending a packet, for example.
+		 */
+		if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+			       INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+			      << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
+			ipath_stats.sps_txeparity++;
+			ipath_dbg("Recovering from TXE parity error (%llu), "
+			    	  "hwerrstatus=%llx\n",
+				  (unsigned long long) ipath_stats.sps_txeparity,
+				  (unsigned long long) hwerrs);
+			ipath_disarm_senderrbufs(dd);
+			hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+				     INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+				    << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
+			if (!hwerrs) { /* else leave in freeze mode */
+				ipath_write_kreg(dd,
+						 dd->ipath_kregs->kr_control,
+						 dd->ipath_control);
+				return;
+			}
+		}
 		if (hwerrs) {
 			/*
 			 * if any set that we aren't ignoring; only
@@ -499,44 +540,16 @@
 			 bits);
 		strlcat(msg, bitsmsg, msgl);
 	}
-	if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK
-		      << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) {
-		bits = (u32) ((hwerrs >>
-			       INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) &
-			      INFINIPATH_HWE_RXEMEMPARITYERR_MASK);
-		snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ",
-			 bits);
-		strlcat(msg, bitsmsg, msgl);
-	}
-	if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK
-		      << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
-		bits = (u32) ((hwerrs >>
-			       INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) &
-			      INFINIPATH_HWE_TXEMEMPARITYERR_MASK);
-		snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ",
-			 bits);
-		strlcat(msg, bitsmsg, msgl);
-	}
-	if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR)
-		strlcat(msg, "[IB2IPATH Parity]", msgl);
-	if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR)
-		strlcat(msg, "[IPATH2IB Parity]", msgl);
-	if (hwerrs & INFINIPATH_HWE_HTCBUSIREQPARITYERR)
-		strlcat(msg, "[HTC Ireq Parity]", msgl);
-	if (hwerrs & INFINIPATH_HWE_HTCBUSTREQPARITYERR)
-		strlcat(msg, "[HTC Treq Parity]", msgl);
-	if (hwerrs & INFINIPATH_HWE_HTCBUSTRESPPARITYERR)
-		strlcat(msg, "[HTC Tresp Parity]", msgl);
+
+	ipath_format_hwerrors(hwerrs,
+			      ipath_6110_hwerror_msgs,
+			      sizeof(ipath_6110_hwerror_msgs) /
+			      sizeof(ipath_6110_hwerror_msgs[0]),
+			      msg, msgl);
 
 	if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS))
 		hwerr_crcbits(dd, hwerrs, msg, msgl);
 
-	if (hwerrs & INFINIPATH_HWE_HTCMISCERR5)
-		strlcat(msg, "[HT core Misc5]", msgl);
-	if (hwerrs & INFINIPATH_HWE_HTCMISCERR6)
-		strlcat(msg, "[HT core Misc6]", msgl);
-	if (hwerrs & INFINIPATH_HWE_HTCMISCERR7)
-		strlcat(msg, "[HT core Misc7]", msgl);
 	if (hwerrs & INFINIPATH_HWE_MEMBISTFAILED) {
 		strlcat(msg, "[Memory BIST test failed, InfiniPath hardware unusable]",
 			msgl);
@@ -573,11 +586,6 @@
 				 dd->ipath_hwerrmask);
 	}
 
-	if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR)
-		strlcat(msg, "[Rx Dsync]", msgl);
-	if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED)
-		strlcat(msg, "[SerDes PLL]", msgl);
-
 	ipath_dev_err(dd, "%s hardware error\n", msg);
 	if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
 		/*
@@ -1080,21 +1088,21 @@
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
 }
 
-static void ipath_init_ht_variables(void)
+static void ipath_init_ht_variables(struct ipath_devdata *dd)
 {
-	ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
-	ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
-	ipath_gpio_sda = IPATH_GPIO_SDA;
-	ipath_gpio_scl = IPATH_GPIO_SCL;
+	dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
+	dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
+	dd->ipath_gpio_sda = IPATH_GPIO_SDA;
+	dd->ipath_gpio_scl = IPATH_GPIO_SCL;
 
-	infinipath_i_bitsextant =
+	dd->ipath_i_bitsextant =
 		(INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
 		(INFINIPATH_I_RCVAVAIL_MASK <<
 		 INFINIPATH_I_RCVAVAIL_SHIFT) |
 		INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
 		INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
 
-	infinipath_e_bitsextant =
+	dd->ipath_e_bitsextant =
 		INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
 		INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
 		INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
@@ -1112,7 +1120,7 @@
 		INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
 		INFINIPATH_E_HARDWARE;
 
-	infinipath_hwe_bitsextant =
+	dd->ipath_hwe_bitsextant =
 		(INFINIPATH_HWE_HTCMEMPARITYERR_MASK <<
 		 INFINIPATH_HWE_HTCMEMPARITYERR_SHIFT) |
 		(INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
@@ -1141,8 +1149,8 @@
 		INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
 		INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
 
-	infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
-	infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+	dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
+	dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
 }
 
 /**
@@ -1607,5 +1615,5 @@
 	 * do very early init that is needed before ipath_f_bus is
 	 * called
 	 */
-	ipath_init_ht_variables();
+	ipath_init_ht_variables(dd);
 }
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index d86516d..a72ab9d 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -263,8 +263,8 @@
 };
 
 /* kr_intstatus, kr_intclear, kr_intmask bits */
-#define INFINIPATH_I_RCVURG_MASK 0x1F
-#define INFINIPATH_I_RCVAVAIL_MASK 0x1F
+#define INFINIPATH_I_RCVURG_MASK ((1U<<5)-1)
+#define INFINIPATH_I_RCVAVAIL_MASK ((1U<<5)-1)
 
 /* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
 #define INFINIPATH_HWE_PCIEMEMPARITYERR_MASK  0x000000000000003fULL
@@ -294,6 +294,33 @@
 #define IPATH_GPIO_SCL (1ULL << \
 	(_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
 
+/*
+ * Rev2 silicon allows suppressing check for ArmLaunch errors.
+ * this can speed up short packet sends on systems that do
+ * not guaranteee write-order.
+ */
+#define INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR (1ULL<<63)
+
+/* 6120 specific hardware errors... */
+static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = {
+	INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"),
+	INFINIPATH_HWE_MSG(PCIECPLTIMEOUT, "PCIe completion timeout"),
+	/*
+	 * In practice, it's unlikely wthat we'll see PCIe PLL, or bus
+	 * parity or memory parity error failures, because most likely we
+	 * won't be able to talk to the core of the chip.  Nonetheless, we
+	 * might see them, if they are in parts of the PCIe core that aren't
+	 * essential.
+	 */
+	INFINIPATH_HWE_MSG(PCIE1PLLFAILED, "PCIePLL1"),
+	INFINIPATH_HWE_MSG(PCIE0PLLFAILED, "PCIePLL0"),
+	INFINIPATH_HWE_MSG(PCIEBUSPARITYXTLH, "PCIe XTLH core parity"),
+	INFINIPATH_HWE_MSG(PCIEBUSPARITYXADM, "PCIe ADM TX core parity"),
+	INFINIPATH_HWE_MSG(PCIEBUSPARITYRADM, "PCIe ADM RX core parity"),
+	INFINIPATH_HWE_MSG(RXDSYNCMEMPARITYERR, "Rx Dsync"),
+	INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
+};
+
 /**
  * ipath_pe_handle_hwerrors - display hardware errors.
  * @dd: the infinipath device
@@ -343,19 +370,49 @@
 	 * make sure we get this much out, unless told to be quiet,
 	 * or it's occurred within the last 5 seconds
 	 */
-	if ((hwerrs & ~dd->ipath_lasthwerror) ||
+	if ((hwerrs & ~(dd->ipath_lasthwerror |
+			((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+			  INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+			 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
 	    (ipath_debug & __IPATH_VERBDBG))
 		dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
 			 "(cleared)\n", (unsigned long long) hwerrs);
 	dd->ipath_lasthwerror |= hwerrs;
 
-	if (hwerrs & ~infinipath_hwe_bitsextant)
+	if (hwerrs & ~dd->ipath_hwe_bitsextant)
 		ipath_dev_err(dd, "hwerror interrupt with unknown errors "
 			      "%llx set\n", (unsigned long long)
-			      (hwerrs & ~infinipath_hwe_bitsextant));
+			      (hwerrs & ~dd->ipath_hwe_bitsextant));
 
 	ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
 	if (ctrl & INFINIPATH_C_FREEZEMODE) {
+		/*
+		 * parity errors in send memory are recoverable,
+		 * just cancel the send (if indicated in * sendbuffererror),
+		 * count the occurrence, unfreeze (if no other handled
+		 * hardware error bits are set), and continue. They can
+		 * occur if a processor speculative read is done to the PIO
+		 * buffer while we are sending a packet, for example.
+		 */
+		if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+			       INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+			      << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
+			ipath_stats.sps_txeparity++;
+			ipath_dbg("Recovering from TXE parity error (%llu), "
+			    	  "hwerrstatus=%llx\n",
+				  (unsigned long long) ipath_stats.sps_txeparity,
+				  (unsigned long long) hwerrs);
+			ipath_disarm_senderrbufs(dd);
+			hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+				     INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+				    << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
+			if (!hwerrs) { /* else leave in freeze mode */
+				ipath_write_kreg(dd,
+						 dd->ipath_kregs->kr_control,
+						 dd->ipath_control);
+			    return;
+			}
+		}
 		if (hwerrs) {
 			/*
 			 * if any set that we aren't ignoring only make the
@@ -379,9 +436,8 @@
 		} else {
 			ipath_dbg("Clearing freezemode on ignored hardware "
 				  "error\n");
-			ctrl &= ~INFINIPATH_C_FREEZEMODE;
 			ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-					 ctrl);
+			   		 dd->ipath_control);
 		}
 	}
 
@@ -396,24 +452,13 @@
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
 				 dd->ipath_hwerrmask);
 	}
-	if (hwerrs & (INFINIPATH_HWE_RXEMEMPARITYERR_MASK
-		      << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)) {
-		bits = (u32) ((hwerrs >>
-			       INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) &
-			      INFINIPATH_HWE_RXEMEMPARITYERR_MASK);
-		snprintf(bitsmsg, sizeof bitsmsg, "[RXE Parity Errs %x] ",
-			 bits);
-		strlcat(msg, bitsmsg, msgl);
-	}
-	if (hwerrs & (INFINIPATH_HWE_TXEMEMPARITYERR_MASK
-		      << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
-		bits = (u32) ((hwerrs >>
-			       INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) &
-			      INFINIPATH_HWE_TXEMEMPARITYERR_MASK);
-		snprintf(bitsmsg, sizeof bitsmsg, "[TXE Parity Errs %x] ",
-			 bits);
-		strlcat(msg, bitsmsg, msgl);
-	}
+
+	ipath_format_hwerrors(hwerrs,
+			      ipath_6120_hwerror_msgs,
+			      sizeof(ipath_6120_hwerror_msgs)/
+			      sizeof(ipath_6120_hwerror_msgs[0]),
+			      msg, msgl);
+
 	if (hwerrs & (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK
 		      << INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT)) {
 		bits = (u32) ((hwerrs >>
@@ -423,10 +468,6 @@
 			 "[PCIe Mem Parity Errs %x] ", bits);
 		strlcat(msg, bitsmsg, msgl);
 	}
-	if (hwerrs & INFINIPATH_HWE_IBCBUSTOSPCPARITYERR)
-		strlcat(msg, "[IB2IPATH Parity]", msgl);
-	if (hwerrs & INFINIPATH_HWE_IBCBUSFRSPCPARITYERR)
-		strlcat(msg, "[IPATH2IB Parity]", msgl);
 
 #define _IPATH_PLL_FAIL (INFINIPATH_HWE_COREPLL_FBSLIP |	\
 			 INFINIPATH_HWE_COREPLL_RFSLIP )
@@ -452,34 +493,6 @@
 				 dd->ipath_hwerrmask);
 	}
 
-	if (hwerrs & INFINIPATH_HWE_PCIEPOISONEDTLP)
-		strlcat(msg, "[PCIe Poisoned TLP]", msgl);
-	if (hwerrs & INFINIPATH_HWE_PCIECPLTIMEOUT)
-		strlcat(msg, "[PCIe completion timeout]", msgl);
-
-	/*
-	 * In practice, it's unlikely wthat we'll see PCIe PLL, or bus
-	 * parity or memory parity error failures, because most likely we
-	 * won't be able to talk to the core of the chip.  Nonetheless, we
-	 * might see them, if they are in parts of the PCIe core that aren't
-	 * essential.
-	 */
-	if (hwerrs & INFINIPATH_HWE_PCIE1PLLFAILED)
-		strlcat(msg, "[PCIePLL1]", msgl);
-	if (hwerrs & INFINIPATH_HWE_PCIE0PLLFAILED)
-		strlcat(msg, "[PCIePLL0]", msgl);
-	if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXTLH)
-		strlcat(msg, "[PCIe XTLH core parity]", msgl);
-	if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYXADM)
-		strlcat(msg, "[PCIe ADM TX core parity]", msgl);
-	if (hwerrs & INFINIPATH_HWE_PCIEBUSPARITYRADM)
-		strlcat(msg, "[PCIe ADM RX core parity]", msgl);
-
-	if (hwerrs & INFINIPATH_HWE_RXDSYNCMEMPARITYERR)
-		strlcat(msg, "[Rx Dsync]", msgl);
-	if (hwerrs & INFINIPATH_HWE_SERDESPLLFAILED)
-		strlcat(msg, "[SerDes PLL]", msgl);
-
 	ipath_dev_err(dd, "%s hardware error\n", msg);
 	if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) {
 		/*
@@ -525,6 +538,9 @@
 	case 5:
 		n = "InfiniPath_QMH7140";
 		break;
+	case 6:
+		n = "InfiniPath_QLE7142";
+		break;
 	default:
 		ipath_dev_err(dd,
 			      "Don't yet know about board with ID %u\n",
@@ -571,9 +587,12 @@
 	if (!dd->ipath_boardrev)	// no PLL for Emulator
 		val &= ~INFINIPATH_HWE_SERDESPLLFAILED;
 
-	/* workaround bug 9460 in internal interface bus parity checking */
-	val &= ~INFINIPATH_HWE_PCIEBUSPARITYRADM;
-
+	if (dd->ipath_minrev < 2) {
+		/* workaround bug 9460 in internal interface bus parity
+		 * checking. Fixed (HW bug 9490) in Rev2.
+		 */
+		val &= ~INFINIPATH_HWE_PCIEBUSPARITYRADM;
+	}
 	dd->ipath_hwerrmask = val;
 }
 
@@ -583,8 +602,8 @@
  */
 static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
 {
-	u64 val, tmp, config1;
-	int ret = 0, change = 0;
+	u64 val, tmp, config1, prev_val;
+	int ret = 0;
 
 	ipath_dbg("Trying to bringup serdes\n");
 
@@ -641,6 +660,7 @@
 	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
 
 	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
+	prev_val = val;
 	if (((val >> INFINIPATH_XGXS_MDIOADDR_SHIFT) &
 	     INFINIPATH_XGXS_MDIOADDR_MASK) != 3) {
 		val &=
@@ -648,11 +668,9 @@
 			  INFINIPATH_XGXS_MDIOADDR_SHIFT);
 		/* MDIO address 3 */
 		val |= 3ULL << INFINIPATH_XGXS_MDIOADDR_SHIFT;
-		change = 1;
 	}
 	if (val & INFINIPATH_XGXS_RESET) {
 		val &= ~INFINIPATH_XGXS_RESET;
-		change = 1;
 	}
 	if (((val >> INFINIPATH_XGXS_RX_POL_SHIFT) &
 	     INFINIPATH_XGXS_RX_POL_MASK) != dd->ipath_rx_pol_inv ) {
@@ -661,9 +679,19 @@
 		         INFINIPATH_XGXS_RX_POL_SHIFT);
 		val |= dd->ipath_rx_pol_inv <<
 			INFINIPATH_XGXS_RX_POL_SHIFT;
-		change = 1;
 	}
-	if (change)
+	if (dd->ipath_minrev >= 2) {
+		/* Rev 2. can tolerate multiple writes to PBC, and
+		 * allowing them can provide lower latency on some
+		 * CPUs, but this feature is off by default, only
+		 * turned on by setting D63 of XGXSconfig reg.
+		 * May want to make this conditional more
+		 * fine-grained in future. This is not exactly
+		 * related to XGXS, but where the bit ended up.
+		 */
+		val |= INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR;
+	}
+	if (val != prev_val)
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
 
 	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_serdesconfig0);
@@ -717,9 +745,25 @@
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_serdesconfig0, val);
 }
 
-/* this is not yet needed on this chip, so just return 0. */
 static int ipath_pe_intconfig(struct ipath_devdata *dd)
 {
+	u64 val;
+	u32 chiprev;
+
+	/*
+	 * If the chip supports added error indication via GPIO pins,
+	 * enable interrupts on those bits so the interrupt routine
+	 * can count the events. Also set flag so interrupt routine
+	 * can know they are expected.
+	 */
+	chiprev = dd->ipath_revision >> INFINIPATH_R_CHIPREVMINOR_SHIFT;
+	if ((chiprev & INFINIPATH_R_CHIPREVMINOR_MASK) > 1) {
+		/* Rev2+ reports extra errors via internal GPIO pins */
+		dd->ipath_flags |= IPATH_GPIO_ERRINTRS;
+		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask);
+		val |= IPATH_GPIO_ERRINTR_MASK;
+		ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val);
+	}
 	return 0;
 }
 
@@ -853,21 +897,23 @@
 	return 0;
 }
 
-static void ipath_init_pe_variables(void)
+static void ipath_init_pe_variables(struct ipath_devdata *dd)
 {
 	/*
 	 * bits for selecting i2c direction and values,
 	 * used for I2C serial flash
 	 */
-	ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
-	ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
-	ipath_gpio_sda = IPATH_GPIO_SDA;
-	ipath_gpio_scl = IPATH_GPIO_SCL;
+	dd->ipath_gpio_sda_num = _IPATH_GPIO_SDA_NUM;
+	dd->ipath_gpio_scl_num = _IPATH_GPIO_SCL_NUM;
+	dd->ipath_gpio_sda = IPATH_GPIO_SDA;
+	dd->ipath_gpio_scl = IPATH_GPIO_SCL;
 
 	/* variables for sanity checking interrupt and errors */
-	infinipath_hwe_bitsextant =
+	dd->ipath_hwe_bitsextant =
 		(INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
 		 INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
+		(INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
+		 INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) |
 		(INFINIPATH_HWE_PCIEMEMPARITYERR_MASK <<
 		 INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) |
 		INFINIPATH_HWE_PCIE1PLLFAILED |
@@ -883,13 +929,13 @@
 		INFINIPATH_HWE_SERDESPLLFAILED |
 		INFINIPATH_HWE_IBCBUSTOSPCPARITYERR |
 		INFINIPATH_HWE_IBCBUSFRSPCPARITYERR;
-	infinipath_i_bitsextant =
+	dd->ipath_i_bitsextant =
 		(INFINIPATH_I_RCVURG_MASK << INFINIPATH_I_RCVURG_SHIFT) |
 		(INFINIPATH_I_RCVAVAIL_MASK <<
 		 INFINIPATH_I_RCVAVAIL_SHIFT) |
 		INFINIPATH_I_ERROR | INFINIPATH_I_SPIOSENT |
 		INFINIPATH_I_SPIOBUFAVAIL | INFINIPATH_I_GPIO;
-	infinipath_e_bitsextant =
+	dd->ipath_e_bitsextant =
 		INFINIPATH_E_RFORMATERR | INFINIPATH_E_RVCRC |
 		INFINIPATH_E_RICRC | INFINIPATH_E_RMINPKTLEN |
 		INFINIPATH_E_RMAXPKTLEN | INFINIPATH_E_RLONGPKTLEN |
@@ -907,8 +953,8 @@
 		INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET |
 		INFINIPATH_E_HARDWARE;
 
-	infinipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
-	infinipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+	dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
+	dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
 }
 
 /* setup the MSI stuff again after a reset.  I'd like to just call
@@ -1082,6 +1128,45 @@
 	mmiowb();
 	spin_unlock_irqrestore(&dd->ipath_tid_lock, flags);
 }
+/**
+ * ipath_pe_put_tid_2 - write a TID in chip, Revision 2 or higher
+ * @dd: the infinipath device
+ * @tidptr: pointer to the expected TID (in chip) to udpate
+ * @tidtype: 0 for eager, 1 for expected
+ * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
+ *
+ * This exists as a separate routine to allow for selection of the
+ * appropriate "flavor". The static calls in cleanup just use the
+ * revision-agnostic form, as they are not performance critical.
+ */
+static void ipath_pe_put_tid_2(struct ipath_devdata *dd, u64 __iomem *tidptr,
+			     u32 type, unsigned long pa)
+{
+	u32 __iomem *tidp32 = (u32 __iomem *)tidptr;
+
+	if (pa != dd->ipath_tidinvalid) {
+		if (pa & ((1U << 11) - 1)) {
+			dev_info(&dd->pcidev->dev, "BUG: physaddr %lx "
+				 "not 2KB aligned!\n", pa);
+			return;
+		}
+		pa >>= 11;
+		/* paranoia check */
+		if (pa & (7<<29))
+			ipath_dev_err(dd,
+				      "BUG: Physical page address 0x%lx "
+				      "has bits set in 31-29\n", pa);
+
+		if (type == 0)
+			pa |= dd->ipath_tidtemplate;
+		else /* for now, always full 4KB page */
+			pa |= 2 << 29;
+	}
+	if (dd->ipath_kregbase)
+		writel(pa, tidp32);
+	mmiowb();
+}
+
 
 /**
  * ipath_pe_clear_tid - clear all TID entries for a port, expected and eager
@@ -1203,7 +1288,7 @@
 
 /**
  * ipath_init_pe_get_base_info - set chip-specific flags for user code
- * @dd: the infinipath device
+ * @pd: the infinipath port
  * @kbase: ipath_base_info pointer
  *
  * We set the PCIE flag because the lower bandwidth on PCIe vs
@@ -1212,6 +1297,7 @@
 static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
 {
 	struct ipath_base_info *kinfo = kbase;
+	struct ipath_devdata *dd;
 
 	if (ipath_unordered_wc()) {
 		kinfo->spi_runtime_flags |= IPATH_RUNTIME_FORCE_WC_ORDER;
@@ -1220,8 +1306,20 @@
 	else
 		ipath_cdbg(PROC, "Not Intel processor, WC ordered\n");
 
-	kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE;
+	if (pd == NULL)
+		goto done;
 
+	dd = pd->port_dd;
+
+	if (dd != NULL && dd->ipath_minrev >= 2) {
+		ipath_cdbg(PROC, "IBA6120 Rev2, allow multiple PBC write\n");
+		kinfo->spi_runtime_flags |= IPATH_RUNTIME_PBC_REWRITE;
+		ipath_cdbg(PROC, "IBA6120 Rev2, allow loose DMA alignment\n");
+		kinfo->spi_runtime_flags |= IPATH_RUNTIME_LOOSE_DMA_ALIGN;
+	}
+
+done:
+	kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE;
 	return 0;
 }
 
@@ -1244,7 +1342,10 @@
 	dd->ipath_f_quiet_serdes = ipath_pe_quiet_serdes;
 	dd->ipath_f_bringup_serdes = ipath_pe_bringup_serdes;
 	dd->ipath_f_clear_tids = ipath_pe_clear_tids;
-	dd->ipath_f_put_tid = ipath_pe_put_tid;
+	if (dd->ipath_minrev >= 2)
+		dd->ipath_f_put_tid = ipath_pe_put_tid_2;
+	else
+		dd->ipath_f_put_tid = ipath_pe_put_tid;
 	dd->ipath_f_cleanup = ipath_setup_pe_cleanup;
 	dd->ipath_f_setextled = ipath_setup_pe_setextled;
 	dd->ipath_f_get_base_info = ipath_pe_get_base_info;
@@ -1259,6 +1360,6 @@
 	dd->ipath_kregs = &ipath_pe_kregs;
 	dd->ipath_cregs = &ipath_pe_cregs;
 
-	ipath_init_pe_variables();
+	ipath_init_pe_variables(dd);
 }
 
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 44669dc..d819cca 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -88,13 +88,13 @@
 static int create_port0_egr(struct ipath_devdata *dd)
 {
 	unsigned e, egrcnt;
-	struct sk_buff **skbs;
+	struct ipath_skbinfo *skbinfo;
 	int ret;
 
 	egrcnt = dd->ipath_rcvegrcnt;
 
-	skbs = vmalloc(sizeof(*dd->ipath_port0_skbs) * egrcnt);
-	if (skbs == NULL) {
+	skbinfo = vmalloc(sizeof(*dd->ipath_port0_skbinfo) * egrcnt);
+	if (skbinfo == NULL) {
 		ipath_dev_err(dd, "allocation error for eager TID "
 			      "skb array\n");
 		ret = -ENOMEM;
@@ -109,13 +109,13 @@
 		 * 4 bytes so that the data buffer stays word aligned.
 		 * See ipath_kreceive() for more details.
 		 */
-		skbs[e] = ipath_alloc_skb(dd, GFP_KERNEL);
-		if (!skbs[e]) {
+		skbinfo[e].skb = ipath_alloc_skb(dd, GFP_KERNEL);
+		if (!skbinfo[e].skb) {
 			ipath_dev_err(dd, "SKB allocation error for "
 				      "eager TID %u\n", e);
 			while (e != 0)
-				dev_kfree_skb(skbs[--e]);
-			vfree(skbs);
+				dev_kfree_skb(skbinfo[--e].skb);
+			vfree(skbinfo);
 			ret = -ENOMEM;
 			goto bail;
 		}
@@ -124,14 +124,17 @@
 	 * After loop above, so we can test non-NULL to see if ready
 	 * to use at receive, etc.
 	 */
-	dd->ipath_port0_skbs = skbs;
+	dd->ipath_port0_skbinfo = skbinfo;
 
 	for (e = 0; e < egrcnt; e++) {
-		unsigned long phys =
-			virt_to_phys(dd->ipath_port0_skbs[e]->data);
+		dd->ipath_port0_skbinfo[e].phys =
+		  ipath_map_single(dd->pcidev,
+				   dd->ipath_port0_skbinfo[e].skb->data,
+				   dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE);
 		dd->ipath_f_put_tid(dd, e + (u64 __iomem *)
 				    ((char __iomem *) dd->ipath_kregbase +
-				     dd->ipath_rcvegrbase), 0, phys);
+				     dd->ipath_rcvegrbase), 0,
+				    dd->ipath_port0_skbinfo[e].phys);
 	}
 
 	ret = 0;
@@ -432,16 +435,33 @@
  */
 static void init_shadow_tids(struct ipath_devdata *dd)
 {
-	dd->ipath_pageshadow = (struct page **)
-		vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
+	struct page **pages;
+	dma_addr_t *addrs;
+
+	pages = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
 			sizeof(struct page *));
-	if (!dd->ipath_pageshadow)
+	if (!pages) {
 		ipath_dev_err(dd, "failed to allocate shadow page * "
 			      "array, no expected sends!\n");
-	else
-		memset(dd->ipath_pageshadow, 0,
-		       dd->ipath_cfgports * dd->ipath_rcvtidcnt *
-		       sizeof(struct page *));
+		dd->ipath_pageshadow = NULL;
+		return;
+	}
+
+	addrs = vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
+			sizeof(dma_addr_t));
+	if (!addrs) {
+		ipath_dev_err(dd, "failed to allocate shadow dma handle "
+			      "array, no expected sends!\n");
+		vfree(dd->ipath_pageshadow);
+		dd->ipath_pageshadow = NULL;
+		return;
+	}
+
+	memset(pages, 0, dd->ipath_cfgports * dd->ipath_rcvtidcnt *
+	       sizeof(struct page *));
+
+	dd->ipath_pageshadow = pages;
+	dd->ipath_physshadow = addrs;
 }
 
 static void enable_chip(struct ipath_devdata *dd,
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 49bf7bb..6bee53c 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -37,6 +37,50 @@
 #include "ipath_verbs.h"
 #include "ipath_common.h"
 
+/*
+ * Called when we might have an error that is specific to a particular
+ * PIO buffer, and may need to cancel that buffer, so it can be re-used.
+ */
+void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
+{
+	u32 piobcnt;
+	unsigned long sbuf[4];
+	/*
+	 * it's possible that sendbuffererror could have bits set; might
+	 * have already done this as a result of hardware error handling
+	 */
+	piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
+	/* read these before writing errorclear */
+	sbuf[0] = ipath_read_kreg64(
+		dd, dd->ipath_kregs->kr_sendbuffererror);
+	sbuf[1] = ipath_read_kreg64(
+		dd, dd->ipath_kregs->kr_sendbuffererror + 1);
+	if (piobcnt > 128) {
+		sbuf[2] = ipath_read_kreg64(
+			dd, dd->ipath_kregs->kr_sendbuffererror + 2);
+		sbuf[3] = ipath_read_kreg64(
+			dd, dd->ipath_kregs->kr_sendbuffererror + 3);
+	}
+
+	if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
+		int i;
+		if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG)) {
+			__IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
+					  "SendbufErrs %lx %lx", sbuf[0],
+					  sbuf[1]);
+			if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
+				printk(" %lx %lx ", sbuf[2], sbuf[3]);
+			printk("\n");
+		}
+
+		for (i = 0; i < piobcnt; i++)
+			if (test_bit(i, sbuf))
+				ipath_disarm_piobufs(dd, i, 1);
+		dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */
+	}
+}
+
+
 /* These are all rcv-related errors which we want to count for stats */
 #define E_SUM_PKTERRS \
 	(INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \
@@ -68,53 +112,9 @@
 
 static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
 {
-	unsigned long sbuf[4];
 	u64 ignore_this_time = 0;
-	u32 piobcnt;
 
-	/* if possible that sendbuffererror could be valid */
-	piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
-	/* read these before writing errorclear */
-	sbuf[0] = ipath_read_kreg64(
-		dd, dd->ipath_kregs->kr_sendbuffererror);
-	sbuf[1] = ipath_read_kreg64(
-		dd, dd->ipath_kregs->kr_sendbuffererror + 1);
-	if (piobcnt > 128) {
-		sbuf[2] = ipath_read_kreg64(
-			dd, dd->ipath_kregs->kr_sendbuffererror + 2);
-		sbuf[3] = ipath_read_kreg64(
-			dd, dd->ipath_kregs->kr_sendbuffererror + 3);
-	}
-
-	if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
-		int i;
-
-		ipath_cdbg(PKT, "SendbufErrs %lx %lx ", sbuf[0], sbuf[1]);
-		if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
-			printk("%lx %lx ", sbuf[2], sbuf[3]);
-		for (i = 0; i < piobcnt; i++) {
-			if (test_bit(i, sbuf)) {
-				u32 __iomem *piobuf;
-				if (i < dd->ipath_piobcnt2k)
-					piobuf = (u32 __iomem *)
-						(dd->ipath_pio2kbase +
-						 i * dd->ipath_palign);
-				else
-					piobuf = (u32 __iomem *)
-						(dd->ipath_pio4kbase +
-						 (i - dd->ipath_piobcnt2k) *
-						 dd->ipath_4kalign);
-
-				ipath_cdbg(PKT,
-					   "PIObuf[%u] @%p pbc is %x; ",
-					   i, piobuf, readl(piobuf));
-
-				ipath_disarm_piobufs(dd, i, 1);
-			}
-		}
-		if (ipath_debug & __IPATH_PKTDBG)
-			printk("\n");
-	}
+	ipath_disarm_senderrbufs(dd);
 	if ((errs & E_SUM_LINK_PKTERRS) &&
 	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
 		/*
@@ -132,6 +132,82 @@
 	return ignore_this_time;
 }
 
+/* generic hw error messages... */
+#define INFINIPATH_HWE_TXEMEMPARITYERR_MSG(a) \
+	{ \
+		.mask = ( INFINIPATH_HWE_TXEMEMPARITYERR_##a <<    \
+			  INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT ),   \
+		.msg = "TXE " #a " Memory Parity"	     \
+	}
+#define INFINIPATH_HWE_RXEMEMPARITYERR_MSG(a) \
+	{ \
+		.mask = ( INFINIPATH_HWE_RXEMEMPARITYERR_##a <<    \
+			  INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT ),   \
+		.msg = "RXE " #a " Memory Parity"	     \
+	}
+
+static const struct ipath_hwerror_msgs ipath_generic_hwerror_msgs[] = {
+	INFINIPATH_HWE_MSG(IBCBUSFRSPCPARITYERR, "IPATH2IB Parity"),
+	INFINIPATH_HWE_MSG(IBCBUSTOSPCPARITYERR, "IB2IPATH Parity"),
+
+	INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOBUF),
+	INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOPBC),
+	INFINIPATH_HWE_TXEMEMPARITYERR_MSG(PIOLAUNCHFIFO),
+
+	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(RCVBUF),
+	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(LOOKUPQ),
+	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EAGERTID),
+	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(EXPTID),
+	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(FLAGBUF),
+	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(DATAINFO),
+	INFINIPATH_HWE_RXEMEMPARITYERR_MSG(HDRINFO),
+};
+
+/**
+ * ipath_format_hwmsg - format a single hwerror message
+ * @msg message buffer
+ * @msgl length of message buffer
+ * @hwmsg message to add to message buffer
+ */
+static void ipath_format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
+{
+	strlcat(msg, "[", msgl);
+	strlcat(msg, hwmsg, msgl);
+	strlcat(msg, "]", msgl);
+}
+
+/**
+ * ipath_format_hwerrors - format hardware error messages for display
+ * @hwerrs hardware errors bit vector
+ * @hwerrmsgs hardware error descriptions
+ * @nhwerrmsgs number of hwerrmsgs
+ * @msg message buffer
+ * @msgl message buffer length
+ */
+void ipath_format_hwerrors(u64 hwerrs,
+			   const struct ipath_hwerror_msgs *hwerrmsgs,
+			   size_t nhwerrmsgs,
+			   char *msg, size_t msgl)
+{
+	int i;
+	const int glen =
+	    sizeof(ipath_generic_hwerror_msgs) /
+	    sizeof(ipath_generic_hwerror_msgs[0]);
+
+	for (i=0; i<glen; i++) {
+		if (hwerrs & ipath_generic_hwerror_msgs[i].mask) {
+			ipath_format_hwmsg(msg, msgl,
+					   ipath_generic_hwerror_msgs[i].msg);
+		}
+	}
+
+	for (i=0; i<nhwerrmsgs; i++) {
+		if (hwerrs & hwerrmsgs[i].mask) {
+			ipath_format_hwmsg(msg, msgl, hwerrmsgs[i].msg);
+		}
+	}
+}
+
 /* return the strings for the most common link states */
 static char *ib_linkstate(u32 linkstate)
 {
@@ -404,10 +480,10 @@
 		dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
 	}
 
-	if (!noprint && (errs & ~infinipath_e_bitsextant))
+	if (!noprint && (errs & ~dd->ipath_e_bitsextant))
 		ipath_dev_err(dd, "error interrupt with unknown errors "
 			      "%llx set\n", (unsigned long long)
-			      (errs & ~infinipath_e_bitsextant));
+			      (errs & ~dd->ipath_e_bitsextant));
 
 	if (errs & E_SUM_ERRS)
 		ignore_this_time = handle_e_sum_errs(dd, errs);
@@ -478,6 +554,14 @@
 			~(INFINIPATH_E_HARDWARE |
 			  INFINIPATH_E_IBSTATUSCHANGED);
 	}
+
+	/* likely due to cancel, so suppress */
+	if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
+		dd->ipath_lastcancel > jiffies) {
+		ipath_dbg("Suppressed armlaunch/spktlen after error send cancel\n");
+		errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN);
+	}
+
 	if (!errs)
 		return 0;
 
@@ -529,7 +613,7 @@
 				 * don't report same point multiple times,
 				 * except kernel
 				 */
-				tl = (u32) * pd->port_rcvhdrtail_kvaddr;
+				tl = *(u64 *) pd->port_rcvhdrtail_kvaddr;
 				if (tl == dd->ipath_lastrcvhdrqtails[i])
 					continue;
 				hd = ipath_read_ureg32(dd, ur_rcvhdrhead,
@@ -729,9 +813,9 @@
 	int rcvdint = 0;
 
 	portr = ((istat >> INFINIPATH_I_RCVAVAIL_SHIFT) &
-		 infinipath_i_rcvavail_mask)
+		 dd->ipath_i_rcvavail_mask)
 		| ((istat >> INFINIPATH_I_RCVURG_SHIFT) &
-		   infinipath_i_rcvurg_mask);
+		   dd->ipath_i_rcvurg_mask);
 	for (i = 1; i < dd->ipath_cfgports; i++) {
 		struct ipath_portdata *pd = dd->ipath_pd[i];
 		if (portr & (1 << i) && pd && pd->port_cnt &&
@@ -808,7 +892,7 @@
 	if (oldhead != curtail) {
 		if (dd->ipath_flags & IPATH_GPIO_INTR) {
 			ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
-					 (u64) (1 << 2));
+					 (u64) (1 << IPATH_GPIO_PORT0_BIT));
 			istat = port0rbits | INFINIPATH_I_GPIO;
 		}
 		else
@@ -838,10 +922,10 @@
 	if (unexpected)
 		unexpected = 0;
 
-	if (unlikely(istat & ~infinipath_i_bitsextant))
+	if (unlikely(istat & ~dd->ipath_i_bitsextant))
 		ipath_dev_err(dd,
 			      "interrupt with unknown interrupts %x set\n",
-			      istat & (u32) ~ infinipath_i_bitsextant);
+			      istat & (u32) ~ dd->ipath_i_bitsextant);
 	else
 		ipath_cdbg(VERBOSE, "intr stat=0x%x\n", istat);
 
@@ -867,26 +951,80 @@
 
 	if (istat & INFINIPATH_I_GPIO) {
 		/*
-		 * Packets are available in the port 0 rcv queue.
-		 * Eventually this needs to be generalized to check
-		 * IPATH_GPIO_INTR, and the specific GPIO bit, if
-		 * GPIO interrupts are used for anything else.
+		 * GPIO interrupts fall in two broad classes:
+		 * GPIO_2 indicates (on some HT4xx boards) that a packet
+		 *        has arrived for Port 0. Checking for this
+		 *        is controlled by flag IPATH_GPIO_INTR.
+		 * GPIO_3..5 on IBA6120 Rev2 chips indicate errors
+		 *        that we need to count. Checking for this
+		 *        is controlled by flag IPATH_GPIO_ERRINTRS.
 		 */
-		if (unlikely(!(dd->ipath_flags & IPATH_GPIO_INTR))) {
-			u32 gpiostatus;
-			gpiostatus = ipath_read_kreg32(
-				dd, dd->ipath_kregs->kr_gpio_status);
-			ipath_dbg("Unexpected GPIO interrupt bits %x\n",
-				  gpiostatus);
-			ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
-					 gpiostatus);
+		u32 gpiostatus;
+		u32 to_clear = 0;
+
+		gpiostatus = ipath_read_kreg32(
+			dd, dd->ipath_kregs->kr_gpio_status);
+		/* First the error-counter case.
+		 */
+		if ((gpiostatus & IPATH_GPIO_ERRINTR_MASK) &&
+		    (dd->ipath_flags & IPATH_GPIO_ERRINTRS)) {
+			/* want to clear the bits we see asserted. */
+			to_clear |= (gpiostatus & IPATH_GPIO_ERRINTR_MASK);
+
+			/*
+			 * Count appropriately, clear bits out of our copy,
+			 * as they have been "handled".
+			 */
+			if (gpiostatus & (1 << IPATH_GPIO_RXUVL_BIT)) {
+				ipath_dbg("FlowCtl on UnsupVL\n");
+				dd->ipath_rxfc_unsupvl_errs++;
+			}
+			if (gpiostatus & (1 << IPATH_GPIO_OVRUN_BIT)) {
+				ipath_dbg("Overrun Threshold exceeded\n");
+				dd->ipath_overrun_thresh_errs++;
+			}
+			if (gpiostatus & (1 << IPATH_GPIO_LLI_BIT)) {
+				ipath_dbg("Local Link Integrity error\n");
+				dd->ipath_lli_errs++;
+			}
+			gpiostatus &= ~IPATH_GPIO_ERRINTR_MASK;
 		}
-		else {
-			/* Clear GPIO status bit 2 */
-			ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
-					(u64) (1 << 2));
+		/* Now the Port0 Receive case */
+		if ((gpiostatus & (1 << IPATH_GPIO_PORT0_BIT)) &&
+		    (dd->ipath_flags & IPATH_GPIO_INTR)) {
+			/*
+			 * GPIO status bit 2 is set, and we expected it.
+			 * clear it and indicate in p0bits.
+			 * This probably only happens if a Port0 pkt
+			 * arrives at _just_ the wrong time, and we
+			 * handle that by seting chk0rcv;
+			 */
+			to_clear |= (1 << IPATH_GPIO_PORT0_BIT);
+			gpiostatus &= ~(1 << IPATH_GPIO_PORT0_BIT);
 			chk0rcv = 1;
 		}
+		if (unlikely(gpiostatus)) {
+			/*
+			 * Some unexpected bits remain. If they could have
+			 * caused the interrupt, complain and clear.
+			 * MEA: this is almost certainly non-ideal.
+			 * we should look into auto-disable of unexpected
+			 * GPIO interrupts, possibly on a "three strikes"
+			 * basis.
+			 */
+			u32 mask;
+			mask = ipath_read_kreg32(
+				dd, dd->ipath_kregs->kr_gpio_mask);
+			if (mask & gpiostatus) {
+				ipath_dbg("Unexpected GPIO IRQ bits %x\n",
+				  gpiostatus & mask);
+				to_clear |= (gpiostatus & mask);
+			}
+		}
+		if (to_clear) {
+			ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_clear,
+					(u64) to_clear);
+		}
 	}
 	chk0rcv |= istat & port0rbits;
 
@@ -911,9 +1049,9 @@
 		istat &= ~port0rbits;
 	}
 
-	if (istat & ((infinipath_i_rcvavail_mask <<
+	if (istat & ((dd->ipath_i_rcvavail_mask <<
 		      INFINIPATH_I_RCVAVAIL_SHIFT)
-		     | (infinipath_i_rcvurg_mask <<
+		     | (dd->ipath_i_rcvurg_mask <<
 			INFINIPATH_I_RCVURG_SHIFT)))
 		handle_urcv(dd, istat);
 
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index a8a5627..d7540b7 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -39,6 +39,8 @@
  */
 
 #include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
 #include <asm/io.h>
 
 #include "ipath_common.h"
@@ -62,7 +64,7 @@
 	/* rcvhdrq base, needs mmap before useful */
 	void *port_rcvhdrq;
 	/* kernel virtual address where hdrqtail is updated */
-	volatile __le64 *port_rcvhdrtail_kvaddr;
+	void *port_rcvhdrtail_kvaddr;
 	/*
 	 * temp buffer for expected send setup, allocated at open, instead
 	 * of each setup call
@@ -79,8 +81,8 @@
 	dma_addr_t port_rcvhdrq_phys;
 	dma_addr_t port_rcvhdrqtailaddr_phys;
 	/*
-	 * number of opens on this instance (0 or 1; ignoring forks, dup,
-	 * etc. for now)
+	 * number of opens (including slave subports) on this instance
+	 * (ignoring forks, dup, etc. for now)
 	 */
 	int port_cnt;
 	/*
@@ -89,6 +91,10 @@
 	 */
 	/* instead of calculating it */
 	unsigned port_port;
+	/* non-zero if port is being shared. */
+	u16 port_subport_cnt;
+	/* non-zero if port is being shared. */
+	u16 port_subport_id;
 	/* chip offset of PIO buffers for this port */
 	u32 port_piobufs;
 	/* how many alloc_pages() chunks in port_rcvegrbuf_pages */
@@ -121,6 +127,16 @@
 	u16 port_pkeys[4];
 	/* so file ops can get at unit */
 	struct ipath_devdata *port_dd;
+	/* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
+	void *subport_uregbase;
+	/* An array of pages for the eager receive buffers * N */
+	void *subport_rcvegrbuf;
+	/* An array of pages for the eager header queue entries * N */
+	void *subport_rcvhdr_base;
+	/* The version of the library which opened this port */
+	u32 userversion;
+	/* Bitmask of active slaves */
+	u32 active_slaves;
 };
 
 struct sk_buff;
@@ -132,6 +148,11 @@
 	void *l_arg;
 };
 
+struct ipath_skbinfo {
+	struct sk_buff *skb;
+	dma_addr_t phys;
+};
+
 struct ipath_devdata {
 	struct list_head ipath_list;
 
@@ -154,7 +175,7 @@
 	/* ipath_cfgports pointers */
 	struct ipath_portdata **ipath_pd;
 	/* sk_buffs used by port 0 eager receive queue */
-	struct sk_buff **ipath_port0_skbs;
+	struct ipath_skbinfo *ipath_port0_skbinfo;
 	/* kvirt address of 1st 2k pio buffer */
 	void __iomem *ipath_pio2kbase;
 	/* kvirt address of 1st 4k pio buffer */
@@ -315,12 +336,16 @@
 	u8 ipath_ht_slave_off;
 	/* for write combining settings */
 	unsigned long ipath_wc_cookie;
+	unsigned long ipath_wc_base;
+	unsigned long ipath_wc_len;
 	/* ref count for each pkey */
 	atomic_t ipath_pkeyrefs[4];
 	/* shadow copy of all exptids physaddr; used only by funcsim */
 	u64 *ipath_tidsimshadow;
 	/* shadow copy of struct page *'s for exp tid pages */
 	struct page **ipath_pageshadow;
+	/* shadow copy of dma handles for exp tid pages */
+	dma_addr_t *ipath_physshadow;
 	/* lock to workaround chip bug 9437 */
 	spinlock_t ipath_tid_lock;
 
@@ -402,6 +427,9 @@
 	unsigned long ipath_rcvctrl;
 	/* shadow kr_sendctrl */
 	unsigned long ipath_sendctrl;
+	/* ports waiting for PIOavail intr */
+	unsigned long ipath_portpiowait;
+	unsigned long ipath_lastcancel; /* to not count armlaunch after cancel */
 
 	/* value we put in kr_rcvhdrcnt */
 	u32 ipath_rcvhdrcnt;
@@ -465,8 +493,6 @@
 	u32 ipath_htwidth;
 	/* HT speed (200,400,800,1000) from HT config */
 	u32 ipath_htspeed;
-	/* ports waiting for PIOavail intr */
-	unsigned long ipath_portpiowait;
 	/*
 	 * number of sequential ibcstatus change for polling active/quiet
 	 * (i.e., link not coming up).
@@ -510,8 +536,47 @@
 	u32 ipath_lli_counter;
 	/* local link integrity errors */
 	u32 ipath_lli_errors;
+	/*
+	 * Above counts only cases where _successive_ LocalLinkIntegrity
+	 * errors were seen in the receive headers of kern-packets.
+	 * Below are the three (monotonically increasing) counters
+	 * maintained via GPIO interrupts on iba6120-rev2.
+	 */
+	u32 ipath_rxfc_unsupvl_errs;
+	u32 ipath_overrun_thresh_errs;
+	u32 ipath_lli_errs;
+
+	/*
+	 * Not all devices managed by a driver instance are the same
+	 * type, so these fields must be per-device.
+	 */
+	u64 ipath_i_bitsextant;
+	ipath_err_t ipath_e_bitsextant;
+	ipath_err_t ipath_hwe_bitsextant;
+
+	/*
+	 * Below should be computable from number of ports,
+	 * since they are never modified.
+	 */
+	u32 ipath_i_rcvavail_mask;
+	u32 ipath_i_rcvurg_mask;
+
+	/*
+	 * Register bits for selecting i2c direction and values, used for
+	 * I2C serial flash.
+	 */
+	u16 ipath_gpio_sda_num;
+	u16 ipath_gpio_scl_num;
+	u64 ipath_gpio_sda;
+	u64 ipath_gpio_scl;
 };
 
+/* Private data for file operations */
+struct ipath_filedata {
+	struct ipath_portdata *pd;
+	unsigned subport;
+	unsigned tidcursor;
+};
 extern struct list_head ipath_dev_list;
 extern spinlock_t ipath_devs_lock;
 extern struct ipath_devdata *ipath_lookup(int unit);
@@ -521,6 +586,7 @@
 void ipath_disable_wc(struct ipath_devdata *dd);
 int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp);
 void ipath_shutdown_device(struct ipath_devdata *);
+void ipath_disarm_senderrbufs(struct ipath_devdata *);
 
 struct file_operations;
 int ipath_cdev_init(int minor, char *name, struct file_operations *fops,
@@ -572,7 +638,11 @@
 int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
 
 /* for use in system calls, where we want to know device type, etc. */
-#define port_fp(fp) ((struct ipath_portdata *) (fp)->private_data)
+#define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd
+#define subport_fp(fp) \
+	((struct ipath_filedata *)(fp)->private_data)->subport
+#define tidcursor_fp(fp) \
+	((struct ipath_filedata *)(fp)->private_data)->tidcursor
 
 /*
  * values for ipath_flags
@@ -612,6 +682,15 @@
 		/* can miss port0 rx interrupts */
 #define IPATH_POLL_RX_INTR  0x40000
 #define IPATH_DISABLED      0x80000 /* administratively disabled */
+		/* Use GPIO interrupts for new counters */
+#define IPATH_GPIO_ERRINTRS 0x100000
+
+/* Bits in GPIO for the added interrupts */
+#define IPATH_GPIO_PORT0_BIT 2
+#define IPATH_GPIO_RXUVL_BIT 3
+#define IPATH_GPIO_OVRUN_BIT 4
+#define IPATH_GPIO_LLI_BIT 5
+#define IPATH_GPIO_ERRINTR_MASK 0x38
 
 /* portdata flag bit offsets */
 		/* waiting for a packet to arrive */
@@ -799,6 +878,13 @@
 int ipathfs_remove_device(struct ipath_devdata *);
 
 /*
+ * dma_addr wrappers - all 0's invalid for hw
+ */
+dma_addr_t ipath_map_page(struct pci_dev *, struct page *, unsigned long,
+			  size_t, int);
+dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int);
+
+/*
  * Flush write combining store buffers (if present) and perform a write
  * barrier.
  */
@@ -855,4 +941,20 @@
 
 #endif /* _IPATH_DEBUGGING */
 
+/*
+ * this is used for formatting hw error messages...
+ */
+struct ipath_hwerror_msgs {
+	u64 mask;
+	const char *msg;
+};
+
+#define INFINIPATH_HWE_MSG(a, b) { .mask = INFINIPATH_HWE_##a, .msg = b }
+
+/* in ipath_intr.c... */
+void ipath_format_hwerrors(u64 hwerrs,
+			   const struct ipath_hwerror_msgs *hwerrmsgs,
+			   size_t nhwerrmsgs,
+			   char *msg, size_t lmsg);
+
 #endif				/* _IPATH_KERNEL_H */
diff --git a/drivers/infiniband/hw/ipath/ipath_keys.c b/drivers/infiniband/hw/ipath/ipath_keys.c
index ba1b932..9a6cbd0 100644
--- a/drivers/infiniband/hw/ipath/ipath_keys.c
+++ b/drivers/infiniband/hw/ipath/ipath_keys.c
@@ -118,9 +118,10 @@
  * Check the IB SGE for validity and initialize our internal version
  * of it.
  */
-int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
+int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
 		  struct ib_sge *sge, int acc)
 {
+	struct ipath_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
 	struct ipath_mregion *mr;
 	unsigned n, m;
 	size_t off;
@@ -140,7 +141,8 @@
 		goto bail;
 	}
 	mr = rkt->table[(sge->lkey >> (32 - ib_ipath_lkey_table_size))];
-	if (unlikely(mr == NULL || mr->lkey != sge->lkey)) {
+	if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
+		     qp->ibqp.pd != mr->pd)) {
 		ret = 0;
 		goto bail;
 	}
@@ -188,9 +190,10 @@
  *
  * Return 1 if successful, otherwise 0.
  */
-int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
+int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
 		  u32 len, u64 vaddr, u32 rkey, int acc)
 {
+	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
 	struct ipath_lkey_table *rkt = &dev->lk_table;
 	struct ipath_sge *sge = &ss->sge;
 	struct ipath_mregion *mr;
@@ -214,7 +217,8 @@
 	}
 
 	mr = rkt->table[(rkey >> (32 - ib_ipath_lkey_table_size))];
-	if (unlikely(mr == NULL || mr->lkey != rkey)) {
+	if (unlikely(mr == NULL || mr->lkey != rkey ||
+		     qp->ibqp.pd != mr->pd)) {
 		ret = 0;
 		goto bail;
 	}
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index 72d1db8..25908b0 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -87,7 +87,8 @@
 	struct ipath_devdata *dd = to_idev(ibdev)->dd;
 	u32 vendor, majrev, minrev;
 
-	if (smp->attr_mod)
+	/* GUID 0 is illegal */
+	if (smp->attr_mod || (dd->ipath_guid == 0))
 		smp->status |= IB_SMP_INVALID_FIELD;
 
 	nip->base_version = 1;
@@ -131,10 +132,15 @@
 	 * We only support one GUID for now.  If this changes, the
 	 * portinfo.guid_cap field needs to be updated too.
 	 */
-	if (startgx == 0)
-		/* The first is a copy of the read-only HW GUID. */
-		*p = to_idev(ibdev)->dd->ipath_guid;
-	else
+	if (startgx == 0) {
+		__be64 g = to_idev(ibdev)->dd->ipath_guid;
+		if (g == 0)
+			/* GUID 0 is illegal */
+			smp->status |= IB_SMP_INVALID_FIELD;
+		else
+			/* The first is a copy of the read-only HW GUID. */
+			*p = g;
+	} else
 		smp->status |= IB_SMP_INVALID_FIELD;
 
 	return reply(smp);
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index b36f6fb..a0673c1 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -138,6 +138,7 @@
 		goto bail;
 	}
 
+	mr->mr.pd = pd;
 	mr->mr.user_base = *iova_start;
 	mr->mr.iova = *iova_start;
 	mr->mr.length = 0;
@@ -197,6 +198,7 @@
 		goto bail;
 	}
 
+	mr->mr.pd = pd;
 	mr->mr.user_base = region->user_base;
 	mr->mr.iova = region->virt_base;
 	mr->mr.length = region->length;
@@ -289,6 +291,7 @@
 	 * Resources are allocated but no valid mapping (RKEY can't be
 	 * used).
 	 */
+	fmr->mr.pd = pd;
 	fmr->mr.user_base = 0;
 	fmr->mr.iova = 0;
 	fmr->mr.length = 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 224b0f4..46c1c89 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -335,6 +335,7 @@
 	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
 	qp->r_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
 	qp->r_nak_state = 0;
+	qp->r_wrid_valid = 0;
 	qp->s_rnr_timeout = 0;
 	qp->s_head = 0;
 	qp->s_tail = 0;
@@ -342,6 +343,7 @@
 	qp->s_last = 0;
 	qp->s_ssn = 1;
 	qp->s_lsn = 0;
+	qp->s_wait_credit = 0;
 	if (qp->r_rq.wq) {
 		qp->r_rq.wq->head = 0;
 		qp->r_rq.wq->tail = 0;
@@ -352,12 +354,13 @@
 /**
  * ipath_error_qp - put a QP into an error state
  * @qp: the QP to put into an error state
+ * @err: the receive completion error to signal if a RWQE is active
  *
  * Flushes both send and receive work queues.
  * QP s_lock should be held and interrupts disabled.
  */
 
-void ipath_error_qp(struct ipath_qp *qp)
+void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
 {
 	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
 	struct ib_wc wc;
@@ -373,7 +376,6 @@
 		list_del_init(&qp->piowait);
 	spin_unlock(&dev->pending_lock);
 
-	wc.status = IB_WC_WR_FLUSH_ERR;
 	wc.vendor_err = 0;
 	wc.byte_len = 0;
 	wc.imm_data = 0;
@@ -385,6 +387,12 @@
 	wc.sl = 0;
 	wc.dlid_path_bits = 0;
 	wc.port_num = 0;
+	if (qp->r_wrid_valid) {
+		qp->r_wrid_valid = 0;
+		wc.status = err;
+		ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
+	}
+	wc.status = IB_WC_WR_FLUSH_ERR;
 
 	while (qp->s_last != qp->s_head) {
 		struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
@@ -501,7 +509,7 @@
 		break;
 
 	case IB_QPS_ERR:
-		ipath_error_qp(qp);
+		ipath_error_qp(qp, IB_WC_GENERAL_ERR);
 		break;
 
 	default:
@@ -516,7 +524,7 @@
 		qp->remote_qpn = attr->dest_qp_num;
 
 	if (attr_mask & IB_QP_SQ_PSN) {
-		qp->s_next_psn = attr->sq_psn;
+		qp->s_psn = qp->s_next_psn = attr->sq_psn;
 		qp->s_last_psn = qp->s_next_psn - 1;
 	}
 
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index a086540..a504cf6 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -201,6 +201,18 @@
 	    qp->s_rnr_timeout)
 		goto done;
 
+	/* Limit the number of packets sent without an ACK. */
+	if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) {
+		qp->s_wait_credit = 1;
+		dev->n_rc_stalls++;
+		spin_lock(&dev->pending_lock);
+		if (list_empty(&qp->timerwait))
+			list_add_tail(&qp->timerwait,
+				      &dev->pending[dev->pending_index]);
+		spin_unlock(&dev->pending_lock);
+		goto done;
+	}
+
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
 	hwords = 5;
 	bth0 = 0;
@@ -221,7 +233,7 @@
 			/* Check if send work queue is empty. */
 			if (qp->s_tail == qp->s_head)
 				goto done;
-			qp->s_psn = wqe->psn = qp->s_next_psn;
+			wqe->psn = qp->s_next_psn;
 			newreq = 1;
 		}
 		/*
@@ -393,12 +405,6 @@
 		ss = &qp->s_sge;
 		len = qp->s_len;
 		if (len > pmtu) {
-			/*
-			 * Request an ACK every 1/2 MB to avoid retransmit
-			 * timeouts.
-			 */
-			if (((wqe->length - len) % (512 * 1024)) == 0)
-				bth2 |= 1 << 31;
 			len = pmtu;
 			break;
 		}
@@ -435,12 +441,6 @@
 		ss = &qp->s_sge;
 		len = qp->s_len;
 		if (len > pmtu) {
-			/*
-			 * Request an ACK every 1/2 MB to avoid retransmit
-			 * timeouts.
-			 */
-			if (((wqe->length - len) % (512 * 1024)) == 0)
-				bth2 |= 1 << 31;
 			len = pmtu;
 			break;
 		}
@@ -498,6 +498,8 @@
 		 */
 		goto done;
 	}
+	if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT - 1) >= 0)
+		bth2 |= 1 << 31;	/* Request ACK. */
 	qp->s_len -= len;
 	qp->s_hdrwords = hwords;
 	qp->s_cur_sge = ss;
@@ -737,6 +739,15 @@
 	return;
 }
 
+static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
+{
+	if (qp->s_wait_credit) {
+		qp->s_wait_credit = 0;
+		tasklet_hi_schedule(&qp->s_task);
+	}
+	qp->s_last_psn = psn;
+}
+
 /**
  * do_rc_ack - process an incoming RC ACK
  * @qp: the QP the ACK came in on
@@ -805,7 +816,7 @@
 			 * The last valid PSN seen is the previous
 			 * request's.
 			 */
-			qp->s_last_psn = wqe->psn - 1;
+			update_last_psn(qp, wqe->psn - 1);
 			/* Retry this request. */
 			ipath_restart_rc(qp, wqe->psn, &wc);
 			/*
@@ -864,7 +875,7 @@
 		ipath_get_credit(qp, aeth);
 		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
 		qp->s_retry = qp->s_retry_cnt;
-		qp->s_last_psn = psn;
+		update_last_psn(qp, psn);
 		ret = 1;
 		goto bail;
 
@@ -883,7 +894,7 @@
 			goto bail;
 
 		/* The last valid PSN is the previous PSN. */
-		qp->s_last_psn = psn - 1;
+		update_last_psn(qp, psn - 1);
 
 		dev->n_rc_resends += (int)qp->s_psn - (int)psn;
 
@@ -898,7 +909,7 @@
 	case 3:		/* NAK */
 		/* The last valid PSN seen is the previous request's. */
 		if (qp->s_last != qp->s_tail)
-			qp->s_last_psn = wqe->psn - 1;
+			update_last_psn(qp, wqe->psn - 1);
 		switch ((aeth >> IPATH_AETH_CREDIT_SHIFT) &
 			IPATH_AETH_CREDIT_MASK) {
 		case 0:	/* PSN sequence error */
@@ -1071,7 +1082,7 @@
 		 * since we don't want s_sge modified.
 		 */
 		qp->s_len -= pmtu;
-		qp->s_last_psn = psn;
+		update_last_psn(qp, psn);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 		ipath_copy_sge(&qp->s_sge, data, pmtu);
 		goto bail;
@@ -1223,7 +1234,7 @@
 			 * Address range must be a subset of the original
 			 * request and start on pmtu boundaries.
 			 */
-			ok = ipath_rkey_ok(dev, &qp->s_rdma_sge,
+			ok = ipath_rkey_ok(qp, &qp->s_rdma_sge,
 					   qp->s_rdma_len, vaddr, rkey,
 					   IB_ACCESS_REMOTE_READ);
 			if (unlikely(!ok)) {
@@ -1282,6 +1293,14 @@
 	return 1;
 }
 
+static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
+{
+	spin_lock_irq(&qp->s_lock);
+	qp->state = IB_QPS_ERR;
+	ipath_error_qp(qp, err);
+	spin_unlock_irq(&qp->s_lock);
+}
+
 /**
  * ipath_rc_rcv - process an incoming RC packet
  * @dev: the device this packet came in on
@@ -1309,6 +1328,10 @@
 	struct ib_reth *reth;
 	int header_in_data;
 
+	/* Validate the SLID. See Ch. 9.6.1.5 */
+	if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
+		goto done;
+
 	/* Check for GRH */
 	if (!has_grh) {
 		ohdr = &hdr->u.oth;
@@ -1370,8 +1393,7 @@
 		 */
 		if (qp->r_ack_state >= OP(COMPARE_SWAP))
 			goto send_ack;
-		/* XXX Flush WQEs */
-		qp->state = IB_QPS_ERR;
+		ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR);
 		qp->r_ack_state = OP(SEND_ONLY);
 		qp->r_nak_state = IB_NAK_INVALID_REQUEST;
 		qp->r_ack_psn = qp->r_psn;
@@ -1477,9 +1499,9 @@
 			goto nack_inv;
 		ipath_copy_sge(&qp->r_sge, data, tlen);
 		qp->r_msn++;
-		if (opcode == OP(RDMA_WRITE_LAST) ||
-		    opcode == OP(RDMA_WRITE_ONLY))
+		if (!qp->r_wrid_valid)
 			break;
+		qp->r_wrid_valid = 0;
 		wc.wr_id = qp->r_wr_id;
 		wc.status = IB_WC_SUCCESS;
 		wc.opcode = IB_WC_RECV;
@@ -1517,7 +1539,7 @@
 			int ok;
 
 			/* Check rkey & NAK */
-			ok = ipath_rkey_ok(dev, &qp->r_sge,
+			ok = ipath_rkey_ok(qp, &qp->r_sge,
 					   qp->r_len, vaddr, rkey,
 					   IB_ACCESS_REMOTE_WRITE);
 			if (unlikely(!ok))
@@ -1559,7 +1581,7 @@
 			int ok;
 
 			/* Check rkey & NAK */
-			ok = ipath_rkey_ok(dev, &qp->s_rdma_sge,
+			ok = ipath_rkey_ok(qp, &qp->s_rdma_sge,
 					   qp->s_rdma_len, vaddr, rkey,
 					   IB_ACCESS_REMOTE_READ);
 			if (unlikely(!ok)) {
@@ -1618,7 +1640,7 @@
 			goto nack_inv;
 		rkey = be32_to_cpu(ateth->rkey);
 		/* Check rkey & NAK */
-		if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge,
+		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
 					    sizeof(u64), vaddr, rkey,
 					    IB_ACCESS_REMOTE_ATOMIC)))
 			goto nack_acc;
@@ -1670,8 +1692,7 @@
 	 * is pending though.
 	 */
 	if (qp->r_ack_state < OP(COMPARE_SWAP)) {
-		/* XXX Flush WQEs */
-		qp->state = IB_QPS_ERR;
+		ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR);
 		qp->r_ack_state = OP(RDMA_WRITE_ONLY);
 		qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
 		qp->r_ack_psn = qp->r_psn;
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index 6e23b3d..dffc760 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -134,10 +134,24 @@
 #define INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT 40
 #define INFINIPATH_HWE_RXEMEMPARITYERR_MASK 0x7FULL
 #define INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT 44
-#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR  0x0000000400000000ULL
-#define INFINIPATH_HWE_MEMBISTFAILED        0x0040000000000000ULL
 #define INFINIPATH_HWE_IBCBUSTOSPCPARITYERR 0x4000000000000000ULL
 #define INFINIPATH_HWE_IBCBUSFRSPCPARITYERR 0x8000000000000000ULL
+/* txe mem parity errors (shift by INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) */
+#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF	0x1ULL
+#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC	0x2ULL
+#define INFINIPATH_HWE_TXEMEMPARITYERR_PIOLAUNCHFIFO 0x4ULL
+/* rxe mem parity errors (shift by INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) */
+#define INFINIPATH_HWE_RXEMEMPARITYERR_RCVBUF   0x01ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_LOOKUPQ  0x02ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID 0x04ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_EXPTID   0x08ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_FLAGBUF  0x10ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_DATAINFO 0x20ULL
+#define INFINIPATH_HWE_RXEMEMPARITYERR_HDRINFO  0x40ULL
+/* waldo specific -- find the rest in ipath_6110.c */
+#define INFINIPATH_HWE_RXDSYNCMEMPARITYERR  0x0000000400000000ULL
+/* monty specific -- find the rest in ipath_6120.c */
+#define INFINIPATH_HWE_MEMBISTFAILED	0x0040000000000000ULL
 
 /* kr_hwdiagctrl bits */
 #define INFINIPATH_DC_FORCETXEMEMPARITYERR_MASK 0xFULL
@@ -209,9 +223,9 @@
 
 /* combination link status states that we use with some frequency */
 #define IPATH_IBSTATE_MASK ((INFINIPATH_IBCS_LINKTRAININGSTATE_MASK \
-		<< INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
+		<< INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | \
 		(INFINIPATH_IBCS_LINKSTATE_MASK \
-		<<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
+		<<INFINIPATH_IBCS_LINKSTATE_SHIFT))
 #define IPATH_IBSTATE_INIT ((INFINIPATH_IBCS_L_STATE_INIT \
 		<< INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
 		(INFINIPATH_IBCS_LT_STATE_LINKUP \
@@ -302,6 +316,17 @@
 
 typedef u64 ipath_err_t;
 
+/* The following change with the type of device, so
+ * need to be part of the ipath_devdata struct, or
+ * we could have problems plugging in devices of
+ * different types (e.g. one HT, one PCIE)
+ * in one system, to be managed by one driver.
+ * On the other hand, this file is may also be included
+ * by other code, so leave the declarations here
+ * temporarily. Minor footprint issue if common-model
+ * linker used, none if C89+ linker used.
+ */
+
 /* mask of defined bits for various registers */
 extern u64 infinipath_i_bitsextant;
 extern ipath_err_t infinipath_e_bitsextant, infinipath_hwe_bitsextant;
@@ -310,13 +335,6 @@
 extern u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;
 
 /*
- * register bits for selecting i2c direction and values, used for I2C serial
- * flash
- */
-extern u16 ipath_gpio_sda_num, ipath_gpio_scl_num;
-extern u64 ipath_gpio_sda, ipath_gpio_scl;
-
-/*
  * These are the infinipath general register numbers (not offsets).
  * The kernel registers are used directly, those beyond the kernel
  * registers are calculated from one of the base registers.  The use of
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index 5c1da2d..f753051 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -108,7 +108,6 @@
 
 static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe)
 {
-	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
 	int user = to_ipd(qp->ibqp.pd)->user;
 	int i, j, ret;
 	struct ib_wc wc;
@@ -119,8 +118,7 @@
 			continue;
 		/* Check LKEY */
 		if ((user && wqe->sg_list[i].lkey == 0) ||
-		    !ipath_lkey_ok(&dev->lk_table,
-				   &qp->r_sg_list[j], &wqe->sg_list[i],
+		    !ipath_lkey_ok(qp, &qp->r_sg_list[j], &wqe->sg_list[i],
 				   IB_ACCESS_LOCAL_WRITE))
 			goto bad_lkey;
 		qp->r_len += wqe->sg_list[i].length;
@@ -231,6 +229,7 @@
 		}
 	}
 	spin_unlock_irqrestore(&rq->lock, flags);
+	qp->r_wrid_valid = 1;
 
 bail:
 	return ret;
@@ -326,7 +325,7 @@
 	case IB_WR_RDMA_WRITE:
 		if (wqe->length == 0)
 			break;
-		if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, wqe->length,
+		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
 					    wqe->wr.wr.rdma.remote_addr,
 					    wqe->wr.wr.rdma.rkey,
 					    IB_ACCESS_REMOTE_WRITE))) {
@@ -350,7 +349,7 @@
 		break;
 
 	case IB_WR_RDMA_READ:
-		if (unlikely(!ipath_rkey_ok(dev, &sqp->s_sge, wqe->length,
+		if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
 					    wqe->wr.wr.rdma.remote_addr,
 					    wqe->wr.wr.rdma.rkey,
 					    IB_ACCESS_REMOTE_READ)))
@@ -365,7 +364,7 @@
 
 	case IB_WR_ATOMIC_CMP_AND_SWP:
 	case IB_WR_ATOMIC_FETCH_AND_ADD:
-		if (unlikely(!ipath_rkey_ok(dev, &qp->r_sge, sizeof(u64),
+		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
 					    wqe->wr.wr.rdma.remote_addr,
 					    wqe->wr.wr.rdma.rkey,
 					    IB_ACCESS_REMOTE_ATOMIC)))
@@ -575,8 +574,7 @@
 		}
 		if (wr->sg_list[i].length == 0)
 			continue;
-		if (!ipath_lkey_ok(&to_idev(qp->ibqp.device)->lk_table,
-				   &wqe->sg_list[j], &wr->sg_list[i],
+		if (!ipath_lkey_ok(qp, &wqe->sg_list[j], &wr->sg_list[i],
 				   acc)) {
 			spin_unlock_irqrestore(&qp->s_lock, flags);
 			ret = -EINVAL;
diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c
index 941e866..9403350 100644
--- a/drivers/infiniband/hw/ipath/ipath_srq.c
+++ b/drivers/infiniband/hw/ipath/ipath_srq.c
@@ -104,11 +104,6 @@
 	u32 sz;
 	struct ib_srq *ret;
 
-	if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
-		ret = ERR_PTR(-ENOMEM);
-		goto done;
-	}
-
 	if (srq_init_attr->attr.max_wr == 0) {
 		ret = ERR_PTR(-EINVAL);
 		goto done;
@@ -180,10 +175,17 @@
 	spin_lock_init(&srq->rq.lock);
 	srq->rq.wq->head = 0;
 	srq->rq.wq->tail = 0;
-	srq->rq.max_sge = srq_init_attr->attr.max_sge;
 	srq->limit = srq_init_attr->attr.srq_limit;
 
-	dev->n_srqs_allocated++;
+	spin_lock(&dev->n_srqs_lock);
+	if (dev->n_srqs_allocated == ib_ipath_max_srqs) {
+		spin_unlock(&dev->n_srqs_lock);
+		ret = ERR_PTR(-ENOMEM);
+		goto bail_wq;
+	}
+
+ 	dev->n_srqs_allocated++;
+	spin_unlock(&dev->n_srqs_lock);
 
 	ret = &srq->ibsrq;
 	goto done;
@@ -351,8 +353,13 @@
 	struct ipath_srq *srq = to_isrq(ibsrq);
 	struct ipath_ibdev *dev = to_idev(ibsrq->device);
 
+	spin_lock(&dev->n_srqs_lock);
 	dev->n_srqs_allocated--;
-	vfree(srq->rq.wq);
+	spin_unlock(&dev->n_srqs_lock);
+	if (srq->ip)
+		kref_put(&srq->ip->ref, ipath_release_mmap_info);
+	else
+		vfree(srq->rq.wq);
 	kfree(srq);
 
 	return 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index e299148..182de34 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -257,7 +257,7 @@
 	struct ipath_devdata *dd = dev_get_drvdata(dev);
 	ssize_t ret;
 	unsigned short guid[8];
-	__be64 nguid;
+	__be64 new_guid;
 	u8 *ng;
 	int i;
 
@@ -266,7 +266,7 @@
 		   &guid[4], &guid[5], &guid[6], &guid[7]) != 8)
 		goto invalid;
 
-	ng = (u8 *) &nguid;
+	ng = (u8 *) &new_guid;
 
 	for (i = 0; i < 8; i++) {
 		if (guid[i] > 0xff)
@@ -274,7 +274,10 @@
 		ng[i] = guid[i];
 	}
 
-	dd->ipath_guid = nguid;
+	if (new_guid == 0)
+		goto invalid;
+
+	dd->ipath_guid = new_guid;
 	dd->ipath_nguid = 1;
 
 	ret = strlen(buf);
@@ -297,6 +300,16 @@
 	return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_nguid);
 }
 
+static ssize_t show_nports(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	struct ipath_devdata *dd = dev_get_drvdata(dev);
+
+	/* Return the number of user ports available. */
+	return scnprintf(buf, PAGE_SIZE, "%u\n", dd->ipath_cfgports - 1);
+}
+
 static ssize_t show_serial(struct device *dev,
 			   struct device_attribute *attr,
 			   char *buf)
@@ -608,6 +621,7 @@
 static DEVICE_ATTR(mtu, S_IWUSR | S_IRUGO, show_mtu, store_mtu);
 static DEVICE_ATTR(enabled, S_IWUSR | S_IRUGO, show_enabled, store_enabled);
 static DEVICE_ATTR(nguid, S_IRUGO, show_nguid, NULL);
+static DEVICE_ATTR(nports, S_IRUGO, show_nports, NULL);
 static DEVICE_ATTR(reset, S_IWUSR, NULL, store_reset);
 static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
 static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
@@ -623,6 +637,7 @@
 	&dev_attr_mlid.attr,
 	&dev_attr_mtu.attr,
 	&dev_attr_nguid.attr,
+	&dev_attr_nports.attr,
 	&dev_attr_serial.attr,
 	&dev_attr_status.attr,
 	&dev_attr_status_str.attr,
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index 0fd3cde..e636cfd 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -246,6 +246,10 @@
 	struct ib_reth *reth;
 	int header_in_data;
 
+	/* Validate the SLID. See Ch. 9.6.1.5 */
+	if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
+		goto done;
+
 	/* Check for GRH */
 	if (!has_grh) {
 		ohdr = &hdr->u.oth;
@@ -440,7 +444,7 @@
 			int ok;
 
 			/* Check rkey */
-			ok = ipath_rkey_ok(dev, &qp->r_sge, qp->r_len,
+			ok = ipath_rkey_ok(qp, &qp->r_sge, qp->r_len,
 					   vaddr, rkey,
 					   IB_ACCESS_REMOTE_WRITE);
 			if (unlikely(!ok)) {
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 6991d1d..49f1102 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -39,7 +39,6 @@
 static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
 		    u32 *lengthp, struct ipath_sge_state *ss)
 {
-	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
 	int user = to_ipd(qp->ibqp.pd)->user;
 	int i, j, ret;
 	struct ib_wc wc;
@@ -50,8 +49,7 @@
 			continue;
 		/* Check LKEY */
 		if ((user && wqe->sg_list[i].lkey == 0) ||
-		    !ipath_lkey_ok(&dev->lk_table,
-				   j ? &ss->sg_list[j - 1] : &ss->sge,
+		    !ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge,
 				   &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
 			goto bad_lkey;
 		*lengthp += wqe->sg_list[i].length;
@@ -343,7 +341,7 @@
 
 		if (wr->sg_list[i].length == 0)
 			continue;
-		if (!ipath_lkey_ok(&dev->lk_table, ss.num_sge ?
+		if (!ipath_lkey_ok(qp, ss.num_sge ?
 				   sg_list + ss.num_sge - 1 : &ss.sge,
 				   &wr->sg_list[i], 0)) {
 			ret = -EINVAL;
diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c
index e32fca9..413754b 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_pages.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c
@@ -90,6 +90,62 @@
 }
 
 /**
+ * ipath_map_page - a safety wrapper around pci_map_page()
+ *
+ * A dma_addr of all 0's is interpreted by the chip as "disabled".
+ * Unfortunately, it can also be a valid dma_addr returned on some
+ * architectures.
+ *
+ * The powerpc iommu assigns dma_addrs in ascending order, so we don't
+ * have to bother with retries or mapping a dummy page to insure we
+ * don't just get the same mapping again.
+ *
+ * I'm sure we won't be so lucky with other iommu's, so FIXME.
+ */
+dma_addr_t ipath_map_page(struct pci_dev *hwdev, struct page *page,
+	unsigned long offset, size_t size, int direction)
+{
+	dma_addr_t phys;
+
+	phys = pci_map_page(hwdev, page, offset, size, direction);
+
+	if (phys == 0) {
+		pci_unmap_page(hwdev, phys, size, direction);
+		phys = pci_map_page(hwdev, page, offset, size, direction);
+		/*
+		 * FIXME: If we get 0 again, we should keep this page,
+		 * map another, then free the 0 page.
+		 */
+	}
+
+	return phys;
+}
+
+/**
+ * ipath_map_single - a safety wrapper around pci_map_single()
+ *
+ * Same idea as ipath_map_page().
+ */
+dma_addr_t ipath_map_single(struct pci_dev *hwdev, void *ptr, size_t size,
+	int direction)
+{
+	dma_addr_t phys;
+
+	phys = pci_map_single(hwdev, ptr, size, direction);
+
+	if (phys == 0) {
+		pci_unmap_single(hwdev, phys, size, direction);
+		phys = pci_map_single(hwdev, ptr, size, direction);
+		/*
+		 * FIXME: If we get 0 again, we should keep this page,
+		 * map another, then free the 0 page.
+		 */
+	}
+
+	return phys;
+}
+
+/**
  * ipath_get_user_pages - lock user pages into memory
  * @start_page: the start page
  * @num_pages: the number of pages
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index b8381c5..42eaed8 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -898,7 +898,8 @@
 		ipath_snap_cntr(dd, dd->ipath_cregs->cr_erricrccnt) +
 		ipath_snap_cntr(dd, dd->ipath_cregs->cr_errvcrccnt) +
 		ipath_snap_cntr(dd, dd->ipath_cregs->cr_errlpcrccnt) +
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt);
+		ipath_snap_cntr(dd, dd->ipath_cregs->cr_badformatcnt) +
+		dd->ipath_rxfc_unsupvl_errs;
 	cntrs->port_rcv_remphys_errors =
 		ipath_snap_cntr(dd, dd->ipath_cregs->cr_rcvebpcnt);
 	cntrs->port_xmit_discards =
@@ -911,8 +912,10 @@
 		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
 	cntrs->port_rcv_packets =
 		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
-	cntrs->local_link_integrity_errors = dd->ipath_lli_errors;
-	cntrs->excessive_buffer_overrun_errors = 0; /* XXX */
+	cntrs->local_link_integrity_errors =
+		(dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
+		dd->ipath_lli_errs : dd->ipath_lli_errors;
+	cntrs->excessive_buffer_overrun_errors = dd->ipath_overrun_thresh_errs;
 
 	ret = 0;
 
@@ -1199,6 +1202,7 @@
 	struct ipath_ah *ah;
 	struct ib_ah *ret;
 	struct ipath_ibdev *dev = to_idev(pd->device);
+	unsigned long flags;
 
 	/* A multicast address requires a GRH (see ch. 8.4.1). */
 	if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
@@ -1225,16 +1229,16 @@
 		goto bail;
 	}
 
-	spin_lock(&dev->n_ahs_lock);
+	spin_lock_irqsave(&dev->n_ahs_lock, flags);
 	if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
-		spin_unlock(&dev->n_ahs_lock);
+		spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
 		kfree(ah);
 		ret = ERR_PTR(-ENOMEM);
 		goto bail;
 	}
 
 	dev->n_ahs_allocated++;
-	spin_unlock(&dev->n_ahs_lock);
+	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
 
 	/* ib_create_ah() will initialize ah->ibah. */
 	ah->attr = *ah_attr;
@@ -1255,10 +1259,11 @@
 {
 	struct ipath_ibdev *dev = to_idev(ibah->device);
 	struct ipath_ah *ah = to_iah(ibah);
+	unsigned long flags;
 
-	spin_lock(&dev->n_ahs_lock);
+	spin_lock_irqsave(&dev->n_ahs_lock, flags);
 	dev->n_ahs_allocated--;
-	spin_unlock(&dev->n_ahs_lock);
+	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
 
 	kfree(ah);
 
@@ -1380,11 +1385,13 @@
 	 * processing.
 	 */
 	if (dd->ipath_flags & IPATH_GPIO_INTR) {
+		u64 val;
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
 				 0x2074076542310ULL);
 		/* Enable GPIO bit 2 interrupt */
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
-				 (u64) (1 << 2));
+		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask);
+		val |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
+		ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val);
 	}
 
 	init_timer(&dd->verbs_timer);
@@ -1399,8 +1406,17 @@
 static int disable_timer(struct ipath_devdata *dd)
 {
 	/* Disable GPIO bit 2 interrupt */
-	if (dd->ipath_flags & IPATH_GPIO_INTR)
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 0);
+	if (dd->ipath_flags & IPATH_GPIO_INTR) {
+                u64 val;
+                /* Disable GPIO bit 2 interrupt */
+                val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_mask);
+                val &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
+                ipath_write_kreg( dd, dd->ipath_kregs->kr_gpio_mask, val);
+		/*
+		 * We might want to undo changes to debugportselect,
+		 * but how?
+		 */
+	}
 
 	del_timer_sync(&dd->verbs_timer);
 
@@ -1683,6 +1699,7 @@
 		      "RC OTH NAKs %d\n"
 		      "RC timeouts %d\n"
 		      "RC RDMA dup %d\n"
+		      "RC stalls   %d\n"
 		      "piobuf wait %d\n"
 		      "no piobuf   %d\n"
 		      "PKT drops   %d\n"
@@ -1690,7 +1707,7 @@
 		      dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
 		      dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
 		      dev->n_other_naks, dev->n_timeouts,
-		      dev->n_rdma_dup_busy, dev->n_piowait,
+		      dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait,
 		      dev->n_no_piobuf, dev->n_pkt_drops, dev->n_wqe_errs);
 	for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
 		const struct ipath_opcode_stats *si = &dev->opstats[i];
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 09bbb3f..8039f6e 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -220,6 +220,7 @@
 };
 
 struct ipath_mregion {
+	struct ib_pd *pd;	/* shares refcnt of ibmr.pd */
 	u64 user_base;		/* User's address for this region */
 	u64 iova;		/* IB start address of this region */
 	size_t length;
@@ -364,12 +365,14 @@
 	u8 r_min_rnr_timer;	/* retry timeout value for RNR NAKs */
 	u8 r_reuse_sge;		/* for UC receive errors */
 	u8 r_sge_inx;		/* current index into sg_list */
+	u8 r_wrid_valid;	/* r_wrid set but CQ entry not yet made */
 	u8 qp_access_flags;
 	u8 s_max_sge;		/* size of s_wq->sg_list */
 	u8 s_retry_cnt;		/* number of times to retry */
 	u8 s_rnr_retry_cnt;
 	u8 s_retry;		/* requester retry counter */
 	u8 s_rnr_retry;		/* requester RNR retry counter */
+	u8 s_wait_credit;	/* limit number of unacked packets sent */
 	u8 s_pkey_index;	/* PKEY index to use */
 	u8 timeout;		/* Timeout for this QP */
 	enum ib_mtu path_mtu;
@@ -393,6 +396,8 @@
 #define IPATH_S_BUSY		0
 #define IPATH_S_SIGNAL_REQ_WR	1
 
+#define IPATH_PSN_CREDIT	2048
+
 /*
  * Since struct ipath_swqe is not a fixed size, we can't simply index into
  * struct ipath_qp.s_wq.  This function does the array index computation.
@@ -521,6 +526,7 @@
 	u32 n_rnr_naks;
 	u32 n_other_naks;
 	u32 n_timeouts;
+	u32 n_rc_stalls;
 	u32 n_pkt_drops;
 	u32 n_vl15_dropped;
 	u32 n_wqe_errs;
@@ -634,6 +640,8 @@
 
 int ipath_destroy_qp(struct ib_qp *ibqp);
 
+void ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err);
+
 int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		    int attr_mask, struct ib_udata *udata);
 
@@ -653,12 +661,6 @@
 
 void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int sig);
 
-int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
-		  u32 len, u64 vaddr, u32 rkey, int acc);
-
-int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
-		  struct ib_sge *sge, int acc);
-
 void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length);
 
 void ipath_skip_sge(struct ipath_sge_state *ss, u32 length);
@@ -683,10 +685,10 @@
 
 void ipath_free_lkey(struct ipath_lkey_table *rkt, u32 lkey);
 
-int ipath_lkey_ok(struct ipath_lkey_table *rkt, struct ipath_sge *isge,
+int ipath_lkey_ok(struct ipath_qp *qp, struct ipath_sge *isge,
 		  struct ib_sge *sge, int acc);
 
-int ipath_rkey_ok(struct ipath_ibdev *dev, struct ipath_sge_state *ss,
+int ipath_rkey_ok(struct ipath_qp *qp, struct ipath_sge_state *ss,
 		  u32 len, u64 vaddr, u32 rkey, int acc);
 
 int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
index 036fde6..0095bb7 100644
--- a/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
+++ b/drivers/infiniband/hw/ipath/ipath_wc_ppc64.c
@@ -38,13 +38,23 @@
 #include "ipath_kernel.h"
 
 /**
- * ipath_unordered_wc - indicate whether write combining is ordered
+ * ipath_enable_wc - enable write combining for MMIO writes to the device
+ * @dd: infinipath device
  *
- * PowerPC systems (at least those in the 970 processor family)
- * write partially filled store buffers in address order, but will write
- * completely filled store buffers in "random" order, and therefore must
- * have serialization for correctness with current InfiniPath chips.
+ * Nothing to do on PowerPC, so just return without error.
+ */
+int ipath_enable_wc(struct ipath_devdata *dd)
+{
+	return 0;
+}
+
+/**
+ * ipath_unordered_wc - indicate whether write combining is unordered
  *
+ * Because our performance depends on our ability to do write
+ * combining mmio writes in the most efficient way, we need to
+ * know if we are on a processor that may reorder stores when
+ * write combining.
  */
 int ipath_unordered_wc(void)
 {
diff --git a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
index f8f9e2e..04696e6 100644
--- a/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
+++ b/drivers/infiniband/hw/ipath/ipath_wc_x86_64.c
@@ -123,6 +123,8 @@
 			ipath_cdbg(VERBOSE, "Set mtrr for chip to WC, "
 				   "cookie is %d\n", cookie);
 			dd->ipath_wc_cookie = cookie;
+			dd->ipath_wc_base = (unsigned long) pioaddr;
+			dd->ipath_wc_len = (unsigned long) piolen;
 		}
 	}
 
@@ -136,9 +138,16 @@
 void ipath_disable_wc(struct ipath_devdata *dd)
 {
 	if (dd->ipath_wc_cookie) {
+		int r;
 		ipath_cdbg(VERBOSE, "undoing WCCOMB on pio buffers\n");
-		mtrr_del(dd->ipath_wc_cookie, 0, 0);
-		dd->ipath_wc_cookie = 0;
+		r = mtrr_del(dd->ipath_wc_cookie, dd->ipath_wc_base,
+			     dd->ipath_wc_len);
+		if (r < 0)
+			dev_info(&dd->pcidev->dev,
+				 "mtrr_del(%lx, %lx, %lx) failed: %d\n",
+				 dd->ipath_wc_cookie, dd->ipath_wc_base,
+				 dd->ipath_wc_len, r);
+		dd->ipath_wc_cookie = 0; /* even on failure */
 	}
 }
 
diff --git a/drivers/infiniband/ulp/iser/Kconfig b/drivers/infiniband/ulp/iser/Kconfig
index 365a1b5..aecbb90 100644
--- a/drivers/infiniband/ulp/iser/Kconfig
+++ b/drivers/infiniband/ulp/iser/Kconfig
@@ -1,11 +1,12 @@
 config INFINIBAND_ISER
-	tristate "ISCSI RDMA Protocol"
+	tristate "iSCSI Extensions for RDMA (iSER)"
 	depends on INFINIBAND && SCSI && INET
 	select SCSI_ISCSI_ATTRS
 	---help---
-	  Support for the ISCSI RDMA Protocol over InfiniBand.  This
-	  allows you to access storage devices that speak ISER/ISCSI
-	  over InfiniBand.
+	  Support for the iSCSI Extensions for RDMA (iSER) Protocol
+          over InfiniBand. This allows you to access storage devices
+          that speak iSCSI over iSER over InfiniBand.
 
-	  The ISER protocol is defined by IETF.
-	  See <http://www.ietf.org/>.
+	  The iSER protocol is defined by IETF.
+	  See <http://www.ietf.org/internet-drafts/draft-ietf-ips-iser-05.txt>
+	  and <http://www.infinibandta.org/members/spec/iser_annex_060418.pdf>
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 2a14fe2..eb6f98d 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -317,6 +317,8 @@
 	struct iscsi_iser_conn *iser_conn = conn->dd_data;
 
 	iscsi_conn_teardown(cls_conn);
+	if (iser_conn->ib_conn)
+		iser_conn->ib_conn->iser_conn = NULL;
 	kfree(iser_conn);
 }
 
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 2cf9ae0..9c53916 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -192,7 +192,7 @@
 
 struct iser_dto {
 	struct iscsi_iser_cmd_task *ctask;
-	struct iscsi_iser_conn     *conn;
+	struct iser_conn *ib_conn;
 	int                        notify_enable;
 
 	/* vector of registered buffers */
@@ -355,4 +355,11 @@
 
 int iser_conn_state_comp(struct iser_conn *ib_conn,
 			 enum iser_ib_conn_state comp);
+
+int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask,
+			    struct iser_data_buf       *data,
+			    enum   iser_data_dir       iser_dir,
+			    enum   dma_data_direction  dma_dir);
+
+void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask);
 #endif
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index ccf56f6..9b3d79c 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -66,42 +66,6 @@
 	dto->regd_vector_len++;
 }
 
-static int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask,
-				  struct iser_data_buf       *data,
-				  enum   iser_data_dir       iser_dir,
-				  enum   dma_data_direction  dma_dir)
-{
-	struct device *dma_device;
-
-	iser_ctask->dir[iser_dir] = 1;
-	dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device;
-
-	data->dma_nents = dma_map_sg(dma_device, data->buf, data->size, dma_dir);
-	if (data->dma_nents == 0) {
-		iser_err("dma_map_sg failed!!!\n");
-		return -EINVAL;
-	}
-	return 0;
-}
-
-static void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask)
-{
-	struct device  *dma_device;
-	struct iser_data_buf *data;
-
-	dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device;
-
-	if (iser_ctask->dir[ISER_DIR_IN]) {
-		data = &iser_ctask->data[ISER_DIR_IN];
-		dma_unmap_sg(dma_device, data->buf, data->size, DMA_FROM_DEVICE);
-	}
-
-	if (iser_ctask->dir[ISER_DIR_OUT]) {
-		data = &iser_ctask->data[ISER_DIR_OUT];
-		dma_unmap_sg(dma_device, data->buf, data->size, DMA_TO_DEVICE);
-	}
-}
-
 /* Register user buffer memory and initialize passive rdma
  *  dto descriptor. Total data size is stored in
  *  iser_ctask->data[ISER_DIR_IN].data_len
@@ -249,7 +213,7 @@
 	}
 
 	recv_dto = &rx_desc->dto;
-	recv_dto->conn          = iser_conn;
+	recv_dto->ib_conn = iser_conn->ib_conn;
 	recv_dto->regd_vector_len = 0;
 
 	regd_hdr = &rx_desc->hdr_regd_buf;
@@ -296,7 +260,7 @@
 	regd_hdr->virt_addr  = tx_desc; /* == &tx_desc->iser_header */
 	regd_hdr->data_size  = ISER_TOTAL_HEADERS_LEN;
 
-	send_dto->conn          = iser_conn;
+	send_dto->ib_conn         = iser_conn->ib_conn;
 	send_dto->notify_enable   = 1;
 	send_dto->regd_vector_len = 0;
 
@@ -588,7 +552,7 @@
 			 unsigned long dto_xfer_len)
 {
 	struct iser_dto        *dto = &rx_desc->dto;
-	struct iscsi_iser_conn *conn = dto->conn;
+	struct iscsi_iser_conn *conn = dto->ib_conn->iser_conn;
 	struct iscsi_session *session = conn->iscsi_conn->session;
 	struct iscsi_cmd_task *ctask;
 	struct iscsi_iser_cmd_task *iser_ctask;
@@ -641,7 +605,8 @@
 void iser_snd_completion(struct iser_desc *tx_desc)
 {
 	struct iser_dto        *dto = &tx_desc->dto;
-	struct iscsi_iser_conn *iser_conn = dto->conn;
+	struct iser_conn       *ib_conn = dto->ib_conn;
+	struct iscsi_iser_conn *iser_conn = ib_conn->iser_conn;
 	struct iscsi_conn      *conn = iser_conn->iscsi_conn;
 	struct iscsi_mgmt_task *mtask;
 
@@ -652,7 +617,7 @@
 	if (tx_desc->type == ISCSI_TX_DATAOUT)
 		kmem_cache_free(ig.desc_cache, tx_desc);
 
-	atomic_dec(&iser_conn->ib_conn->post_send_buf_count);
+	atomic_dec(&ib_conn->post_send_buf_count);
 
 	write_lock(conn->recv_lock);
 	if (conn->suspend_tx) {
@@ -698,14 +663,19 @@
 void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask)
 {
 	int deferred;
+	int is_rdma_aligned = 1;
 
 	/* if we were reading, copy back to unaligned sglist,
 	 * anyway dma_unmap and free the copy
 	 */
-	if (iser_ctask->data_copy[ISER_DIR_IN].copy_buf != NULL)
+	if (iser_ctask->data_copy[ISER_DIR_IN].copy_buf != NULL) {
+		is_rdma_aligned = 0;
 		iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_IN);
-	if (iser_ctask->data_copy[ISER_DIR_OUT].copy_buf != NULL)
+	}
+	if (iser_ctask->data_copy[ISER_DIR_OUT].copy_buf != NULL) {
+		is_rdma_aligned = 0;
 		iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_OUT);
+	}
 
 	if (iser_ctask->dir[ISER_DIR_IN]) {
 		deferred = iser_regd_buff_release
@@ -725,7 +695,9 @@
 		}
 	}
 
-	iser_dma_unmap_task_data(iser_ctask);
+       /* if the data was unaligned, it was already unmapped and then copied */
+       if (is_rdma_aligned)
+		iser_dma_unmap_task_data(iser_ctask);
 }
 
 void iser_dto_buffs_release(struct iser_dto *dto)
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index d0b03f4..0606744 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -369,6 +369,44 @@
 	}
 }
 
+int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask,
+			    struct iser_data_buf       *data,
+			    enum   iser_data_dir       iser_dir,
+			    enum   dma_data_direction  dma_dir)
+{
+	struct device *dma_device;
+
+	iser_ctask->dir[iser_dir] = 1;
+	dma_device =
+		iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device;
+
+	data->dma_nents = dma_map_sg(dma_device, data->buf, data->size, dma_dir);
+	if (data->dma_nents == 0) {
+		iser_err("dma_map_sg failed!!!\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask)
+{
+	struct device  *dma_device;
+	struct iser_data_buf *data;
+
+	dma_device =
+		iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device;
+
+	if (iser_ctask->dir[ISER_DIR_IN]) {
+		data = &iser_ctask->data[ISER_DIR_IN];
+		dma_unmap_sg(dma_device, data->buf, data->size, DMA_FROM_DEVICE);
+	}
+
+	if (iser_ctask->dir[ISER_DIR_OUT]) {
+		data = &iser_ctask->data[ISER_DIR_OUT];
+		dma_unmap_sg(dma_device, data->buf, data->size, DMA_TO_DEVICE);
+	}
+}
+
 /**
  * iser_reg_rdma_mem - Registers memory intended for RDMA,
  * obtaining rkey and va
@@ -394,6 +432,10 @@
 		iser_err("rdma alignment violation %d/%d aligned\n",
 			 aligned_len, mem->size);
 		iser_data_buf_dump(mem);
+
+		/* unmap the command data before accessing it */
+		iser_dma_unmap_task_data(iser_ctask);
+
 		/* allocate copy buf, if we are writing, copy the */
 		/* unaligned scatterlist, dma map the copy        */
 		if (iser_start_rdma_unaligned_sg(iser_ctask, cmd_dir) != 0)
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index ecdca7f..18a0000 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -571,6 +571,8 @@
 	/* on EVENT_ADDR_ERROR there's no device yet for this conn */
 	if (device != NULL)
 		iser_device_try_release(device);
+	if (ib_conn->iser_conn)
+		ib_conn->iser_conn->ib_conn = NULL;
 	kfree(ib_conn);
 }
 
@@ -694,7 +696,7 @@
 	struct iser_dto   *recv_dto = &rx_desc->dto;
 
 	/* Retrieve conn */
-	ib_conn = recv_dto->conn->ib_conn;
+	ib_conn = recv_dto->ib_conn;
 
 	iser_dto_to_iov(recv_dto, iov, 2);
 
@@ -727,7 +729,7 @@
 	struct iser_conn  *ib_conn;
 	struct iser_dto   *dto = &tx_desc->dto;
 
-	ib_conn = dto->conn->ib_conn;
+	ib_conn = dto->ib_conn;
 
 	iser_dto_to_iov(dto, iov, MAX_REGD_BUF_VECTOR_LEN);
 
@@ -774,7 +776,7 @@
 static void iser_handle_comp_error(struct iser_desc *desc)
 {
 	struct iser_dto  *dto     = &desc->dto;
-	struct iser_conn *ib_conn = dto->conn->ib_conn;
+	struct iser_conn *ib_conn = dto->ib_conn;
 
 	iser_dto_buffs_release(dto);