Merge master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 5ac86f5..0c3c695 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -37,58 +37,41 @@
  * $Id: agent.c 1389 2004-12-27 22:56:47Z roland $
  */
 
-#include <linux/dma-mapping.h>
-
-#include <asm/bug.h>
-
-#include <rdma/ib_smi.h>
-
-#include "smi.h"
-#include "agent_priv.h"
-#include "mad_priv.h"
 #include "agent.h"
+#include "smi.h"
 
-spinlock_t ib_agent_port_list_lock;
+#define SPFX "ib_agent: "
+
+struct ib_agent_port_private {
+	struct list_head port_list;
+	struct ib_mad_agent *agent[2];
+};
+
+static DEFINE_SPINLOCK(ib_agent_port_list_lock);
 static LIST_HEAD(ib_agent_port_list);
 
-/*
- * Caller must hold ib_agent_port_list_lock
- */
-static inline struct ib_agent_port_private *
-__ib_get_agent_port(struct ib_device *device, int port_num,
-		    struct ib_mad_agent *mad_agent)
+static struct ib_agent_port_private *
+__ib_get_agent_port(struct ib_device *device, int port_num)
 {
 	struct ib_agent_port_private *entry;
 
-	BUG_ON(!(!!device ^ !!mad_agent));  /* Exactly one MUST be (!NULL) */
-
-	if (device) {
-		list_for_each_entry(entry, &ib_agent_port_list, port_list) {
-			if (entry->smp_agent->device == device &&
-			    entry->port_num == port_num)
-				return entry;
-		}
-	} else {
-		list_for_each_entry(entry, &ib_agent_port_list, port_list) {
-			if ((entry->smp_agent == mad_agent) ||
-			    (entry->perf_mgmt_agent == mad_agent))
-				return entry;
-		}
+	list_for_each_entry(entry, &ib_agent_port_list, port_list) {
+		if (entry->agent[0]->device == device &&
+		    entry->agent[0]->port_num == port_num)
+			return entry;
 	}
 	return NULL;
 }
 
-static inline struct ib_agent_port_private *
-ib_get_agent_port(struct ib_device *device, int port_num,
-		  struct ib_mad_agent *mad_agent)
+static struct ib_agent_port_private *
+ib_get_agent_port(struct ib_device *device, int port_num)
 {
 	struct ib_agent_port_private *entry;
 	unsigned long flags;
 
 	spin_lock_irqsave(&ib_agent_port_list_lock, flags);
-	entry = __ib_get_agent_port(device, port_num, mad_agent);
+	entry = __ib_get_agent_port(device, port_num);
 	spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
-
 	return entry;
 }
 
@@ -100,192 +83,76 @@
 
 	if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
 		return 1;
-	port_priv = ib_get_agent_port(device, port_num, NULL);
+
+	port_priv = ib_get_agent_port(device, port_num);
 	if (!port_priv) {
 		printk(KERN_DEBUG SPFX "smi_check_local_dr_smp %s port %d "
-		       "not open\n",
-		       device->name, port_num);
+		       "not open\n", device->name, port_num);
 		return 1;
 	}
 
-	return smi_check_local_smp(port_priv->smp_agent, smp);
+	return smi_check_local_smp(port_priv->agent[0], smp);
 }
 
-static int agent_mad_send(struct ib_mad_agent *mad_agent,
-			  struct ib_agent_port_private *port_priv,
-			  struct ib_mad_private *mad_priv,
-			  struct ib_grh *grh,
-			  struct ib_wc *wc)
-{
-	struct ib_agent_send_wr *agent_send_wr;
-	struct ib_sge gather_list;
-	struct ib_send_wr send_wr;
-	struct ib_send_wr *bad_send_wr;
-	struct ib_ah_attr ah_attr;
-	unsigned long flags;
-	int ret = 1;
-
-	agent_send_wr = kmalloc(sizeof(*agent_send_wr), GFP_KERNEL);
-	if (!agent_send_wr)
-		goto out;
-	agent_send_wr->mad = mad_priv;
-
-	gather_list.addr = dma_map_single(mad_agent->device->dma_device,
-					  &mad_priv->mad,
-					  sizeof(mad_priv->mad),
-					  DMA_TO_DEVICE);
-	gather_list.length = sizeof(mad_priv->mad);
-	gather_list.lkey = mad_agent->mr->lkey;
-
-	send_wr.next = NULL;
-	send_wr.opcode = IB_WR_SEND;
-	send_wr.sg_list = &gather_list;
-	send_wr.num_sge = 1;
-	send_wr.wr.ud.remote_qpn = wc->src_qp; /* DQPN */
-	send_wr.wr.ud.timeout_ms = 0;
-	send_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
-
-	ah_attr.dlid = wc->slid;
-	ah_attr.port_num = mad_agent->port_num;
-	ah_attr.src_path_bits = wc->dlid_path_bits;
-	ah_attr.sl = wc->sl;
-	ah_attr.static_rate = 0;
-	ah_attr.ah_flags = 0; /* No GRH */
-	if (mad_priv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) {
-		if (wc->wc_flags & IB_WC_GRH) {
-			ah_attr.ah_flags = IB_AH_GRH;
-			/* Should sgid be looked up ? */
-			ah_attr.grh.sgid_index = 0;
-			ah_attr.grh.hop_limit = grh->hop_limit;
-			ah_attr.grh.flow_label = be32_to_cpu(
-				grh->version_tclass_flow)  & 0xfffff;
-			ah_attr.grh.traffic_class = (be32_to_cpu(
-				grh->version_tclass_flow) >> 20) & 0xff;
-			memcpy(ah_attr.grh.dgid.raw,
-			       grh->sgid.raw,
-			       sizeof(ah_attr.grh.dgid));
-		}
-	}
-
-	agent_send_wr->ah = ib_create_ah(mad_agent->qp->pd, &ah_attr);
-	if (IS_ERR(agent_send_wr->ah)) {
-		printk(KERN_ERR SPFX "No memory for address handle\n");
-		kfree(agent_send_wr);
-		goto out;
-	}
-
-	send_wr.wr.ud.ah = agent_send_wr->ah;
-	if (mad_priv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) {
-		send_wr.wr.ud.pkey_index = wc->pkey_index;
-		send_wr.wr.ud.remote_qkey = IB_QP1_QKEY;
-	} else { 	/* for SMPs */
-		send_wr.wr.ud.pkey_index = 0;
-		send_wr.wr.ud.remote_qkey = 0;
-	}
-	send_wr.wr.ud.mad_hdr = &mad_priv->mad.mad.mad_hdr;
-	send_wr.wr_id = (unsigned long)agent_send_wr;
-
-	pci_unmap_addr_set(agent_send_wr, mapping, gather_list.addr);
-
-	/* Send */
-	spin_lock_irqsave(&port_priv->send_list_lock, flags);
-	if (ib_post_send_mad(mad_agent, &send_wr, &bad_send_wr)) {
-		spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
-		dma_unmap_single(mad_agent->device->dma_device,
-				 pci_unmap_addr(agent_send_wr, mapping),
-				 sizeof(mad_priv->mad),
-				 DMA_TO_DEVICE);
-		ib_destroy_ah(agent_send_wr->ah);
-		kfree(agent_send_wr);
-	} else {
-		list_add_tail(&agent_send_wr->send_list,
-			      &port_priv->send_posted_list);
-		spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
-		ret = 0;
-	}
-
-out:
-	return ret;
-}
-
-int agent_send(struct ib_mad_private *mad,
-	       struct ib_grh *grh,
-	       struct ib_wc *wc,
-	       struct ib_device *device,
-	       int port_num)
+int agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
+			struct ib_wc *wc, struct ib_device *device,
+			int port_num, int qpn)
 {
 	struct ib_agent_port_private *port_priv;
-	struct ib_mad_agent *mad_agent;
+	struct ib_mad_agent *agent;
+	struct ib_mad_send_buf *send_buf;
+	struct ib_ah *ah;
+	int ret;
 
-	port_priv = ib_get_agent_port(device, port_num, NULL);
+	port_priv = ib_get_agent_port(device, port_num);
 	if (!port_priv) {
-		printk(KERN_DEBUG SPFX "agent_send %s port %d not open\n",
-		       device->name, port_num);
-		return 1;
+		printk(KERN_ERR SPFX "Unable to find port agent\n");
+		return -ENODEV;
 	}
 
-	/* Get mad agent based on mgmt_class in MAD */
-	switch (mad->mad.mad.mad_hdr.mgmt_class) {
-		case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
-		case IB_MGMT_CLASS_SUBN_LID_ROUTED:
-			mad_agent = port_priv->smp_agent;
-			break;
-		case IB_MGMT_CLASS_PERF_MGMT:
-			mad_agent = port_priv->perf_mgmt_agent;
-			break;
-		default:
-			return 1;
+	agent = port_priv->agent[qpn];
+	ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num);
+	if (IS_ERR(ah)) {
+		ret = PTR_ERR(ah);
+		printk(KERN_ERR SPFX "ib_create_ah_from_wc error:%d\n", ret);
+		return ret;
 	}
 
-	return agent_mad_send(mad_agent, port_priv, mad, grh, wc);
+	send_buf = ib_create_send_mad(agent, wc->src_qp, wc->pkey_index, 0,
+				      IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
+				      GFP_KERNEL);
+	if (IS_ERR(send_buf)) {
+		ret = PTR_ERR(send_buf);
+		printk(KERN_ERR SPFX "ib_create_send_mad error:%d\n", ret);
+		goto err1;
+	}
+
+	memcpy(send_buf->mad, mad, sizeof *mad);
+	send_buf->ah = ah;
+	if ((ret = ib_post_send_mad(send_buf, NULL))) {
+		printk(KERN_ERR SPFX "ib_post_send_mad error:%d\n", ret);
+		goto err2;
+	}
+	return 0;
+err2:
+	ib_free_send_mad(send_buf);
+err1:
+	ib_destroy_ah(ah);
+	return ret;
 }
 
 static void agent_send_handler(struct ib_mad_agent *mad_agent,
 			       struct ib_mad_send_wc *mad_send_wc)
 {
-	struct ib_agent_port_private	*port_priv;
-	struct ib_agent_send_wr		*agent_send_wr;
-	unsigned long			flags;
-
-	/* Find matching MAD agent */
-	port_priv = ib_get_agent_port(NULL, 0, mad_agent);
-	if (!port_priv) {
-		printk(KERN_ERR SPFX "agent_send_handler: no matching MAD "
-		       "agent %p\n", mad_agent);
-		return;
-	}
-
-	agent_send_wr = (struct ib_agent_send_wr *)(unsigned long)mad_send_wc->wr_id;
-	spin_lock_irqsave(&port_priv->send_list_lock, flags);
-	/* Remove completed send from posted send MAD list */
-	list_del(&agent_send_wr->send_list);
-	spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
-
-	dma_unmap_single(mad_agent->device->dma_device,
-			 pci_unmap_addr(agent_send_wr, mapping),
-			 sizeof(agent_send_wr->mad->mad),
-			 DMA_TO_DEVICE);
-
-	ib_destroy_ah(agent_send_wr->ah);
-
-	/* Release allocated memory */
-	kmem_cache_free(ib_mad_cache, agent_send_wr->mad);
-	kfree(agent_send_wr);
+	ib_destroy_ah(mad_send_wc->send_buf->ah);
+	ib_free_send_mad(mad_send_wc->send_buf);
 }
 
 int ib_agent_port_open(struct ib_device *device, int port_num)
 {
-	int ret;
 	struct ib_agent_port_private *port_priv;
 	unsigned long flags;
-
-	/* First, check if port already open for SMI */
-	port_priv = ib_get_agent_port(device, port_num, NULL);
-	if (port_priv) {
-		printk(KERN_DEBUG SPFX "%s port %d already open\n",
-		       device->name, port_num);
-		return 0;
-	}
+	int ret;
 
 	/* Create new device info */
 	port_priv = kmalloc(sizeof *port_priv, GFP_KERNEL);
@@ -294,32 +161,25 @@
 		ret = -ENOMEM;
 		goto error1;
 	}
-
 	memset(port_priv, 0, sizeof *port_priv);
-	port_priv->port_num = port_num;
-	spin_lock_init(&port_priv->send_list_lock);
-	INIT_LIST_HEAD(&port_priv->send_posted_list);
 
-	/* Obtain send only MAD agent for SM class (SMI QP) */
-	port_priv->smp_agent = ib_register_mad_agent(device, port_num,
-						     IB_QPT_SMI,
-						     NULL, 0,
+	/* Obtain send only MAD agent for SMI QP */
+	port_priv->agent[0] = ib_register_mad_agent(device, port_num,
+						    IB_QPT_SMI, NULL, 0,
 						    &agent_send_handler,
-						     NULL, NULL);
-
-	if (IS_ERR(port_priv->smp_agent)) {
-		ret = PTR_ERR(port_priv->smp_agent);
+						    NULL, NULL);
+	if (IS_ERR(port_priv->agent[0])) {
+		ret = PTR_ERR(port_priv->agent[0]);
 		goto error2;
 	}
 
-	/* Obtain send only MAD agent for PerfMgmt class (GSI QP) */
-	port_priv->perf_mgmt_agent = ib_register_mad_agent(device, port_num,
-							   IB_QPT_GSI,
-							   NULL, 0,
-							  &agent_send_handler,
-							   NULL, NULL);
-	if (IS_ERR(port_priv->perf_mgmt_agent)) {
-		ret = PTR_ERR(port_priv->perf_mgmt_agent);
+	/* Obtain send only MAD agent for GSI QP */
+	port_priv->agent[1] = ib_register_mad_agent(device, port_num,
+						    IB_QPT_GSI, NULL, 0,
+						    &agent_send_handler,
+						    NULL, NULL);
+	if (IS_ERR(port_priv->agent[1])) {
+		ret = PTR_ERR(port_priv->agent[1]);
 		goto error3;
 	}
 
@@ -330,7 +190,7 @@
 	return 0;
 
 error3:
-	ib_unregister_mad_agent(port_priv->smp_agent);
+	ib_unregister_mad_agent(port_priv->agent[0]);
 error2:
 	kfree(port_priv);
 error1:
@@ -343,7 +203,7 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&ib_agent_port_list_lock, flags);
-	port_priv = __ib_get_agent_port(device, port_num, NULL);
+	port_priv = __ib_get_agent_port(device, port_num);
 	if (port_priv == NULL) {
 		spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
 		printk(KERN_ERR SPFX "Port %d not found\n", port_num);
@@ -352,9 +212,8 @@
 	list_del(&port_priv->port_list);
 	spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
 
-	ib_unregister_mad_agent(port_priv->perf_mgmt_agent);
-	ib_unregister_mad_agent(port_priv->smp_agent);
+	ib_unregister_mad_agent(port_priv->agent[1]);
+	ib_unregister_mad_agent(port_priv->agent[0]);
 	kfree(port_priv);
-
 	return 0;
 }
diff --git a/drivers/infiniband/core/agent.h b/drivers/infiniband/core/agent.h
index d942684..c5f3cfe 100644
--- a/drivers/infiniband/core/agent.h
+++ b/drivers/infiniband/core/agent.h
@@ -39,17 +39,14 @@
 #ifndef __AGENT_H_
 #define __AGENT_H_
 
-extern spinlock_t ib_agent_port_list_lock;
+#include <rdma/ib_mad.h>
 
-extern int ib_agent_port_open(struct ib_device *device,
-			      int port_num);
+extern int ib_agent_port_open(struct ib_device *device, int port_num);
 
 extern int ib_agent_port_close(struct ib_device *device, int port_num);
 
-extern int agent_send(struct ib_mad_private *mad,
-		      struct ib_grh *grh,
-		      struct ib_wc *wc,
-		      struct ib_device *device,
-		      int port_num);
+extern int agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
+			       struct ib_wc *wc, struct ib_device *device,
+			       int port_num, int qpn);
 
 #endif	/* __AGENT_H_ */
diff --git a/drivers/infiniband/core/agent_priv.h b/drivers/infiniband/core/agent_priv.h
deleted file mode 100644
index 2ec6d7f..0000000
--- a/drivers/infiniband/core/agent_priv.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2004, 2005 Mellanox Technologies Ltd.  All rights reserved.
- * Copyright (c) 2004, 2005 Infinicon Corporation.  All rights reserved.
- * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
- * Copyright (c) 2004, 2005 Topspin Corporation.  All rights reserved.
- * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * $Id: agent_priv.h 1640 2005-01-24 22:39:02Z halr $
- */
-
-#ifndef __IB_AGENT_PRIV_H__
-#define __IB_AGENT_PRIV_H__
-
-#include <linux/pci.h>
-
-#define SPFX "ib_agent: "
-
-struct ib_agent_send_wr {
-	struct list_head send_list;
-	struct ib_ah *ah;
-	struct ib_mad_private *mad;
-	DECLARE_PCI_UNMAP_ADDR(mapping)
-};
-
-struct ib_agent_port_private {
-	struct list_head port_list;
-	struct list_head send_posted_list;
-	spinlock_t send_list_lock;
-	int port_num;
-	struct ib_mad_agent *smp_agent;	      /* SM class */
-	struct ib_mad_agent *perf_mgmt_agent; /* PerfMgmt class */
-};
-
-#endif	/* __IB_AGENT_PRIV_H__ */
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 54db6d4..580c3a2 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -135,6 +135,7 @@
 	__be64 tid;
 	__be32 local_qpn;
 	__be32 remote_qpn;
+	enum ib_qp_type qp_type;
 	__be32 sq_psn;
 	__be32 rq_psn;
 	int timeout_ms;
@@ -175,8 +176,7 @@
 
 	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, 
 			       cm_id_priv->av.pkey_index,
-			       ah, 0, sizeof(struct ib_mad_hdr),
-			       sizeof(struct ib_mad)-sizeof(struct ib_mad_hdr),
+			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
 			       GFP_ATOMIC);
 	if (IS_ERR(m)) {
 		ib_destroy_ah(ah);
@@ -184,7 +184,8 @@
 	}
 
 	/* Timeout set by caller if response is expected. */
-	m->send_wr.wr.ud.retries = cm_id_priv->max_cm_retries;
+	m->ah = ah;
+	m->retries = cm_id_priv->max_cm_retries;
 
 	atomic_inc(&cm_id_priv->refcount);
 	m->context[0] = cm_id_priv;
@@ -205,20 +206,20 @@
 		return PTR_ERR(ah);
 
 	m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
-			       ah, 0, sizeof(struct ib_mad_hdr),
-			       sizeof(struct ib_mad)-sizeof(struct ib_mad_hdr),
+			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
 			       GFP_ATOMIC);
 	if (IS_ERR(m)) {
 		ib_destroy_ah(ah);
 		return PTR_ERR(m);
 	}
+	m->ah = ah;
 	*msg = m;
 	return 0;
 }
 
 static void cm_free_msg(struct ib_mad_send_buf *msg)
 {
-	ib_destroy_ah(msg->send_wr.wr.ud.ah);
+	ib_destroy_ah(msg->ah);
 	if (msg->context[0])
 		cm_deref_id(msg->context[0]);
 	ib_free_send_mad(msg);
@@ -366,9 +367,15 @@
 		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
 					  service_node);
 		if ((cur_cm_id_priv->id.service_mask & service_id) ==
-		    (service_mask & cur_cm_id_priv->id.service_id))
-			return cm_id_priv;
-		if (service_id < cur_cm_id_priv->id.service_id)
+		    (service_mask & cur_cm_id_priv->id.service_id) &&
+		    (cm_id_priv->id.device == cur_cm_id_priv->id.device))
+			return cur_cm_id_priv;
+
+		if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
+			link = &(*link)->rb_left;
+		else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
+			link = &(*link)->rb_right;
+		else if (service_id < cur_cm_id_priv->id.service_id)
 			link = &(*link)->rb_left;
 		else
 			link = &(*link)->rb_right;
@@ -378,7 +385,8 @@
 	return NULL;
 }
 
-static struct cm_id_private * cm_find_listen(__be64 service_id)
+static struct cm_id_private * cm_find_listen(struct ib_device *device,
+					     __be64 service_id)
 {
 	struct rb_node *node = cm.listen_service_table.rb_node;
 	struct cm_id_private *cm_id_priv;
@@ -386,9 +394,15 @@
 	while (node) {
 		cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
 		if ((cm_id_priv->id.service_mask & service_id) ==
-		    (cm_id_priv->id.service_mask & cm_id_priv->id.service_id))
+		     cm_id_priv->id.service_id &&
+		    (cm_id_priv->id.device == device))
 			return cm_id_priv;
-		if (service_id < cm_id_priv->id.service_id)
+
+		if (device < cm_id_priv->id.device)
+			node = node->rb_left;
+		else if (device > cm_id_priv->id.device)
+			node = node->rb_right;
+		else if (service_id < cm_id_priv->id.service_id)
 			node = node->rb_left;
 		else
 			node = node->rb_right;
@@ -523,7 +537,8 @@
 	ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
 }
 
-struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler,
+struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
+				 ib_cm_handler cm_handler,
 				 void *context)
 {
 	struct cm_id_private *cm_id_priv;
@@ -535,6 +550,7 @@
 
 	memset(cm_id_priv, 0, sizeof *cm_id_priv);
 	cm_id_priv->id.state = IB_CM_IDLE;
+	cm_id_priv->id.device = device;
 	cm_id_priv->id.cm_handler = cm_handler;
 	cm_id_priv->id.context = context;
 	cm_id_priv->id.remote_cm_qpn = 1;
@@ -662,8 +678,7 @@
 		break;
 	case IB_CM_SIDR_REQ_SENT:
 		cm_id->state = IB_CM_IDLE;
-		ib_cancel_mad(cm_id_priv->av.port->mad_agent,
-			      (unsigned long) cm_id_priv->msg);
+		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		break;
 	case IB_CM_SIDR_REQ_RCVD:
@@ -674,8 +689,7 @@
 	case IB_CM_MRA_REQ_RCVD:
 	case IB_CM_REP_SENT:
 	case IB_CM_MRA_REP_RCVD:
-		ib_cancel_mad(cm_id_priv->av.port->mad_agent,
-			      (unsigned long) cm_id_priv->msg);
+		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 		/* Fall through */
 	case IB_CM_REQ_RCVD:
 	case IB_CM_MRA_REQ_SENT:
@@ -692,8 +706,7 @@
 		ib_send_cm_dreq(cm_id, NULL, 0);
 		goto retest;
 	case IB_CM_DREQ_SENT:
-		ib_cancel_mad(cm_id_priv->av.port->mad_agent,
-			      (unsigned long) cm_id_priv->msg);
+		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 		cm_enter_timewait(cm_id_priv);
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		break;
@@ -867,7 +880,6 @@
 		   struct ib_cm_req_param *param)
 {
 	struct cm_id_private *cm_id_priv;
-	struct ib_send_wr *bad_send_wr;
 	struct cm_req_msg *req_msg;
 	unsigned long flags;
 	int ret;
@@ -911,6 +923,7 @@
 	cm_id_priv->responder_resources = param->responder_resources;
 	cm_id_priv->retry_count = param->retry_count;
 	cm_id_priv->path_mtu = param->primary_path->mtu;
+	cm_id_priv->qp_type = param->qp_type;
 
 	ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
 	if (ret)
@@ -919,7 +932,7 @@
 	req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
 	cm_format_req(req_msg, cm_id_priv, param);
 	cm_id_priv->tid = req_msg->hdr.tid;
-	cm_id_priv->msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
+	cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
 	cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;
 
 	cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
@@ -928,8 +941,7 @@
 				cm_req_get_primary_local_ack_timeout(req_msg);
 
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
-	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-				&cm_id_priv->msg->send_wr, &bad_send_wr);
+	ret = ib_post_send_mad(cm_id_priv->msg, NULL);
 	if (ret) {
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		goto error2;
@@ -952,7 +964,6 @@
 			void *ari, u8 ari_length)
 {
 	struct ib_mad_send_buf *msg = NULL;
-	struct ib_send_wr *bad_send_wr;
 	struct cm_rej_msg *rej_msg, *rcv_msg;
 	int ret;
 
@@ -975,7 +986,7 @@
 		memcpy(rej_msg->ari, ari, ari_length);
 	}
 
-	ret = ib_post_send_mad(port->mad_agent, &msg->send_wr, &bad_send_wr);
+	ret = ib_post_send_mad(msg, NULL);
 	if (ret)
 		cm_free_msg(msg);
 
@@ -1047,7 +1058,6 @@
 	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
 	param = &work->cm_event.param.req_rcvd;
 	param->listen_id = listen_id;
-	param->device = cm_id_priv->av.port->mad_agent->device;
 	param->port = cm_id_priv->av.port->port_num;
 	param->primary_path = &work->path[0];
 	if (req_msg->alt_local_lid)
@@ -1156,7 +1166,6 @@
 			       struct cm_id_private *cm_id_priv)
 {
 	struct ib_mad_send_buf *msg = NULL;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	int ret;
 
@@ -1185,8 +1194,7 @@
 	}
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 
-	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr,
-			       &bad_send_wr);
+	ret = ib_post_send_mad(msg, NULL);
 	if (ret)
 		goto free;
 	return;
@@ -1226,7 +1234,8 @@
 	}
 
 	/* Find matching listen request. */
-	listen_cm_id_priv = cm_find_listen(req_msg->service_id);
+	listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
+					   req_msg->service_id);
 	if (!listen_cm_id_priv) {
 		spin_unlock_irqrestore(&cm.lock, flags);
 		cm_issue_rej(work->port, work->mad_recv_wc,
@@ -1254,7 +1263,7 @@
 
 	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
 
-	cm_id = ib_create_cm_id(NULL, NULL);
+	cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL);
 	if (IS_ERR(cm_id))
 		return PTR_ERR(cm_id);
 
@@ -1305,6 +1314,7 @@
 				cm_req_get_primary_local_ack_timeout(req_msg);
 	cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
 	cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
+	cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
 
 	cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
 	cm_process_work(cm_id_priv, work);
@@ -1349,7 +1359,6 @@
 	struct cm_id_private *cm_id_priv;
 	struct ib_mad_send_buf *msg;
 	struct cm_rep_msg *rep_msg;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	int ret;
 
@@ -1371,11 +1380,10 @@
 
 	rep_msg = (struct cm_rep_msg *) msg->mad;
 	cm_format_rep(rep_msg, cm_id_priv, param);
-	msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
+	msg->timeout_ms = cm_id_priv->timeout_ms;
 	msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
 
-	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-			       &msg->send_wr, &bad_send_wr);
+	ret = ib_post_send_mad(msg, NULL);
 	if (ret) {
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		cm_free_msg(msg);
@@ -1413,7 +1421,6 @@
 {
 	struct cm_id_private *cm_id_priv;
 	struct ib_mad_send_buf *msg;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	void *data;
 	int ret;
@@ -1440,8 +1447,7 @@
 	cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
 		      private_data, private_data_len);
 
-	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-			       &msg->send_wr, &bad_send_wr);
+	ret = ib_post_send_mad(msg, NULL);
 	if (ret) {
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		cm_free_msg(msg);
@@ -1486,7 +1492,6 @@
 	struct cm_id_private *cm_id_priv;
 	struct cm_rep_msg *rep_msg;
 	struct ib_mad_send_buf *msg = NULL;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	int ret;
 
@@ -1514,8 +1519,7 @@
 		goto unlock;
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 
-	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr,
-			       &bad_send_wr);
+	ret = ib_post_send_mad(msg, NULL);
 	if (ret)
 		goto free;
 	goto deref;
@@ -1583,8 +1587,7 @@
 
 	/* todo: handle peer_to_peer */
 
-	ib_cancel_mad(cm_id_priv->av.port->mad_agent,
-		      (unsigned long) cm_id_priv->msg);
+	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
 	if (!ret)
 		list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -1618,8 +1621,7 @@
 		goto out;
 	}
 
-	ib_cancel_mad(cm_id_priv->av.port->mad_agent,
-		      (unsigned long) cm_id_priv->msg);
+	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
 	if (!ret)
 		list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -1658,8 +1660,7 @@
 	}
 	cm_id_priv->id.state = IB_CM_ESTABLISHED;
 
-	ib_cancel_mad(cm_id_priv->av.port->mad_agent,
-		      (unsigned long) cm_id_priv->msg);
+	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
 	if (!ret)
 		list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -1696,7 +1697,6 @@
 {
 	struct cm_id_private *cm_id_priv;
 	struct ib_mad_send_buf *msg;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	int ret;
 
@@ -1718,11 +1718,10 @@
 
 	cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
 		       private_data, private_data_len);
-	msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
+	msg->timeout_ms = cm_id_priv->timeout_ms;
 	msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
 
-	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-			       &msg->send_wr, &bad_send_wr);
+	ret = ib_post_send_mad(msg, NULL);
 	if (ret) {
 		cm_enter_timewait(cm_id_priv);
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -1756,7 +1755,6 @@
 {
 	struct cm_id_private *cm_id_priv;
 	struct ib_mad_send_buf *msg;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	void *data;
 	int ret;
@@ -1786,8 +1784,7 @@
 	cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
 		       private_data, private_data_len);
 
-	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr,
-			       &bad_send_wr);
+	ret = ib_post_send_mad(msg, NULL);
 	if (ret) {
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		cm_free_msg(msg);
@@ -1804,7 +1801,6 @@
 	struct cm_id_private *cm_id_priv;
 	struct cm_dreq_msg *dreq_msg;
 	struct ib_mad_send_buf *msg = NULL;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	int ret;
 
@@ -1823,8 +1819,7 @@
 	switch (cm_id_priv->id.state) {
 	case IB_CM_REP_SENT:
 	case IB_CM_DREQ_SENT:
-		ib_cancel_mad(cm_id_priv->av.port->mad_agent,
-			      (unsigned long) cm_id_priv->msg);
+		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 		break;
 	case IB_CM_ESTABLISHED:
 	case IB_CM_MRA_REP_RCVD:
@@ -1838,8 +1833,7 @@
 			       cm_id_priv->private_data_len);
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 
-		if (ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-				     &msg->send_wr, &bad_send_wr))
+		if (ib_post_send_mad(msg, NULL))
 			cm_free_msg(msg);
 		goto deref;
 	default:
@@ -1886,8 +1880,7 @@
 	}
 	cm_enter_timewait(cm_id_priv);
 
-	ib_cancel_mad(cm_id_priv->av.port->mad_agent,
-		      (unsigned long) cm_id_priv->msg);
+	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
 	if (!ret)
 		list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -1912,7 +1905,6 @@
 {
 	struct cm_id_private *cm_id_priv;
 	struct ib_mad_send_buf *msg;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	int ret;
 
@@ -1956,8 +1948,7 @@
 	if (ret)
 		goto out;
 
-	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-			       &msg->send_wr, &bad_send_wr);
+	ret = ib_post_send_mad(msg, NULL);
 	if (ret)
 		cm_free_msg(msg);
 
@@ -2033,8 +2024,7 @@
 	case IB_CM_MRA_REQ_RCVD:
 	case IB_CM_REP_SENT:
 	case IB_CM_MRA_REP_RCVD:
-		ib_cancel_mad(cm_id_priv->av.port->mad_agent,
-			      (unsigned long) cm_id_priv->msg);
+		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 		/* fall through */
 	case IB_CM_REQ_RCVD:
 	case IB_CM_MRA_REQ_SENT:
@@ -2044,8 +2034,7 @@
 			cm_reset_to_idle(cm_id_priv);
 		break;
 	case IB_CM_DREQ_SENT:
-		ib_cancel_mad(cm_id_priv->av.port->mad_agent,
-			      (unsigned long) cm_id_priv->msg);
+		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 		/* fall through */
 	case IB_CM_REP_RCVD:
 	case IB_CM_MRA_REP_SENT:
@@ -2080,7 +2069,6 @@
 {
 	struct cm_id_private *cm_id_priv;
 	struct ib_mad_send_buf *msg;
-	struct ib_send_wr *bad_send_wr;
 	void *data;
 	unsigned long flags;
 	int ret;
@@ -2104,8 +2092,7 @@
 		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
 			      CM_MSG_RESPONSE_REQ, service_timeout,
 			      private_data, private_data_len);
-		ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-				       &msg->send_wr, &bad_send_wr);
+		ret = ib_post_send_mad(msg, NULL);
 		if (ret)
 			goto error2;
 		cm_id->state = IB_CM_MRA_REQ_SENT;
@@ -2118,8 +2105,7 @@
 		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
 			      CM_MSG_RESPONSE_REP, service_timeout,
 			      private_data, private_data_len);
-		ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-				       &msg->send_wr, &bad_send_wr);
+		ret = ib_post_send_mad(msg, NULL);
 		if (ret)
 			goto error2;
 		cm_id->state = IB_CM_MRA_REP_SENT;
@@ -2132,8 +2118,7 @@
 		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
 			      CM_MSG_RESPONSE_OTHER, service_timeout,
 			      private_data, private_data_len);
-		ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-				       &msg->send_wr, &bad_send_wr);
+		ret = ib_post_send_mad(msg, NULL);
 		if (ret)
 			goto error2;
 		cm_id->lap_state = IB_CM_MRA_LAP_SENT;
@@ -2195,14 +2180,14 @@
 	case IB_CM_REQ_SENT:
 		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
 		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
-				  (unsigned long) cm_id_priv->msg, timeout))
+				  cm_id_priv->msg, timeout))
 			goto out;
 		cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
 		break;
 	case IB_CM_REP_SENT:
 		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
 		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
-				  (unsigned long) cm_id_priv->msg, timeout))
+				  cm_id_priv->msg, timeout))
 			goto out;
 		cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
 		break;
@@ -2210,7 +2195,7 @@
 		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
 		    cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
 		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
-				  (unsigned long) cm_id_priv->msg, timeout))
+				  cm_id_priv->msg, timeout))
 			goto out;
 		cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
 		break;
@@ -2273,7 +2258,6 @@
 {
 	struct cm_id_private *cm_id_priv;
 	struct ib_mad_send_buf *msg;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	int ret;
 
@@ -2294,11 +2278,10 @@
 
 	cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
 		      alternate_path, private_data, private_data_len);
-	msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
+	msg->timeout_ms = cm_id_priv->timeout_ms;
 	msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
 
-	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-			       &msg->send_wr, &bad_send_wr);
+	ret = ib_post_send_mad(msg, NULL);
 	if (ret) {
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		cm_free_msg(msg);
@@ -2342,7 +2325,6 @@
 	struct cm_lap_msg *lap_msg;
 	struct ib_cm_lap_event_param *param;
 	struct ib_mad_send_buf *msg = NULL;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	int ret;
 
@@ -2376,8 +2358,7 @@
 			      cm_id_priv->private_data_len);
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 
-		if (ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-				     &msg->send_wr, &bad_send_wr))
+		if (ib_post_send_mad(msg, NULL))
 			cm_free_msg(msg);
 		goto deref;
 	default:
@@ -2433,7 +2414,6 @@
 {
 	struct cm_id_private *cm_id_priv;
 	struct ib_mad_send_buf *msg;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	int ret;
 
@@ -2456,8 +2436,7 @@
 
 	cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
 		      info, info_length, private_data, private_data_len);
-	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-			       &msg->send_wr, &bad_send_wr);
+	ret = ib_post_send_mad(msg, NULL);
 	if (ret) {
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		cm_free_msg(msg);
@@ -2496,8 +2475,7 @@
 		goto out;
 	}
 	cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
-	ib_cancel_mad(cm_id_priv->av.port->mad_agent,
-		      (unsigned long) cm_id_priv->msg);
+	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 	cm_id_priv->msg = NULL;
 
 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
@@ -2572,7 +2550,6 @@
 {
 	struct cm_id_private *cm_id_priv;
 	struct ib_mad_send_buf *msg;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	int ret;
 
@@ -2595,13 +2572,12 @@
 
 	cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
 			   param);
-	msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
+	msg->timeout_ms = cm_id_priv->timeout_ms;
 	msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
 
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
 	if (cm_id->state == IB_CM_IDLE)
-		ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-				       &msg->send_wr, &bad_send_wr);
+		ret = ib_post_send_mad(msg, NULL);
 	else
 		ret = -EINVAL;
 
@@ -2629,7 +2605,6 @@
 	param = &work->cm_event.param.sidr_req_rcvd;
 	param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
 	param->listen_id = listen_id;
-	param->device = work->port->mad_agent->device;
 	param->port = work->port->port_num;
 	work->cm_event.private_data = &sidr_req_msg->private_data;
 }
@@ -2642,7 +2617,7 @@
 	struct ib_wc *wc;
 	unsigned long flags;
 
-	cm_id = ib_create_cm_id(NULL, NULL);
+	cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL);
 	if (IS_ERR(cm_id))
 		return PTR_ERR(cm_id);
 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
@@ -2666,7 +2641,8 @@
 		spin_unlock_irqrestore(&cm.lock, flags);
 		goto out; /* Duplicate message. */
 	}
-	cur_cm_id_priv = cm_find_listen(sidr_req_msg->service_id);
+	cur_cm_id_priv = cm_find_listen(cm_id->device,
+					sidr_req_msg->service_id);
 	if (!cur_cm_id_priv) {
 		rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
 		spin_unlock_irqrestore(&cm.lock, flags);
@@ -2715,7 +2691,6 @@
 {
 	struct cm_id_private *cm_id_priv;
 	struct ib_mad_send_buf *msg;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	int ret;
 
@@ -2737,8 +2712,7 @@
 
 	cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
 			   param);
-	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-			       &msg->send_wr, &bad_send_wr);
+	ret = ib_post_send_mad(msg, NULL);
 	if (ret) {
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		cm_free_msg(msg);
@@ -2791,8 +2765,7 @@
 		goto out;
 	}
 	cm_id_priv->id.state = IB_CM_IDLE;
-	ib_cancel_mad(cm_id_priv->av.port->mad_agent,
-		      (unsigned long) cm_id_priv->msg);
+	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 
 	cm_format_sidr_rep_event(work);
@@ -2860,9 +2833,7 @@
 static void cm_send_handler(struct ib_mad_agent *mad_agent,
 			    struct ib_mad_send_wc *mad_send_wc)
 {
-	struct ib_mad_send_buf *msg;
-
-	msg = (struct ib_mad_send_buf *)(unsigned long)mad_send_wc->wr_id;
+	struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
 
 	switch (mad_send_wc->status) {
 	case IB_WC_SUCCESS:
@@ -3064,10 +3035,10 @@
 	case IB_CM_ESTABLISHED:
 		*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
 				IB_QP_PKEY_INDEX | IB_QP_PORT;
-		qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
+		qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
+					   IB_ACCESS_REMOTE_WRITE;
 		if (cm_id_priv->responder_resources)
-			qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_WRITE |
-						    IB_ACCESS_REMOTE_READ;
+			qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ;
 		qp_attr->pkey_index = cm_id_priv->av.pkey_index;
 		qp_attr->port_num = cm_id_priv->av.port->port_num;
 		ret = 0;
@@ -3097,14 +3068,18 @@
 	case IB_CM_MRA_REP_RCVD:
 	case IB_CM_ESTABLISHED:
 		*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
-				IB_QP_DEST_QPN | IB_QP_RQ_PSN |
-				IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER;
+				IB_QP_DEST_QPN | IB_QP_RQ_PSN;
 		qp_attr->ah_attr = cm_id_priv->av.ah_attr;
 		qp_attr->path_mtu = cm_id_priv->path_mtu;
 		qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
 		qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
-		qp_attr->max_dest_rd_atomic = cm_id_priv->responder_resources;
-		qp_attr->min_rnr_timer = 0;
+		if (cm_id_priv->qp_type == IB_QPT_RC) {
+			*qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
+					 IB_QP_MIN_RNR_TIMER;
+			qp_attr->max_dest_rd_atomic =
+					cm_id_priv->responder_resources;
+			qp_attr->min_rnr_timer = 0;
+		}
 		if (cm_id_priv->alt_av.ah_attr.dlid) {
 			*qp_attr_mask |= IB_QP_ALT_PATH;
 			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
@@ -3133,14 +3108,17 @@
 	case IB_CM_REP_SENT:
 	case IB_CM_MRA_REP_RCVD:
 	case IB_CM_ESTABLISHED:
-		*qp_attr_mask = IB_QP_STATE | IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
-				IB_QP_RNR_RETRY | IB_QP_SQ_PSN |
-				IB_QP_MAX_QP_RD_ATOMIC;
-		qp_attr->timeout = cm_id_priv->local_ack_timeout;
-		qp_attr->retry_cnt = cm_id_priv->retry_count;
-		qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
+		*qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
 		qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
-		qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
+		if (cm_id_priv->qp_type == IB_QPT_RC) {
+			*qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
+					 IB_QP_RNR_RETRY |
+					 IB_QP_MAX_QP_RD_ATOMIC;
+			qp_attr->timeout = cm_id_priv->local_ack_timeout;
+			qp_attr->retry_cnt = cm_id_priv->retry_count;
+			qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
+			qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
+		}
 		if (cm_id_priv->alt_av.ah_attr.dlid) {
 			*qp_attr_mask |= IB_QP_PATH_MIG_STATE;
 			qp_attr->path_mig_state = IB_MIG_REARM;
@@ -3323,6 +3301,7 @@
 	flush_workqueue(cm.wq);
 	destroy_workqueue(cm.wq);
 	ib_unregister_client(&cm_client);
+	idr_destroy(&cm.local_id_table);
 }
 
 module_init(ib_cm_init);
diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h
index 813ab70..4d3aee9 100644
--- a/drivers/infiniband/core/cm_msgs.h
+++ b/drivers/infiniband/core/cm_msgs.h
@@ -186,6 +186,7 @@
 		req_msg->offset40 = cpu_to_be32((be32_to_cpu(
 						  req_msg->offset40) &
 						   0xFFFFFFF9) | 0x2);
+		break;
 	default:
 		req_msg->offset40 = cpu_to_be32(be32_to_cpu(
 						 req_msg->offset40) &
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index d3cf84e..5a6e449 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -514,6 +514,12 @@
 		  u8 port_num,
 		  struct ib_port_attr *port_attr)
 {
+	if (device->node_type == IB_NODE_SWITCH) {
+		if (port_num)
+			return -EINVAL;
+	} else if (port_num < 1 || port_num > device->phys_port_cnt)
+		return -EINVAL;
+
 	return device->query_port(device, port_num, port_attr);
 }
 EXPORT_SYMBOL(ib_query_port);
@@ -583,6 +589,12 @@
 		   u8 port_num, int port_modify_mask,
 		   struct ib_port_modify *port_modify)
 {
+	if (device->node_type == IB_NODE_SWITCH) {
+		if (port_num)
+			return -EINVAL;
+	} else if (port_num < 1 || port_num > device->phys_port_cnt)
+		return -EINVAL;
+
 	return device->modify_port(device, port_num, port_modify_mask,
 				   port_modify);
 }
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index a14ca87..88f9f8c 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -579,7 +579,7 @@
 }
 
 static void snoop_send(struct ib_mad_qp_info *qp_info,
-		       struct ib_send_wr *send_wr,
+		       struct ib_mad_send_buf *send_buf,
 		       struct ib_mad_send_wc *mad_send_wc,
 		       int mad_snoop_flags)
 {
@@ -597,7 +597,7 @@
 		atomic_inc(&mad_snoop_priv->refcount);
 		spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
 		mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent,
-						    send_wr, mad_send_wc);
+						    send_buf, mad_send_wc);
 		if (atomic_dec_and_test(&mad_snoop_priv->refcount))
 			wake_up(&mad_snoop_priv->wait);
 		spin_lock_irqsave(&qp_info->snoop_lock, flags);
@@ -654,10 +654,10 @@
  * Return < 0 if error
  */
 static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
-				  struct ib_smp *smp,
-				  struct ib_send_wr *send_wr)
+				  struct ib_mad_send_wr_private *mad_send_wr)
 {
 	int ret;
+	struct ib_smp *smp = mad_send_wr->send_buf.mad;
 	unsigned long flags;
 	struct ib_mad_local_private *local;
 	struct ib_mad_private *mad_priv;
@@ -666,6 +666,7 @@
 	struct ib_device *device = mad_agent_priv->agent.device;
 	u8 port_num = mad_agent_priv->agent.port_num;
 	struct ib_wc mad_wc;
+	struct ib_send_wr *send_wr = &mad_send_wr->send_wr;
 
 	if (!smi_handle_dr_smp_send(smp, device->node_type, port_num)) {
 		ret = -EINVAL;
@@ -745,13 +746,7 @@
 		goto out;
 	}
 
-	local->send_wr = *send_wr;
-	local->send_wr.sg_list = local->sg_list;
-	memcpy(local->sg_list, send_wr->sg_list,
-	       sizeof *send_wr->sg_list * send_wr->num_sge);
-	local->send_wr.next = NULL;
-	local->tid = send_wr->wr.ud.mad_hdr->tid;
-	local->wr_id = send_wr->wr_id;
+	local->mad_send_wr = mad_send_wr;
 	/* Reference MAD agent until send side of local completion handled */
 	atomic_inc(&mad_agent_priv->refcount);
 	/* Queue local completion to local list */
@@ -781,17 +776,17 @@
 
 struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
 					    u32 remote_qpn, u16 pkey_index,
-					    struct ib_ah *ah, int rmpp_active,
+					    int rmpp_active,
 					    int hdr_len, int data_len,
 					    gfp_t gfp_mask)
 {
 	struct ib_mad_agent_private *mad_agent_priv;
-	struct ib_mad_send_buf *send_buf;
+	struct ib_mad_send_wr_private *mad_send_wr;
 	int buf_size;
 	void *buf;
 
-	mad_agent_priv = container_of(mad_agent,
-				      struct ib_mad_agent_private, agent);
+	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
+				      agent);
 	buf_size = get_buf_length(hdr_len, data_len);
 
 	if ((!mad_agent->rmpp_version &&
@@ -799,45 +794,40 @@
 	    (!rmpp_active && buf_size > sizeof(struct ib_mad)))
 		return ERR_PTR(-EINVAL);
 
-	buf = kmalloc(sizeof *send_buf + buf_size, gfp_mask);
+	buf = kmalloc(sizeof *mad_send_wr + buf_size, gfp_mask);
 	if (!buf)
 		return ERR_PTR(-ENOMEM);
-	memset(buf, 0, sizeof *send_buf + buf_size);
+	memset(buf, 0, sizeof *mad_send_wr + buf_size);
 
-	send_buf = buf + buf_size;
-	send_buf->mad = buf;
+	mad_send_wr = buf + buf_size;
+	mad_send_wr->send_buf.mad = buf;
 
-	send_buf->sge.addr = dma_map_single(mad_agent->device->dma_device,
-					    buf, buf_size, DMA_TO_DEVICE);
-	pci_unmap_addr_set(send_buf, mapping, send_buf->sge.addr);
-	send_buf->sge.length = buf_size;
-	send_buf->sge.lkey = mad_agent->mr->lkey;
+	mad_send_wr->mad_agent_priv = mad_agent_priv;
+	mad_send_wr->sg_list[0].length = buf_size;
+	mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey;
 
-	send_buf->send_wr.wr_id = (unsigned long) send_buf;
-	send_buf->send_wr.sg_list = &send_buf->sge;
-	send_buf->send_wr.num_sge = 1;
-	send_buf->send_wr.opcode = IB_WR_SEND;
-	send_buf->send_wr.send_flags = IB_SEND_SIGNALED;
-	send_buf->send_wr.wr.ud.ah = ah;
-	send_buf->send_wr.wr.ud.mad_hdr = &send_buf->mad->mad_hdr;
-	send_buf->send_wr.wr.ud.remote_qpn = remote_qpn;
-	send_buf->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
-	send_buf->send_wr.wr.ud.pkey_index = pkey_index;
+	mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr;
+	mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
+	mad_send_wr->send_wr.num_sge = 1;
+	mad_send_wr->send_wr.opcode = IB_WR_SEND;
+	mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED;
+	mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn;
+	mad_send_wr->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
+	mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index;
 
 	if (rmpp_active) {
-		struct ib_rmpp_mad *rmpp_mad;
-		rmpp_mad = (struct ib_rmpp_mad *)send_buf->mad;
+		struct ib_rmpp_mad *rmpp_mad = mad_send_wr->send_buf.mad;
 		rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(hdr_len -
-			offsetof(struct ib_rmpp_mad, data) + data_len);
+						   IB_MGMT_RMPP_HDR + data_len);
 		rmpp_mad->rmpp_hdr.rmpp_version = mad_agent->rmpp_version;
 		rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
 		ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr,
 				  IB_MGMT_RMPP_FLAG_ACTIVE);
 	}
 
-	send_buf->mad_agent = mad_agent;
+	mad_send_wr->send_buf.mad_agent = mad_agent;
 	atomic_inc(&mad_agent_priv->refcount);
-	return send_buf;
+	return &mad_send_wr->send_buf;
 }
 EXPORT_SYMBOL(ib_create_send_mad);
 
@@ -847,10 +837,6 @@
 
 	mad_agent_priv = container_of(send_buf->mad_agent,
 				      struct ib_mad_agent_private, agent);
-
-	dma_unmap_single(send_buf->mad_agent->device->dma_device,
-			 pci_unmap_addr(send_buf, mapping),
-			 send_buf->sge.length, DMA_TO_DEVICE);
 	kfree(send_buf->mad);
 
 	if (atomic_dec_and_test(&mad_agent_priv->refcount))
@@ -861,8 +847,10 @@
 int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
 {
 	struct ib_mad_qp_info *qp_info;
-	struct ib_send_wr *bad_send_wr;
 	struct list_head *list;
+	struct ib_send_wr *bad_send_wr;
+	struct ib_mad_agent *mad_agent;
+	struct ib_sge *sge;
 	unsigned long flags;
 	int ret;
 
@@ -871,10 +859,17 @@
 	mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
 	mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
 
+	mad_agent = mad_send_wr->send_buf.mad_agent;
+	sge = mad_send_wr->sg_list;
+	sge->addr = dma_map_single(mad_agent->device->dma_device,
+				   mad_send_wr->send_buf.mad, sge->length,
+				   DMA_TO_DEVICE);
+	pci_unmap_addr_set(mad_send_wr, mapping, sge->addr);
+
 	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
 	if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
-		ret = ib_post_send(mad_send_wr->mad_agent_priv->agent.qp,
-				   &mad_send_wr->send_wr, &bad_send_wr);
+		ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr,
+				   &bad_send_wr);
 		list = &qp_info->send_queue.list;
 	} else {
 		ret = 0;
@@ -886,6 +881,11 @@
 		list_add_tail(&mad_send_wr->mad_list.list, list);
 	}
 	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
+	if (ret)
+		dma_unmap_single(mad_agent->device->dma_device,
+				 pci_unmap_addr(mad_send_wr, mapping),
+				 sge->length, DMA_TO_DEVICE);
+
 	return ret;
 }
 
@@ -893,45 +893,28 @@
  * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
  *  with the registered client
  */
-int ib_post_send_mad(struct ib_mad_agent *mad_agent,
-		     struct ib_send_wr *send_wr,
-		     struct ib_send_wr **bad_send_wr)
+int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
+		     struct ib_mad_send_buf **bad_send_buf)
 {
-	int ret = -EINVAL;
 	struct ib_mad_agent_private *mad_agent_priv;
-
-	/* Validate supplied parameters */
-	if (!bad_send_wr)
-		goto error1;
-
-	if (!mad_agent || !send_wr)
-		goto error2;
-
-	if (!mad_agent->send_handler)
-		goto error2;
-
-	mad_agent_priv = container_of(mad_agent,
-				      struct ib_mad_agent_private,
-				      agent);
+	struct ib_mad_send_buf *next_send_buf;
+	struct ib_mad_send_wr_private *mad_send_wr;
+	unsigned long flags;
+	int ret = -EINVAL;
 
 	/* Walk list of send WRs and post each on send list */
-	while (send_wr) {
-		unsigned long			flags;
-		struct ib_send_wr		*next_send_wr;
-		struct ib_mad_send_wr_private	*mad_send_wr;
-		struct ib_smp			*smp;
+	for (; send_buf; send_buf = next_send_buf) {
 
-		/* Validate more parameters */
-		if (send_wr->num_sge > IB_MAD_SEND_REQ_MAX_SG)
-			goto error2;
+		mad_send_wr = container_of(send_buf,
+					   struct ib_mad_send_wr_private,
+					   send_buf);
+		mad_agent_priv = mad_send_wr->mad_agent_priv;
 
-		if (send_wr->wr.ud.timeout_ms && !mad_agent->recv_handler)
-			goto error2;
-
-		if (!send_wr->wr.ud.mad_hdr) {
-			printk(KERN_ERR PFX "MAD header must be supplied "
-			       "in WR %p\n", send_wr);
-			goto error2;
+		if (!send_buf->mad_agent->send_handler ||
+		    (send_buf->timeout_ms &&
+		     !send_buf->mad_agent->recv_handler)) {
+			ret = -EINVAL;
+			goto error;
 		}
 
 		/*
@@ -939,40 +922,24 @@
 		 * current one completes, and the user modifies the work
 		 * request associated with the completion
 		 */
-		next_send_wr = (struct ib_send_wr *)send_wr->next;
+		next_send_buf = send_buf->next;
+		mad_send_wr->send_wr.wr.ud.ah = send_buf->ah;
 
-		smp = (struct ib_smp *)send_wr->wr.ud.mad_hdr;
-		if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
-			ret = handle_outgoing_dr_smp(mad_agent_priv, smp,
-						     send_wr);
+		if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class ==
+		    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+			ret = handle_outgoing_dr_smp(mad_agent_priv,
+						     mad_send_wr);
 			if (ret < 0)		/* error */
-				goto error2;
+				goto error;
 			else if (ret == 1)	/* locally consumed */
-				goto next;
+				continue;
 		}
 
-		/* Allocate MAD send WR tracking structure */
-		mad_send_wr = kmalloc(sizeof *mad_send_wr, GFP_ATOMIC);
-		if (!mad_send_wr) {
-			printk(KERN_ERR PFX "No memory for "
-			       "ib_mad_send_wr_private\n");
-			ret = -ENOMEM;
-			goto error2;
-		}
-		memset(mad_send_wr, 0, sizeof *mad_send_wr);
-
-		mad_send_wr->send_wr = *send_wr;
-		mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
-		memcpy(mad_send_wr->sg_list, send_wr->sg_list,
-		       sizeof *send_wr->sg_list * send_wr->num_sge);
-		mad_send_wr->wr_id = send_wr->wr_id;
-		mad_send_wr->tid = send_wr->wr.ud.mad_hdr->tid;
-		mad_send_wr->mad_agent_priv = mad_agent_priv;
+		mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
 		/* Timeout will be updated after send completes */
-		mad_send_wr->timeout = msecs_to_jiffies(send_wr->wr.
-							ud.timeout_ms);
-		mad_send_wr->retries = mad_send_wr->send_wr.wr.ud.retries;
-		/* One reference for each work request to QP + response */
+		mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
+		mad_send_wr->retries = send_buf->retries;
+		/* Reference for work request to QP + response */
 		mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
 		mad_send_wr->status = IB_WC_SUCCESS;
 
@@ -995,16 +962,13 @@
 			list_del(&mad_send_wr->agent_list);
 			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 			atomic_dec(&mad_agent_priv->refcount);
-			goto error2;
+			goto error;
 		}
-next:
-		send_wr = next_send_wr;
 	}
 	return 0;
-
-error2:
-	*bad_send_wr = send_wr;
-error1:
+error:
+	if (bad_send_buf)
+		*bad_send_buf = send_buf;
 	return ret;
 }
 EXPORT_SYMBOL(ib_post_send_mad);
@@ -1447,8 +1411,7 @@
 		 * of MAD.
 		 */
 		hi_tid = be64_to_cpu(mad->mad_hdr.tid) >> 32;
-		list_for_each_entry(entry, &port_priv->agent_list,
-				    agent_list) {
+		list_for_each_entry(entry, &port_priv->agent_list, agent_list) {
 			if (entry->agent.hi_tid == hi_tid) {
 				mad_agent = entry;
 				break;
@@ -1571,8 +1534,7 @@
 	 */
 	list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
 			    agent_list) {
-		if (is_data_mad(mad_agent_priv,
-				mad_send_wr->send_wr.wr.ud.mad_hdr) &&
+		if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) &&
 		    mad_send_wr->tid == tid && mad_send_wr->timeout) {
 			/* Verify request has not been canceled */
 			return (mad_send_wr->status == IB_WC_SUCCESS) ?
@@ -1628,14 +1590,14 @@
 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 
 		/* Defined behavior is to complete response before request */
-		mad_recv_wc->wc->wr_id = mad_send_wr->wr_id;
+		mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
 		mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
 						   mad_recv_wc);
 		atomic_dec(&mad_agent_priv->refcount);
 
 		mad_send_wc.status = IB_WC_SUCCESS;
 		mad_send_wc.vendor_err = 0;
-		mad_send_wc.wr_id = mad_send_wr->wr_id;
+		mad_send_wc.send_buf = &mad_send_wr->send_buf;
 		ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
 	} else {
 		mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
@@ -1728,11 +1690,11 @@
 			if (ret & IB_MAD_RESULT_CONSUMED)
 				goto out;
 			if (ret & IB_MAD_RESULT_REPLY) {
-				/* Send response */
-				if (!agent_send(response, &recv->grh, wc,
-						port_priv->device,
-						port_priv->port_num))
-					response = NULL;
+				agent_send_response(&response->mad.mad,
+						    &recv->grh, wc,
+						    port_priv->device,
+						    port_priv->port_num,
+						    qp_info->qp->qp_num);
 				goto out;
 			}
 		}
@@ -1866,15 +1828,15 @@
 
 	if (mad_send_wr->status != IB_WC_SUCCESS )
 		mad_send_wc->status = mad_send_wr->status;
-	if (ret != IB_RMPP_RESULT_INTERNAL)
+	if (ret == IB_RMPP_RESULT_INTERNAL)
+		ib_rmpp_send_handler(mad_send_wc);
+	else
 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
 						   mad_send_wc);
 
 	/* Release reference on agent taken when sending */
 	if (atomic_dec_and_test(&mad_agent_priv->refcount))
 		wake_up(&mad_agent_priv->wait);
-
-	kfree(mad_send_wr);
 	return;
 done:
 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
@@ -1888,6 +1850,7 @@
 	struct ib_mad_qp_info		*qp_info;
 	struct ib_mad_queue		*send_queue;
 	struct ib_send_wr		*bad_send_wr;
+	struct ib_mad_send_wc		mad_send_wc;
 	unsigned long flags;
 	int ret;
 
@@ -1898,6 +1861,9 @@
 	qp_info = send_queue->qp_info;
 
 retry:
+	dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device,
+			 pci_unmap_addr(mad_send_wr, mapping),
+			 mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
 	queued_send_wr = NULL;
 	spin_lock_irqsave(&send_queue->lock, flags);
 	list_del(&mad_list->list);
@@ -1914,17 +1880,17 @@
 	}
 	spin_unlock_irqrestore(&send_queue->lock, flags);
 
-	/* Restore client wr_id in WC and complete send */
-	wc->wr_id = mad_send_wr->wr_id;
+	mad_send_wc.send_buf = &mad_send_wr->send_buf;
+	mad_send_wc.status = wc->status;
+	mad_send_wc.vendor_err = wc->vendor_err;
 	if (atomic_read(&qp_info->snoop_count))
-		snoop_send(qp_info, &mad_send_wr->send_wr,
-			   (struct ib_mad_send_wc *)wc,
+		snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc,
 			   IB_MAD_SNOOP_SEND_COMPLETIONS);
-	ib_mad_complete_send_wr(mad_send_wr, (struct ib_mad_send_wc *)wc);
+	ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
 
 	if (queued_send_wr) {
 		ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr,
-				&bad_send_wr);
+				   &bad_send_wr);
 		if (ret) {
 			printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret);
 			mad_send_wr = queued_send_wr;
@@ -2066,38 +2032,37 @@
 
 	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
 				 &cancel_list, agent_list) {
-		mad_send_wc.wr_id = mad_send_wr->wr_id;
+		mad_send_wc.send_buf = &mad_send_wr->send_buf;
+		list_del(&mad_send_wr->agent_list);
 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
 						   &mad_send_wc);
-
-		list_del(&mad_send_wr->agent_list);
-		kfree(mad_send_wr);
 		atomic_dec(&mad_agent_priv->refcount);
 	}
 }
 
 static struct ib_mad_send_wr_private*
-find_send_by_wr_id(struct ib_mad_agent_private *mad_agent_priv, u64 wr_id)
+find_send_wr(struct ib_mad_agent_private *mad_agent_priv,
+	     struct ib_mad_send_buf *send_buf)
 {
 	struct ib_mad_send_wr_private *mad_send_wr;
 
 	list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
 			    agent_list) {
-		if (mad_send_wr->wr_id == wr_id)
+		if (&mad_send_wr->send_buf == send_buf)
 			return mad_send_wr;
 	}
 
 	list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
 			    agent_list) {
-		if (is_data_mad(mad_agent_priv,
-				mad_send_wr->send_wr.wr.ud.mad_hdr) &&
-		    mad_send_wr->wr_id == wr_id)
+		if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) &&
+		    &mad_send_wr->send_buf == send_buf)
 			return mad_send_wr;
 	}
 	return NULL;
 }
 
-int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms)
+int ib_modify_mad(struct ib_mad_agent *mad_agent,
+		  struct ib_mad_send_buf *send_buf, u32 timeout_ms)
 {
 	struct ib_mad_agent_private *mad_agent_priv;
 	struct ib_mad_send_wr_private *mad_send_wr;
@@ -2107,7 +2072,7 @@
 	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
 				      agent);
 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
-	mad_send_wr = find_send_by_wr_id(mad_agent_priv, wr_id);
+	mad_send_wr = find_send_wr(mad_agent_priv, send_buf);
 	if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 		return -EINVAL;
@@ -2119,7 +2084,7 @@
 		mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
 	}
 
-	mad_send_wr->send_wr.wr.ud.timeout_ms = timeout_ms;
+	mad_send_wr->send_buf.timeout_ms = timeout_ms;
 	if (active)
 		mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
 	else
@@ -2130,9 +2095,10 @@
 }
 EXPORT_SYMBOL(ib_modify_mad);
 
-void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id)
+void ib_cancel_mad(struct ib_mad_agent *mad_agent,
+		   struct ib_mad_send_buf *send_buf)
 {
-	ib_modify_mad(mad_agent, wr_id, 0);
+	ib_modify_mad(mad_agent, send_buf, 0);
 }
 EXPORT_SYMBOL(ib_cancel_mad);
 
@@ -2166,10 +2132,9 @@
 			 * Defined behavior is to complete response
 			 * before request
 			 */
-			build_smp_wc(local->wr_id,
+			build_smp_wc((unsigned long) local->mad_send_wr,
 				     be16_to_cpu(IB_LID_PERMISSIVE),
-				     0 /* pkey index */,
-				     recv_mad_agent->agent.port_num, &wc);
+				     0, recv_mad_agent->agent.port_num, &wc);
 
 			local->mad_priv->header.recv_wc.wc = &wc;
 			local->mad_priv->header.recv_wc.mad_len =
@@ -2196,11 +2161,11 @@
 		/* Complete send */
 		mad_send_wc.status = IB_WC_SUCCESS;
 		mad_send_wc.vendor_err = 0;
-		mad_send_wc.wr_id = local->wr_id;
+		mad_send_wc.send_buf = &local->mad_send_wr->send_buf;
 		if (atomic_read(&mad_agent_priv->qp_info->snoop_count))
-			snoop_send(mad_agent_priv->qp_info, &local->send_wr,
-				  &mad_send_wc,
-				   IB_MAD_SNOOP_SEND_COMPLETIONS);
+			snoop_send(mad_agent_priv->qp_info,
+				   &local->mad_send_wr->send_buf,
+				   &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS);
 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
 						   &mad_send_wc);
 
@@ -2221,8 +2186,7 @@
 	if (!mad_send_wr->retries--)
 		return -ETIMEDOUT;
 
-	mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_wr.
-						wr.ud.timeout_ms);
+	mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
 
 	if (mad_send_wr->mad_agent_priv->agent.rmpp_version) {
 		ret = ib_retry_rmpp(mad_send_wr);
@@ -2285,11 +2249,10 @@
 			mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR;
 		else
 			mad_send_wc.status = mad_send_wr->status;
-		mad_send_wc.wr_id = mad_send_wr->wr_id;
+		mad_send_wc.send_buf = &mad_send_wr->send_buf;
 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
 						   &mad_send_wc);
 
-		kfree(mad_send_wr);
 		atomic_dec(&mad_agent_priv->refcount);
 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
 	}
@@ -2683,40 +2646,47 @@
 
 static void ib_mad_init_device(struct ib_device *device)
 {
-	int num_ports, cur_port, i;
+	int start, end, i;
 
 	if (device->node_type == IB_NODE_SWITCH) {
-		num_ports = 1;
-		cur_port = 0;
+		start = 0;
+		end   = 0;
 	} else {
-		num_ports = device->phys_port_cnt;
-		cur_port = 1;
+		start = 1;
+		end   = device->phys_port_cnt;
 	}
-	for (i = 0; i < num_ports; i++, cur_port++) {
-		if (ib_mad_port_open(device, cur_port)) {
+
+	for (i = start; i <= end; i++) {
+		if (ib_mad_port_open(device, i)) {
 			printk(KERN_ERR PFX "Couldn't open %s port %d\n",
-			       device->name, cur_port);
-			goto error_device_open;
+			       device->name, i);
+			goto error;
 		}
-		if (ib_agent_port_open(device, cur_port)) {
+		if (ib_agent_port_open(device, i)) {
 			printk(KERN_ERR PFX "Couldn't open %s port %d "
 			       "for agents\n",
-			       device->name, cur_port);
-			goto error_device_open;
+			       device->name, i);
+			goto error_agent;
 		}
 	}
 	return;
 
-error_device_open:
-	while (i > 0) {
-		cur_port--;
-		if (ib_agent_port_close(device, cur_port))
+error_agent:
+	if (ib_mad_port_close(device, i))
+		printk(KERN_ERR PFX "Couldn't close %s port %d\n",
+		       device->name, i);
+
+error:
+	i--;
+
+	while (i >= start) {
+		if (ib_agent_port_close(device, i))
 			printk(KERN_ERR PFX "Couldn't close %s port %d "
 			       "for agents\n",
-			       device->name, cur_port);
-		if (ib_mad_port_close(device, cur_port))
+			       device->name, i);
+		if (ib_mad_port_close(device, i))
 			printk(KERN_ERR PFX "Couldn't close %s port %d\n",
-			       device->name, cur_port);
+			       device->name, i);
 		i--;
 	}
 }
@@ -2754,7 +2724,6 @@
 	int ret;
 
 	spin_lock_init(&ib_mad_port_list_lock);
-	spin_lock_init(&ib_agent_port_list_lock);
 
 	ib_mad_cache = kmem_cache_create("ib_mad",
 					 sizeof(struct ib_mad_private),
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index f1ba794..570f786 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -118,9 +118,10 @@
 	struct ib_mad_list_head mad_list;
 	struct list_head agent_list;
 	struct ib_mad_agent_private *mad_agent_priv;
+	struct ib_mad_send_buf send_buf;
+	DECLARE_PCI_UNMAP_ADDR(mapping)
 	struct ib_send_wr send_wr;
 	struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
-	u64 wr_id;			/* client WR ID */
 	__be64 tid;
 	unsigned long timeout;
 	int retries;
@@ -141,10 +142,7 @@
 	struct list_head completion_list;
 	struct ib_mad_private *mad_priv;
 	struct ib_mad_agent_private *recv_mad_agent;
-	struct ib_send_wr send_wr;
-	struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
-	u64 wr_id;			/* client WR ID */
-	__be64 tid;
+	struct ib_mad_send_wr_private *mad_send_wr;
 };
 
 struct ib_mad_mgmt_method_table {
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index e23836d..3249e1d 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -103,12 +103,12 @@
 static int data_offset(u8 mgmt_class)
 {
 	if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM)
-		return offsetof(struct ib_sa_mad, data);
+		return IB_MGMT_SA_HDR;
 	else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
 		 (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))
-		return offsetof(struct ib_vendor_mad, data);
+		return IB_MGMT_VENDOR_HDR;
 	else
-		return offsetof(struct ib_rmpp_mad, data);
+		return IB_MGMT_RMPP_HDR;
 }
 
 static void format_ack(struct ib_rmpp_mad *ack,
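The constants substituted above are assumed to equal the offsetof() expressions they replace; a hedged compile-time check (not part of the patch) would look like:

	BUILD_BUG_ON(IB_MGMT_SA_HDR     != offsetof(struct ib_sa_mad, data));
	BUILD_BUG_ON(IB_MGMT_VENDOR_HDR != offsetof(struct ib_vendor_mad, data));
	BUILD_BUG_ON(IB_MGMT_RMPP_HDR   != offsetof(struct ib_rmpp_mad, data));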
@@ -135,55 +135,52 @@
 		     struct ib_mad_recv_wc *recv_wc)
 {
 	struct ib_mad_send_buf *msg;
-	struct ib_send_wr *bad_send_wr;
-	int hdr_len, ret;
+	int ret;
 
-	hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr);
 	msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp,
-				 recv_wc->wc->pkey_index, rmpp_recv->ah, 1,
-				 hdr_len, sizeof(struct ib_rmpp_mad) - hdr_len,
-				 GFP_KERNEL);
+				 recv_wc->wc->pkey_index, 1, IB_MGMT_RMPP_HDR,
+				 IB_MGMT_RMPP_DATA, GFP_KERNEL);
 	if (!msg)
 		return;
 
-	format_ack((struct ib_rmpp_mad *) msg->mad,
-		   (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv);
-	ret = ib_post_send_mad(&rmpp_recv->agent->agent, &msg->send_wr,
-			       &bad_send_wr);
+	format_ack(msg->mad, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad,
+		   rmpp_recv);
+	msg->ah = rmpp_recv->ah;
+	ret = ib_post_send_mad(msg, NULL);
 	if (ret)
 		ib_free_send_mad(msg);
 }
 
-static int alloc_response_msg(struct ib_mad_agent *agent,
-			      struct ib_mad_recv_wc *recv_wc,
-			      struct ib_mad_send_buf **msg)
+static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
+						  struct ib_mad_recv_wc *recv_wc)
 {
-	struct ib_mad_send_buf *m;
+	struct ib_mad_send_buf *msg;
 	struct ib_ah *ah;
-	int hdr_len;
 
 	ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc,
 				  recv_wc->recv_buf.grh, agent->port_num);
 	if (IS_ERR(ah))
-		return PTR_ERR(ah);
+		return (void *) ah;
 
-	hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr);
-	m = ib_create_send_mad(agent, recv_wc->wc->src_qp,
-			       recv_wc->wc->pkey_index, ah, 1, hdr_len,
-			       sizeof(struct ib_rmpp_mad) - hdr_len,
-			       GFP_KERNEL);
-	if (IS_ERR(m)) {
+	msg = ib_create_send_mad(agent, recv_wc->wc->src_qp,
+				 recv_wc->wc->pkey_index, 1,
+				 IB_MGMT_RMPP_HDR, IB_MGMT_RMPP_DATA,
+				 GFP_KERNEL);
+	if (IS_ERR(msg))
 		ib_destroy_ah(ah);
-		return PTR_ERR(m);
-	}
-	*msg = m;
-	return 0;
+	else
+		msg->ah = ah;
+
+	return msg;
 }
 
-static void free_msg(struct ib_mad_send_buf *msg)
+void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc)
 {
-	ib_destroy_ah(msg->send_wr.wr.ud.ah);
-	ib_free_send_mad(msg);
+	struct ib_rmpp_mad *rmpp_mad = mad_send_wc->send_buf->mad;
+
+	if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_ACK)
+		ib_destroy_ah(mad_send_wc->send_buf->ah);
+	ib_free_send_mad(mad_send_wc->send_buf);
 }
 
 static void nack_recv(struct ib_mad_agent_private *agent,
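The hunk above switches ack_recv() and alloc_response_msg() to the reworked send-buffer API: ib_create_send_mad() no longer takes an address handle, the caller attaches one through msg->ah, and the buffer itself is handed to ib_post_send_mad().  A minimal hedged sketch of that calling convention (error handling trimmed; agent, ah and recv_wc are assumed to be in scope):

	struct ib_mad_send_buf *msg;

	msg = ib_create_send_mad(agent, recv_wc->wc->src_qp,
				 recv_wc->wc->pkey_index, 1 /* rmpp_active */,
				 IB_MGMT_RMPP_HDR, IB_MGMT_RMPP_DATA, GFP_KERNEL);
	if (IS_ERR(msg))
		return;

	msg->ah = ah;			/* address handle is now set on the buffer */
	if (ib_post_send_mad(msg, NULL)) {
		/* on failure the caller still owns the AH and the buffer */
		ib_destroy_ah(msg->ah);
		ib_free_send_mad(msg);
	}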
@@ -191,14 +188,13 @@
 {
 	struct ib_mad_send_buf *msg;
 	struct ib_rmpp_mad *rmpp_mad;
-	struct ib_send_wr *bad_send_wr;
 	int ret;
 
-	ret = alloc_response_msg(&agent->agent, recv_wc, &msg);
-	if (ret)
+	msg = alloc_response_msg(&agent->agent, recv_wc);
+	if (IS_ERR(msg))
 		return;
 
-	rmpp_mad = (struct ib_rmpp_mad *) msg->mad;
+	rmpp_mad = msg->mad;
 	memcpy(rmpp_mad, recv_wc->recv_buf.mad,
 	       data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class));
 
@@ -210,9 +206,11 @@
 	rmpp_mad->rmpp_hdr.seg_num = 0;
 	rmpp_mad->rmpp_hdr.paylen_newwin = 0;
 
-	ret = ib_post_send_mad(&agent->agent, &msg->send_wr, &bad_send_wr);
-	if (ret)
-		free_msg(msg);
+	ret = ib_post_send_mad(msg, NULL);
+	if (ret) {
+		ib_destroy_ah(msg->ah);
+		ib_free_send_mad(msg);
+	}
 }
 
 static void recv_timeout_handler(void *data)
@@ -585,7 +583,7 @@
 	int timeout;
 	u32 paylen;
 
-	rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr;
+	rmpp_mad = mad_send_wr->send_buf.mad;
 	ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
 	rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(mad_send_wr->seg_num);
 
@@ -612,7 +610,7 @@
 	}
 
 	/* 2 seconds for an ACK until we can find the packet lifetime */
-	timeout = mad_send_wr->send_wr.wr.ud.timeout_ms;
+	timeout = mad_send_wr->send_buf.timeout_ms;
 	if (!timeout || timeout > 2000)
 		mad_send_wr->timeout = msecs_to_jiffies(2000);
 	mad_send_wr->seg_num++;
@@ -640,7 +638,7 @@
 
 	wc.status = IB_WC_REM_ABORT_ERR;
 	wc.vendor_err = rmpp_status;
-	wc.wr_id = mad_send_wr->wr_id;
+	wc.send_buf = &mad_send_wr->send_buf;
 	ib_mad_complete_send_wr(mad_send_wr, &wc);
 	return;
 out:
@@ -694,12 +692,12 @@
 
 	if (seg_num > mad_send_wr->last_ack) {
 		mad_send_wr->last_ack = seg_num;
-		mad_send_wr->retries = mad_send_wr->send_wr.wr.ud.retries;
+		mad_send_wr->retries = mad_send_wr->send_buf.retries;
 	}
 	mad_send_wr->newwin = newwin;
 	if (mad_send_wr->last_ack == mad_send_wr->total_seg) {
 		/* If no response is expected, the ACK completes the send */
-		if (!mad_send_wr->send_wr.wr.ud.timeout_ms) {
+		if (!mad_send_wr->send_buf.timeout_ms) {
 			struct ib_mad_send_wc wc;
 
 			ib_mark_mad_done(mad_send_wr);
@@ -707,13 +705,13 @@
 
 			wc.status = IB_WC_SUCCESS;
 			wc.vendor_err = 0;
-			wc.wr_id = mad_send_wr->wr_id;
+			wc.send_buf = &mad_send_wr->send_buf;
 			ib_mad_complete_send_wr(mad_send_wr, &wc);
 			return;
 		}
 		if (mad_send_wr->refcount == 1)
-			ib_reset_mad_timeout(mad_send_wr, mad_send_wr->
-					     send_wr.wr.ud.timeout_ms);
+			ib_reset_mad_timeout(mad_send_wr,
+					     mad_send_wr->send_buf.timeout_ms);
 	} else if (mad_send_wr->refcount == 1 &&
 		   mad_send_wr->seg_num < mad_send_wr->newwin &&
 		   mad_send_wr->seg_num <= mad_send_wr->total_seg) {
@@ -842,7 +840,7 @@
 	struct ib_rmpp_mad *rmpp_mad;
 	int i, total_len, ret;
 
-	rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr;
+	rmpp_mad = mad_send_wr->send_buf.mad;
 	if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
 	      IB_MGMT_RMPP_FLAG_ACTIVE))
 		return IB_RMPP_RESULT_UNHANDLED;
@@ -863,7 +861,7 @@
 
         mad_send_wr->total_seg = (total_len - mad_send_wr->data_offset) /
 			(sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset);
-	mad_send_wr->pad = total_len - offsetof(struct ib_rmpp_mad, data) -
+	mad_send_wr->pad = total_len - IB_MGMT_RMPP_HDR -
 			   be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
 
 	/* We need to wait for the final ACK even if there isn't a response */
@@ -878,23 +876,15 @@
 			    struct ib_mad_send_wc *mad_send_wc)
 {
 	struct ib_rmpp_mad *rmpp_mad;
-	struct ib_mad_send_buf *msg;
 	int ret;
 
-	rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr;
+	rmpp_mad = mad_send_wr->send_buf.mad;
 	if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
 	      IB_MGMT_RMPP_FLAG_ACTIVE))
 		return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */
 
-	if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) {
-		msg = (struct ib_mad_send_buf *) (unsigned long)
-		      mad_send_wc->wr_id;
-		if (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_ACK)
-			ib_free_send_mad(msg);
-		else
-			free_msg(msg);
+	if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA)
 		return IB_RMPP_RESULT_INTERNAL;	 /* ACK, STOP, or ABORT */
-	}
 
 	if (mad_send_wc->status != IB_WC_SUCCESS ||
 	    mad_send_wr->status != IB_WC_SUCCESS)
@@ -905,7 +895,7 @@
 
 	if (mad_send_wr->last_ack == mad_send_wr->total_seg) {
 		mad_send_wr->timeout =
-			msecs_to_jiffies(mad_send_wr->send_wr.wr.ud.timeout_ms);
+			msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
 		return IB_RMPP_RESULT_PROCESSED; /* Send done */
 	}
 
@@ -926,7 +916,7 @@
 	struct ib_rmpp_mad *rmpp_mad;
 	int ret;
 
-	rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr;
+	rmpp_mad = mad_send_wr->send_buf.mad;
 	if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
 	      IB_MGMT_RMPP_FLAG_ACTIVE))
 		return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */
diff --git a/drivers/infiniband/core/mad_rmpp.h b/drivers/infiniband/core/mad_rmpp.h
index c4924df..f0616fd 100644
--- a/drivers/infiniband/core/mad_rmpp.h
+++ b/drivers/infiniband/core/mad_rmpp.h
@@ -51,6 +51,8 @@
 int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
 			    struct ib_mad_send_wc *mad_send_wc);
 
+void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc);
+
 void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent);
 
 int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 2626182..89ce9dc 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -73,11 +73,10 @@
 struct ib_sa_query {
 	void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
 	void (*release)(struct ib_sa_query *);
-	struct ib_sa_port  *port;
-	struct ib_sa_mad   *mad;
-	struct ib_sa_sm_ah *sm_ah;
-	DECLARE_PCI_UNMAP_ADDR(mapping)
-	int                 id;
+	struct ib_sa_port      *port;
+	struct ib_mad_send_buf *mad_buf;
+	struct ib_sa_sm_ah     *sm_ah;
+	int			id;
 };
 
 struct ib_sa_service_query {
@@ -426,6 +425,7 @@
 {
 	unsigned long flags;
 	struct ib_mad_agent *agent;
+	struct ib_mad_send_buf *mad_buf;
 
 	spin_lock_irqsave(&idr_lock, flags);
 	if (idr_find(&query_idr, id) != query) {
@@ -433,9 +433,10 @@
 		return;
 	}
 	agent = query->port->agent;
+	mad_buf = query->mad_buf;
 	spin_unlock_irqrestore(&idr_lock, flags);
 
-	ib_cancel_mad(agent, id);
+	ib_cancel_mad(agent, mad_buf);
 }
 EXPORT_SYMBOL(ib_sa_cancel_query);
 
@@ -457,71 +458,46 @@
 
 static int send_mad(struct ib_sa_query *query, int timeout_ms)
 {
-	struct ib_sa_port *port = query->port;
 	unsigned long flags;
-	int ret;
-	struct ib_sge      gather_list;
-	struct ib_send_wr *bad_wr, wr = {
-		.opcode      = IB_WR_SEND,
-		.sg_list     = &gather_list,
-		.num_sge     = 1,
-		.send_flags  = IB_SEND_SIGNALED,
-		.wr	     = {
-			 .ud = {
-				 .mad_hdr     = &query->mad->mad_hdr,
-				 .remote_qpn  = 1,
-				 .remote_qkey = IB_QP1_QKEY,
-				 .timeout_ms  = timeout_ms,
-			 }
-		 }
-	};
+	int ret, id;
 
 retry:
 	if (!idr_pre_get(&query_idr, GFP_ATOMIC))
 		return -ENOMEM;
 	spin_lock_irqsave(&idr_lock, flags);
-	ret = idr_get_new(&query_idr, query, &query->id);
+	ret = idr_get_new(&query_idr, query, &id);
 	spin_unlock_irqrestore(&idr_lock, flags);
 	if (ret == -EAGAIN)
 		goto retry;
 	if (ret)
 		return ret;
 
-	wr.wr_id = query->id;
+	query->mad_buf->timeout_ms  = timeout_ms;
+	query->mad_buf->context[0] = query;
+	query->id = id;
 
-	spin_lock_irqsave(&port->ah_lock, flags);
-	kref_get(&port->sm_ah->ref);
-	query->sm_ah = port->sm_ah;
-	wr.wr.ud.ah  = port->sm_ah->ah;
-	spin_unlock_irqrestore(&port->ah_lock, flags);
+	spin_lock_irqsave(&query->port->ah_lock, flags);
+	kref_get(&query->port->sm_ah->ref);
+	query->sm_ah = query->port->sm_ah;
+	spin_unlock_irqrestore(&query->port->ah_lock, flags);
 
-	gather_list.addr   = dma_map_single(port->agent->device->dma_device,
-					    query->mad,
-					    sizeof (struct ib_sa_mad),
-					    DMA_TO_DEVICE);
-	gather_list.length = sizeof (struct ib_sa_mad);
-	gather_list.lkey   = port->agent->mr->lkey;
-	pci_unmap_addr_set(query, mapping, gather_list.addr);
+	query->mad_buf->ah = query->sm_ah->ah;
 
-	ret = ib_post_send_mad(port->agent, &wr, &bad_wr);
+	ret = ib_post_send_mad(query->mad_buf, NULL);
 	if (ret) {
-		dma_unmap_single(port->agent->device->dma_device,
-				 pci_unmap_addr(query, mapping),
-				 sizeof (struct ib_sa_mad),
-				 DMA_TO_DEVICE);
-		kref_put(&query->sm_ah->ref, free_sm_ah);
 		spin_lock_irqsave(&idr_lock, flags);
-		idr_remove(&query_idr, query->id);
+		idr_remove(&query_idr, id);
 		spin_unlock_irqrestore(&idr_lock, flags);
+
+		kref_put(&query->sm_ah->ref, free_sm_ah);
 	}
 
 	/*
 	 * It's not safe to dereference query any more, because the
 	 * send may already have completed and freed the query in
-	 * another context.  So use wr.wr_id, which has a copy of the
-	 * query's id.
+	 * another context.
 	 */
-	return ret ? ret : wr.wr_id;
+	return ret ? ret : id;
 }
 
 static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
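The idr id is still needed for ib_sa_cancel_query(), but the completion path no longer has to look the query up by wr_id: the pointer travels in context[0] of the send buffer.  Both halves appear in later hunks; side by side, the hedged round trip is:

	query->mad_buf->context[0] = query;	/* in send_mad(), before posting */
	...
	struct ib_sa_query *query = mad_send_wc->send_buf->context[0];	/* in send_handler() */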
@@ -543,7 +519,6 @@
 
 static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
 {
-	kfree(sa_query->mad);
 	kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
 }
 
@@ -583,44 +558,59 @@
 {
 	struct ib_sa_path_query *query;
 	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
-	struct ib_sa_port   *port   = &sa_dev->port[port_num - sa_dev->start_port];
-	struct ib_mad_agent *agent  = port->agent;
+	struct ib_sa_port   *port;
+	struct ib_mad_agent *agent;
+	struct ib_sa_mad *mad;
 	int ret;
 
+	if (!sa_dev)
+		return -ENODEV;
+
+	port  = &sa_dev->port[port_num - sa_dev->start_port];
+	agent = port->agent;
+
 	query = kmalloc(sizeof *query, gfp_mask);
 	if (!query)
 		return -ENOMEM;
-	query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask);
-	if (!query->sa_query.mad) {
-		kfree(query);
-		return -ENOMEM;
+
+	query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
+						     0, IB_MGMT_SA_HDR,
+						     IB_MGMT_SA_DATA, gfp_mask);
+	if (!query->sa_query.mad_buf) {
+		ret = -ENOMEM;
+		goto err1;
 	}
 
 	query->callback = callback;
 	query->context  = context;
 
-	init_mad(query->sa_query.mad, agent);
+	mad = query->sa_query.mad_buf->mad;
+	init_mad(mad, agent);
 
-	query->sa_query.callback              = callback ? ib_sa_path_rec_callback : NULL;
-	query->sa_query.release               = ib_sa_path_rec_release;
-	query->sa_query.port                  = port;
-	query->sa_query.mad->mad_hdr.method   = IB_MGMT_METHOD_GET;
-	query->sa_query.mad->mad_hdr.attr_id  = cpu_to_be16(IB_SA_ATTR_PATH_REC);
-	query->sa_query.mad->sa_hdr.comp_mask = comp_mask;
+	query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
+	query->sa_query.release  = ib_sa_path_rec_release;
+	query->sa_query.port     = port;
+	mad->mad_hdr.method	 = IB_MGMT_METHOD_GET;
+	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_PATH_REC);
+	mad->sa_hdr.comp_mask	 = comp_mask;
 
-	ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
-		rec, query->sa_query.mad->data);
+	ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data);
 
 	*sa_query = &query->sa_query;
 
 	ret = send_mad(&query->sa_query, timeout_ms);
-	if (ret < 0) {
-		*sa_query = NULL;
-		kfree(query->sa_query.mad);
-		kfree(query);
-	}
+	if (ret < 0)
+		goto err2;
 
 	return ret;
+
+err2:
+	*sa_query = NULL;
+	ib_free_send_mad(query->sa_query.mad_buf);
+
+err1:
+	kfree(query);
+	return ret;
 }
 EXPORT_SYMBOL(ib_sa_path_rec_get);
 
@@ -643,7 +633,6 @@
 
 static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
 {
-	kfree(sa_query->mad);
 	kfree(container_of(sa_query, struct ib_sa_service_query, sa_query));
 }
 
@@ -685,10 +674,17 @@
 {
 	struct ib_sa_service_query *query;
 	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
-	struct ib_sa_port   *port   = &sa_dev->port[port_num - sa_dev->start_port];
-	struct ib_mad_agent *agent  = port->agent;
+	struct ib_sa_port   *port;
+	struct ib_mad_agent *agent;
+	struct ib_sa_mad *mad;
 	int ret;
 
+	if (!sa_dev)
+		return -ENODEV;
+
+	port  = &sa_dev->port[port_num - sa_dev->start_port];
+	agent = port->agent;
+
 	if (method != IB_MGMT_METHOD_GET &&
 	    method != IB_MGMT_METHOD_SET &&
 	    method != IB_SA_METHOD_DELETE)
@@ -697,38 +693,46 @@
 	query = kmalloc(sizeof *query, gfp_mask);
 	if (!query)
 		return -ENOMEM;
-	query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask);
-	if (!query->sa_query.mad) {
-		kfree(query);
-		return -ENOMEM;
+
+	query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
+						     0, IB_MGMT_SA_HDR,
+						     IB_MGMT_SA_DATA, gfp_mask);
+	if (!query->sa_query.mad_buf) {
+		ret = -ENOMEM;
+		goto err1;
 	}
 
 	query->callback = callback;
 	query->context  = context;
 
-	init_mad(query->sa_query.mad, agent);
+	mad = query->sa_query.mad_buf->mad;
+	init_mad(mad, agent);
 
-	query->sa_query.callback              = callback ? ib_sa_service_rec_callback : NULL;
-	query->sa_query.release               = ib_sa_service_rec_release;
-	query->sa_query.port                  = port;
-	query->sa_query.mad->mad_hdr.method   = method;
-	query->sa_query.mad->mad_hdr.attr_id  =
-				cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
-	query->sa_query.mad->sa_hdr.comp_mask = comp_mask;
+	query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
+	query->sa_query.release  = ib_sa_service_rec_release;
+	query->sa_query.port     = port;
+	mad->mad_hdr.method	 = method;
+	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
+	mad->sa_hdr.comp_mask	 = comp_mask;
 
 	ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table),
-		rec, query->sa_query.mad->data);
+		rec, mad->data);
 
 	*sa_query = &query->sa_query;
 
 	ret = send_mad(&query->sa_query, timeout_ms);
-	if (ret < 0) {
-		*sa_query = NULL;
-		kfree(query->sa_query.mad);
-		kfree(query);
-	}
+	if (ret < 0)
+		goto err2;
 
 	return ret;
+
+err2:
+	*sa_query = NULL;
+	ib_free_send_mad(query->sa_query.mad_buf);
+
+err1:
+	kfree(query);
+	return ret;
 }
 EXPORT_SYMBOL(ib_sa_service_rec_query);
 
@@ -751,7 +755,6 @@
 
 static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
 {
-	kfree(sa_query->mad);
 	kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
 }
 
@@ -768,60 +771,69 @@
 {
 	struct ib_sa_mcmember_query *query;
 	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
-	struct ib_sa_port   *port   = &sa_dev->port[port_num - sa_dev->start_port];
-	struct ib_mad_agent *agent  = port->agent;
+	struct ib_sa_port   *port;
+	struct ib_mad_agent *agent;
+	struct ib_sa_mad *mad;
 	int ret;
 
+	if (!sa_dev)
+		return -ENODEV;
+
+	port  = &sa_dev->port[port_num - sa_dev->start_port];
+	agent = port->agent;
+
 	query = kmalloc(sizeof *query, gfp_mask);
 	if (!query)
 		return -ENOMEM;
-	query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask);
-	if (!query->sa_query.mad) {
-		kfree(query);
-		return -ENOMEM;
+
+	query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
+						     0, IB_MGMT_SA_HDR,
+						     IB_MGMT_SA_DATA, gfp_mask);
+	if (!query->sa_query.mad_buf) {
+		ret = -ENOMEM;
+		goto err1;
 	}
 
 	query->callback = callback;
 	query->context  = context;
 
-	init_mad(query->sa_query.mad, agent);
+	mad = query->sa_query.mad_buf->mad;
+	init_mad(mad, agent);
 
-	query->sa_query.callback              = callback ? ib_sa_mcmember_rec_callback : NULL;
-	query->sa_query.release               = ib_sa_mcmember_rec_release;
-	query->sa_query.port                  = port;
-	query->sa_query.mad->mad_hdr.method   = method;
-	query->sa_query.mad->mad_hdr.attr_id  = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
-	query->sa_query.mad->sa_hdr.comp_mask = comp_mask;
+	query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
+	query->sa_query.release  = ib_sa_mcmember_rec_release;
+	query->sa_query.port     = port;
+	mad->mad_hdr.method	 = method;
+	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
+	mad->sa_hdr.comp_mask	 = comp_mask;
 
 	ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
-		rec, query->sa_query.mad->data);
+		rec, mad->data);
 
 	*sa_query = &query->sa_query;
 
 	ret = send_mad(&query->sa_query, timeout_ms);
-	if (ret < 0) {
-		*sa_query = NULL;
-		kfree(query->sa_query.mad);
-		kfree(query);
-	}
+	if (ret < 0)
+		goto err2;
 
 	return ret;
+
+err2:
+	*sa_query = NULL;
+	ib_free_send_mad(query->sa_query.mad_buf);
+
+err1:
+	kfree(query);
+	return ret;
 }
 EXPORT_SYMBOL(ib_sa_mcmember_rec_query);
 
 static void send_handler(struct ib_mad_agent *agent,
 			 struct ib_mad_send_wc *mad_send_wc)
 {
-	struct ib_sa_query *query;
+	struct ib_sa_query *query = mad_send_wc->send_buf->context[0];
 	unsigned long flags;
 
-	spin_lock_irqsave(&idr_lock, flags);
-	query = idr_find(&query_idr, mad_send_wc->wr_id);
-	spin_unlock_irqrestore(&idr_lock, flags);
-
-	if (!query)
-		return;
-
 	if (query->callback)
 		switch (mad_send_wc->status) {
 		case IB_WC_SUCCESS:
@@ -838,30 +850,25 @@
 			break;
 		}
 
-	dma_unmap_single(agent->device->dma_device,
-			 pci_unmap_addr(query, mapping),
-			 sizeof (struct ib_sa_mad),
-			 DMA_TO_DEVICE);
-	kref_put(&query->sm_ah->ref, free_sm_ah);
-
-	query->release(query);
-
 	spin_lock_irqsave(&idr_lock, flags);
-	idr_remove(&query_idr, mad_send_wc->wr_id);
+	idr_remove(&query_idr, query->id);
 	spin_unlock_irqrestore(&idr_lock, flags);
+
+	ib_free_send_mad(mad_send_wc->send_buf);
+	kref_put(&query->sm_ah->ref, free_sm_ah);
+	query->release(query);
 }
 
 static void recv_handler(struct ib_mad_agent *mad_agent,
 			 struct ib_mad_recv_wc *mad_recv_wc)
 {
 	struct ib_sa_query *query;
-	unsigned long flags;
+	struct ib_mad_send_buf *mad_buf;
 
-	spin_lock_irqsave(&idr_lock, flags);
-	query = idr_find(&query_idr, mad_recv_wc->wc->wr_id);
-	spin_unlock_irqrestore(&idr_lock, flags);
+	mad_buf = (void *) (unsigned long) mad_recv_wc->wc->wr_id;
+	query = mad_buf->context[0];
 
-	if (query && query->callback) {
+	if (query->callback) {
 		if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
 			query->callback(query,
 					mad_recv_wc->recv_buf.mad->mad_hdr.status ?
@@ -975,6 +982,7 @@
 static void __exit ib_sa_cleanup(void)
 {
 	ib_unregister_client(&sa_client);
+	idr_destroy(&query_idr);
 }
 
 module_init(ib_sa_init);
diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h
index db25503..2b3c401 100644
--- a/drivers/infiniband/core/smi.h
+++ b/drivers/infiniband/core/smi.h
@@ -39,6 +39,8 @@
 #ifndef __SMI_H_
 #define __SMI_H_
 
+#include <rdma/ib_smi.h>
+
 int smi_handle_dr_smp_recv(struct ib_smp *smp,
 			   u8 node_type,
 			   int port_num,
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 211ba32..7ce7a6c 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -65,6 +65,11 @@
 	int			index;
 };
 
+static inline int ibdev_is_alive(const struct ib_device *dev)
+{
+	return dev->reg_state == IB_DEV_REGISTERED;
+}
+
 static ssize_t port_attr_show(struct kobject *kobj,
 			      struct attribute *attr, char *buf)
 {
@@ -74,6 +79,8 @@
 
 	if (!port_attr->show)
 		return -EIO;
+	if (!ibdev_is_alive(p->ibdev))
+		return -ENODEV;
 
 	return port_attr->show(p, port_attr, buf);
 }
@@ -581,6 +588,9 @@
 {
 	struct ib_device *dev = container_of(cdev, struct ib_device, class_dev);
 
+	if (!ibdev_is_alive(dev))
+		return -ENODEV;
+
 	switch (dev->node_type) {
 	case IB_NODE_CA:     return sprintf(buf, "%d: CA\n", dev->node_type);
 	case IB_NODE_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
@@ -595,6 +605,9 @@
 	struct ib_device_attr attr;
 	ssize_t ret;
 
+	if (!ibdev_is_alive(dev))
+		return -ENODEV;
+
 	ret = ib_query_device(dev, &attr);
 	if (ret)
 		return ret;
@@ -612,6 +625,9 @@
 	struct ib_device_attr attr;
 	ssize_t ret;
 
+	if (!ibdev_is_alive(dev))
+		return -ENODEV;
+
 	ret = ib_query_device(dev, &attr);
 	if (ret)
 		return ret;
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 021b8f1..2847756 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -41,37 +41,81 @@
 #include <linux/file.h>
 #include <linux/mount.h>
 #include <linux/cdev.h>
+#include <linux/idr.h>
 
 #include <asm/uaccess.h>
 
-#include "ucm.h"
+#include <rdma/ib_cm.h>
+#include <rdma/ib_user_cm.h>
 
 MODULE_AUTHOR("Libor Michalek");
 MODULE_DESCRIPTION("InfiniBand userspace Connection Manager access");
 MODULE_LICENSE("Dual BSD/GPL");
 
-static int ucm_debug_level;
+struct ib_ucm_device {
+	int			devnum;
+	struct cdev		dev;
+	struct class_device	class_dev;
+	struct ib_device	*ib_dev;
+};
 
-module_param_named(debug_level, ucm_debug_level, int, 0644);
-MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
+struct ib_ucm_file {
+	struct semaphore mutex;
+	struct file *filp;
+	struct ib_ucm_device *device;
+
+	struct list_head  ctxs;
+	struct list_head  events;
+	wait_queue_head_t poll_wait;
+};
+
+struct ib_ucm_context {
+	int                 id;
+	wait_queue_head_t   wait;
+	atomic_t            ref;
+	int		    events_reported;
+
+	struct ib_ucm_file *file;
+	struct ib_cm_id    *cm_id;
+	__u64		   uid;
+
+	struct list_head    events;    /* list of pending events. */
+	struct list_head    file_list; /* member in file ctx list */
+};
+
+struct ib_ucm_event {
+	struct ib_ucm_context *ctx;
+	struct list_head file_list; /* member in file event list */
+	struct list_head ctx_list;  /* member in ctx event list */
+
+	struct ib_cm_id *cm_id;
+	struct ib_ucm_event_resp resp;
+	void *data;
+	void *info;
+	int data_len;
+	int info_len;
+};
 
 enum {
 	IB_UCM_MAJOR = 231,
-	IB_UCM_MINOR = 255
+	IB_UCM_BASE_MINOR = 224,
+	IB_UCM_MAX_DEVICES = 32
 };
 
-#define IB_UCM_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_MINOR)
+#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR)
 
-#define PFX "UCM: "
+static void ib_ucm_add_one(struct ib_device *device);
+static void ib_ucm_remove_one(struct ib_device *device);
 
-#define ucm_dbg(format, arg...)			\
-	do {					\
-		if (ucm_debug_level > 0)	\
-			printk(KERN_DEBUG PFX format, ## arg); \
-	} while (0)
+static struct ib_client ucm_client = {
+	.name   = "ucm",
+	.add    = ib_ucm_add_one,
+	.remove = ib_ucm_remove_one
+};
 
-static struct semaphore ctx_id_mutex;
-static struct idr       ctx_id_table;
+static DECLARE_MUTEX(ctx_id_mutex);
+static DEFINE_IDR(ctx_id_table);
+static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES);
 
 static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id)
 {
@@ -152,17 +196,13 @@
 		goto error;
 
 	list_add_tail(&ctx->file_list, &file->ctxs);
-	ucm_dbg("Allocated CM ID <%d>\n", ctx->id);
 	return ctx;
 
 error:
 	kfree(ctx);
 	return NULL;
 }
-/*
- * Event portion of the API, handle CM events
- * and allow event polling.
- */
+
 static void ib_ucm_event_path_get(struct ib_ucm_path_rec *upath,
 				  struct ib_sa_path_rec	 *kpath)
 {
@@ -209,6 +249,7 @@
 	ureq->retry_count                = kreq->retry_count;
 	ureq->rnr_retry_count            = kreq->rnr_retry_count;
 	ureq->srq                        = kreq->srq;
+	ureq->port			 = kreq->port;
 
 	ib_ucm_event_path_get(&ureq->primary_path, kreq->primary_path);
 	ib_ucm_event_path_get(&ureq->alternate_path, kreq->alternate_path);
@@ -295,6 +336,8 @@
 	case IB_CM_SIDR_REQ_RECEIVED:
 		uvt->resp.u.sidr_req_resp.pkey = 
 					evt->param.sidr_req_rcvd.pkey;
+		uvt->resp.u.sidr_req_resp.port = 
+					evt->param.sidr_req_rcvd.port;
 		uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE;
 		break;
 	case IB_CM_SIDR_REP_RECEIVED:
@@ -387,9 +430,7 @@
 
 	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 		return -EFAULT;
-	/*
-	 * wait
-	 */
+
 	down(&file->mutex);
 	while (list_empty(&file->events)) {
 
@@ -471,7 +512,6 @@
 	return result;
 }
 
-
 static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
 				const char __user *inbuf,
 				int in_len, int out_len)
@@ -494,29 +534,27 @@
 		return -ENOMEM;
 
 	ctx->uid = cmd.uid;
-	ctx->cm_id = ib_create_cm_id(ib_ucm_event_handler, ctx);
+	ctx->cm_id = ib_create_cm_id(file->device->ib_dev,
+				     ib_ucm_event_handler, ctx);
 	if (IS_ERR(ctx->cm_id)) {
 		result = PTR_ERR(ctx->cm_id);
-		goto err;
+		goto err1;
 	}
 
 	resp.id = ctx->id;
 	if (copy_to_user((void __user *)(unsigned long)cmd.response,
 			 &resp, sizeof(resp))) {
 		result = -EFAULT;
-		goto err;
+		goto err2;
 	}
-
 	return 0;
 
-err:
+err2:
+	ib_destroy_cm_id(ctx->cm_id);
+err1:
 	down(&ctx_id_mutex);
 	idr_remove(&ctx_id_table, ctx->id);
 	up(&ctx_id_mutex);
-
-	if (!IS_ERR(ctx->cm_id))
-		ib_destroy_cm_id(ctx->cm_id);
-
 	kfree(ctx);
 	return result;
 }
@@ -1184,9 +1222,6 @@
 	if (copy_from_user(&hdr, buf, sizeof(hdr)))
 		return -EFAULT;
 
-	ucm_dbg("Write. cmd <%d> in <%d> out <%d> len <%Zu>\n",
-		hdr.cmd, hdr.in, hdr.out, len);
-
 	if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table))
 		return -EINVAL;
 
@@ -1231,8 +1266,7 @@
 
 	filp->private_data = file;
 	file->filp = filp;
-
-	ucm_dbg("Created struct\n");
+	file->device = container_of(inode->i_cdev, struct ib_ucm_device, dev);
 
 	return 0;
 }
@@ -1263,7 +1297,17 @@
 	return 0;
 }
 
-static struct file_operations ib_ucm_fops = {
+static void ib_ucm_release_class_dev(struct class_device *class_dev)
+{
+	struct ib_ucm_device *dev;
+
+	dev = container_of(class_dev, struct ib_ucm_device, class_dev);
+	cdev_del(&dev->dev);
+	clear_bit(dev->devnum, dev_map);
+	kfree(dev);
+}
+
+static struct file_operations ucm_fops = {
 	.owner 	 = THIS_MODULE,
 	.open 	 = ib_ucm_open,
 	.release = ib_ucm_close,
@@ -1271,55 +1315,142 @@
 	.poll    = ib_ucm_poll,
 };
 
+static struct class ucm_class = {
+	.name    = "infiniband_cm",
+	.release = ib_ucm_release_class_dev
+};
 
-static struct class *ib_ucm_class;
-static struct cdev	  ib_ucm_cdev;
+static ssize_t show_dev(struct class_device *class_dev, char *buf)
+{
+	struct ib_ucm_device *dev;
+
+	dev = container_of(class_dev, struct ib_ucm_device, class_dev);
+	return print_dev_t(buf, dev->dev.dev);
+}
+static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL);
+
+static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
+{
+	struct ib_ucm_device *dev;
+
+	dev = container_of(class_dev, struct ib_ucm_device, class_dev);
+	return sprintf(buf, "%s\n", dev->ib_dev->name);
+}
+static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
+
+static void ib_ucm_add_one(struct ib_device *device)
+{
+	struct ib_ucm_device *ucm_dev;
+
+	if (!device->alloc_ucontext)
+		return;
+
+	ucm_dev = kmalloc(sizeof *ucm_dev, GFP_KERNEL);
+	if (!ucm_dev)
+		return;
+
+	memset(ucm_dev, 0, sizeof *ucm_dev);
+	ucm_dev->ib_dev = device;
+
+	ucm_dev->devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
+	if (ucm_dev->devnum >= IB_UCM_MAX_DEVICES)
+		goto err;
+
+	set_bit(ucm_dev->devnum, dev_map);
+
+	cdev_init(&ucm_dev->dev, &ucm_fops);
+	ucm_dev->dev.owner = THIS_MODULE;
+	kobject_set_name(&ucm_dev->dev.kobj, "ucm%d", ucm_dev->devnum);
+	if (cdev_add(&ucm_dev->dev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1))
+		goto err;
+
+	ucm_dev->class_dev.class = &ucm_class;
+	ucm_dev->class_dev.dev = device->dma_device;
+	snprintf(ucm_dev->class_dev.class_id, BUS_ID_SIZE, "ucm%d",
+		 ucm_dev->devnum);
+	if (class_device_register(&ucm_dev->class_dev))
+		goto err_cdev;
+
+	if (class_device_create_file(&ucm_dev->class_dev,
+				     &class_device_attr_dev))
+		goto err_class;
+	if (class_device_create_file(&ucm_dev->class_dev,
+				     &class_device_attr_ibdev))
+		goto err_class;
+
+	ib_set_client_data(device, &ucm_client, ucm_dev);
+	return;
+
+err_class:
+	class_device_unregister(&ucm_dev->class_dev);
+err_cdev:
+	cdev_del(&ucm_dev->dev);
+	clear_bit(ucm_dev->devnum, dev_map);
+err:
+	kfree(ucm_dev);
+	return;
+}
+
+static void ib_ucm_remove_one(struct ib_device *device)
+{
+	struct ib_ucm_device *ucm_dev = ib_get_client_data(device, &ucm_client);
+
+	if (!ucm_dev)
+		return;
+
+	class_device_unregister(&ucm_dev->class_dev);
+}
+
+static ssize_t show_abi_version(struct class *class, char *buf)
+{
+	return sprintf(buf, "%d\n", IB_USER_CM_ABI_VERSION);
+}
+static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
 
 static int __init ib_ucm_init(void)
 {
-	int result;
+	int ret;
 
-	result = register_chrdev_region(IB_UCM_DEV, 1, "infiniband_cm");
-	if (result) {
-		ucm_dbg("Error <%d> registering dev\n", result);
-		goto err_chr;
+	ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES,
+				     "infiniband_cm");
+	if (ret) {
+		printk(KERN_ERR "ucm: couldn't register device number\n");
+		goto err;
 	}
 
-	cdev_init(&ib_ucm_cdev, &ib_ucm_fops);
-
-	result = cdev_add(&ib_ucm_cdev, IB_UCM_DEV, 1);
-	if (result) {
- 		ucm_dbg("Error <%d> adding cdev\n", result);
-		goto err_cdev;
+	ret = class_register(&ucm_class);
+	if (ret) {
+		printk(KERN_ERR "ucm: couldn't create class infiniband_cm\n");
+		goto err_chrdev;
 	}
 
-	ib_ucm_class = class_create(THIS_MODULE, "infiniband_cm");
-	if (IS_ERR(ib_ucm_class)) {
-		result = PTR_ERR(ib_ucm_class);
- 		ucm_dbg("Error <%d> creating class\n", result);
+	ret = class_create_file(&ucm_class, &class_attr_abi_version);
+	if (ret) {
+		printk(KERN_ERR "ucm: couldn't create abi_version attribute\n");
 		goto err_class;
 	}
 
-	class_device_create(ib_ucm_class, NULL, IB_UCM_DEV, NULL, "ucm");
-
-	idr_init(&ctx_id_table);
-	init_MUTEX(&ctx_id_mutex);
-
+	ret = ib_register_client(&ucm_client);
+	if (ret) {
+		printk(KERN_ERR "ucm: couldn't register client\n");
+		goto err_class;
+	}
 	return 0;
+
 err_class:
-	cdev_del(&ib_ucm_cdev);
-err_cdev:
-	unregister_chrdev_region(IB_UCM_DEV, 1);
-err_chr:
-	return result;
+	class_unregister(&ucm_class);
+err_chrdev:
+	unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
+err:
+	return ret;
 }
 
 static void __exit ib_ucm_cleanup(void)
 {
-	class_device_destroy(ib_ucm_class, IB_UCM_DEV);
-	class_destroy(ib_ucm_class);
-	cdev_del(&ib_ucm_cdev);
-	unregister_chrdev_region(IB_UCM_DEV, 1);
+	ib_unregister_client(&ucm_client);
+	class_unregister(&ucm_class);
+	unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
+	idr_destroy(&ctx_id_table);
 }
 
 module_init(ib_ucm_init);
diff --git a/drivers/infiniband/core/ucm.h b/drivers/infiniband/core/ucm.h
deleted file mode 100644
index f46f37b..0000000
--- a/drivers/infiniband/core/ucm.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2005 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Intel Corporation.  All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * $Id: ucm.h 2208 2005-04-22 23:24:31Z libor $
- */
-
-#ifndef UCM_H
-#define UCM_H
-
-#include <linux/fs.h>
-#include <linux/device.h>
-#include <linux/cdev.h>
-#include <linux/idr.h>
-
-#include <rdma/ib_cm.h>
-#include <rdma/ib_user_cm.h>
-
-struct ib_ucm_file {
-	struct semaphore mutex;
-	struct file *filp;
-
-	struct list_head  ctxs;   /* list of active connections */
-	struct list_head  events; /* list of pending events */
-	wait_queue_head_t poll_wait;
-};
-
-struct ib_ucm_context {
-	int                 id;
-	wait_queue_head_t   wait;
-	atomic_t            ref;
-	int		    events_reported;
-
-	struct ib_ucm_file *file;
-	struct ib_cm_id    *cm_id;
-	__u64		   uid;
-
-	struct list_head    events;    /* list of pending events. */
-	struct list_head    file_list; /* member in file ctx list */
-};
-
-struct ib_ucm_event {
-	struct ib_ucm_context *ctx;
-	struct list_head file_list; /* member in file event list */
-	struct list_head ctx_list;  /* member in ctx event list */
-
-	struct ib_cm_id *cm_id;
-	struct ib_ucm_event_resp resp;
-	void *data;
-	void *info;
-	int data_len;
-	int info_len;
-};
-
-#endif /* UCM_H */
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index a64d6b4..9e49816 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -64,18 +64,39 @@
 	IB_UMAD_MINOR_BASE = 0
 };
 
-struct ib_umad_port {
-	int                    devnum;
-	struct cdev            dev;
-	struct class_device    class_dev;
+/*
+ * Our lifetime rules for these structs are the following: each time a
+ * device special file is opened, we look up the corresponding struct
+ * ib_umad_port by minor in the umad_port[] table while holding the
+ * port_lock.  If this lookup succeeds, we take a reference on the
+ * ib_umad_port's struct ib_umad_device while still holding the
+ * port_lock; if the lookup fails, we fail the open().  We drop these
+ * references in the corresponding close().
+ *
+ * In addition to references coming from open character devices, there
+ * is one more reference to each ib_umad_device representing the
+ * module's reference taken when allocating the ib_umad_device in
+ * ib_umad_add_one().
+ *
+ * When destroying an ib_umad_device, we clear all of its
+ * ib_umad_ports from umad_port[] while holding port_lock before
+ * dropping the module's reference to the ib_umad_device.  This is
+ * always safe because any open() calls will either succeed and obtain
+ * a reference before we clear the umad_port[] entries, or fail after
+ * we clear the umad_port[] entries.
+ */
 
-	int                    sm_devnum;
-	struct cdev            sm_dev;
-	struct class_device    sm_class_dev;
+struct ib_umad_port {
+	struct cdev           *dev;
+	struct class_device   *class_dev;
+
+	struct cdev           *sm_dev;
+	struct class_device   *sm_class_dev;
 	struct semaphore       sm_sem;
 
 	struct ib_device      *ib_dev;
 	struct ib_umad_device *umad_dev;
+	int                    dev_num;
 	u8                     port_num;
 };
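The lifetime rules spelled out in the comment above boil down to a kref_get() under port_lock for every successful open() and a matching kref_put() in close(); the open()/close() hunks further down implement exactly this pairing, roughly:

	spin_lock(&port_lock);
	port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE];
	if (port)
		kref_get(&port->umad_dev->ref);	/* reference held for this open() */
	spin_unlock(&port_lock);
	if (!port)
		return -ENXIO;
	...
	/* in the corresponding close() */
	kref_put(&port->umad_dev->ref, ib_umad_release_dev);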
 
@@ -96,21 +117,31 @@
 };
 
 struct ib_umad_packet {
-	struct ib_ah      *ah;
 	struct ib_mad_send_buf *msg;
 	struct list_head   list;
 	int		   length;
-	DECLARE_PCI_UNMAP_ADDR(mapping)
 	struct ib_user_mad mad;
 };
 
+static struct class *umad_class;
+
 static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
-static spinlock_t map_lock;
+
+static DEFINE_SPINLOCK(port_lock);
+static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS];
 static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS * 2);
 
 static void ib_umad_add_one(struct ib_device *device);
 static void ib_umad_remove_one(struct ib_device *device);
 
+static void ib_umad_release_dev(struct kref *ref)
+{
+	struct ib_umad_device *dev =
+		container_of(ref, struct ib_umad_device, ref);
+
+	kfree(dev);
+}
+
 static int queue_packet(struct ib_umad_file *file,
 			struct ib_mad_agent *agent,
 			struct ib_umad_packet *packet)
@@ -139,22 +170,19 @@
 			 struct ib_mad_send_wc *send_wc)
 {
 	struct ib_umad_file *file = agent->context;
-	struct ib_umad_packet *timeout, *packet =
-		(void *) (unsigned long) send_wc->wr_id;
+	struct ib_umad_packet *timeout;
+	struct ib_umad_packet *packet = send_wc->send_buf->context[0];
 
-	ib_destroy_ah(packet->msg->send_wr.wr.ud.ah);
+	ib_destroy_ah(packet->msg->ah);
 	ib_free_send_mad(packet->msg);
 
 	if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
-		timeout = kmalloc(sizeof *timeout + sizeof (struct ib_mad_hdr),
-				  GFP_KERNEL);
+		timeout = kzalloc(sizeof *timeout + IB_MGMT_MAD_HDR, GFP_KERNEL);
 		if (!timeout)
 			goto out;
 
-		memset(timeout, 0, sizeof *timeout + sizeof (struct ib_mad_hdr));
-
-		timeout->length = sizeof (struct ib_mad_hdr);
-		timeout->mad.hdr.id = packet->mad.hdr.id;
+		timeout->length 	= IB_MGMT_MAD_HDR;
+		timeout->mad.hdr.id 	= packet->mad.hdr.id;
 		timeout->mad.hdr.status = ETIMEDOUT;
 		memcpy(timeout->mad.data, packet->mad.data,
 		       sizeof (struct ib_mad_hdr));
@@ -177,11 +205,10 @@
 		goto out;
 
 	length = mad_recv_wc->mad_len;
-	packet = kmalloc(sizeof *packet + length, GFP_KERNEL);
+	packet = kzalloc(sizeof *packet + length, GFP_KERNEL);
 	if (!packet)
 		goto out;
 
-	memset(packet, 0, sizeof *packet + length);
 	packet->length = length;
 
 	ib_coalesce_recv_mad(mad_recv_wc, packet->mad.data);
@@ -247,7 +274,7 @@
 		else
 			ret = -ENOSPC;
 	} else if (copy_to_user(buf, &packet->mad,
-			      packet->length + sizeof (struct ib_user_mad)))
+				packet->length + sizeof (struct ib_user_mad)))
 		ret = -EFAULT;
 	else
 		ret = packet->length + sizeof (struct ib_user_mad);
@@ -268,26 +295,23 @@
 	struct ib_umad_packet *packet;
 	struct ib_mad_agent *agent;
 	struct ib_ah_attr ah_attr;
-	struct ib_send_wr *bad_wr;
+	struct ib_ah *ah;
 	struct ib_rmpp_mad *rmpp_mad;
 	u8 method;
 	__be64 *tid;
-	int ret, length, hdr_len, data_len, rmpp_hdr_size;
+	int ret, length, hdr_len, copy_offset;
 	int rmpp_active = 0;
 
 	if (count < sizeof (struct ib_user_mad))
 		return -EINVAL;
 
 	length = count - sizeof (struct ib_user_mad);
-	packet = kmalloc(sizeof *packet + sizeof(struct ib_mad_hdr) +
-			 sizeof(struct ib_rmpp_hdr), GFP_KERNEL);
+	packet = kmalloc(sizeof *packet + IB_MGMT_RMPP_HDR, GFP_KERNEL);
 	if (!packet)
 		return -ENOMEM;
 
 	if (copy_from_user(&packet->mad, buf,
-			    sizeof (struct ib_user_mad) +
-			    sizeof(struct ib_mad_hdr) +
-			    sizeof(struct ib_rmpp_hdr))) {
+			    sizeof (struct ib_user_mad) + IB_MGMT_RMPP_HDR)) {
 		ret = -EFAULT;
 		goto err;
 	}
@@ -298,8 +322,6 @@
 		goto err;
 	}
 
-	packet->length = length;
-
 	down_read(&file->agent_mutex);
 
 	agent = file->agent[packet->mad.hdr.id];
@@ -321,9 +343,9 @@
 		ah_attr.grh.traffic_class  = packet->mad.hdr.traffic_class;
 	}
 
-	packet->ah = ib_create_ah(agent->qp->pd, &ah_attr);
-	if (IS_ERR(packet->ah)) {
-		ret = PTR_ERR(packet->ah);
+	ah = ib_create_ah(agent->qp->pd, &ah_attr);
+	if (IS_ERR(ah)) {
+		ret = PTR_ERR(ah);
 		goto err_up;
 	}
 
@@ -337,64 +359,44 @@
 
 		/* Validate that the management class can support RMPP */
 		if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) {
-			hdr_len = offsetof(struct ib_sa_mad, data);
-			data_len = length - hdr_len;
+			hdr_len = IB_MGMT_SA_HDR;
 		} else if ((rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
 			    (rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) {
-				hdr_len = offsetof(struct ib_vendor_mad, data);
-				data_len = length - hdr_len;
+				hdr_len = IB_MGMT_VENDOR_HDR;
 		} else {
 			ret = -EINVAL;
 			goto err_ah;
 		}
 		rmpp_active = 1;
+		copy_offset = IB_MGMT_RMPP_HDR;
 	} else {
-		if (length > sizeof(struct ib_mad)) {
-			ret = -EINVAL;
-			goto err_ah;
-		}
-		hdr_len = offsetof(struct ib_mad, data);
-		data_len = length - hdr_len;
+		hdr_len = IB_MGMT_MAD_HDR;
+		copy_offset = IB_MGMT_MAD_HDR;
 	}
 
 	packet->msg = ib_create_send_mad(agent,
 					 be32_to_cpu(packet->mad.hdr.qpn),
-					 0, packet->ah, rmpp_active,
-					 hdr_len, data_len,
+					 0, rmpp_active,
+					 hdr_len, length - hdr_len,
 					 GFP_KERNEL);
 	if (IS_ERR(packet->msg)) {
 		ret = PTR_ERR(packet->msg);
 		goto err_ah;
 	}
 
-	packet->msg->send_wr.wr.ud.timeout_ms  = packet->mad.hdr.timeout_ms;
-	packet->msg->send_wr.wr.ud.retries = packet->mad.hdr.retries;
+	packet->msg->ah 	= ah;
+	packet->msg->timeout_ms = packet->mad.hdr.timeout_ms;
+	packet->msg->retries 	= packet->mad.hdr.retries;
+	packet->msg->context[0] = packet;
 
-	/* Override send WR WRID initialized in ib_create_send_mad */
-	packet->msg->send_wr.wr_id = (unsigned long) packet;
-
-	if (!rmpp_active) {
-		/* Copy message from user into send buffer */
-		if (copy_from_user(packet->msg->mad,
-				   buf + sizeof(struct ib_user_mad), length)) {
-			ret = -EFAULT;
-			goto err_msg;
-		}
-	} else {
-		rmpp_hdr_size = sizeof(struct ib_mad_hdr) +
-				sizeof(struct ib_rmpp_hdr);
-
-		/* Only copy MAD headers (RMPP header in place) */
-		memcpy(packet->msg->mad, packet->mad.data,
-		       sizeof(struct ib_mad_hdr));
-
-		/* Now, copy rest of message from user into send buffer */
-		if (copy_from_user(((struct ib_rmpp_mad *) packet->msg->mad)->data,
-				   buf + sizeof (struct ib_user_mad) + rmpp_hdr_size,
-				   length - rmpp_hdr_size)) {
-			ret = -EFAULT;
-			goto err_msg;
-		}
+	/* Copy MAD headers (RMPP header in place) */
+	memcpy(packet->msg->mad, packet->mad.data, IB_MGMT_MAD_HDR);
+	/* Now, copy rest of message from user into send buffer */
+	if (copy_from_user(packet->msg->mad + copy_offset,
+			   buf + sizeof (struct ib_user_mad) + copy_offset,
+			   length - copy_offset)) {
+		ret = -EFAULT;
+		goto err_msg;
 	}
 
 	/*
@@ -403,29 +405,29 @@
 	 * transaction ID matches the agent being used to send the
 	 * MAD.
 	 */
-	method = packet->msg->mad->mad_hdr.method;
+	method = ((struct ib_mad_hdr *) packet->msg->mad)->method;
 
 	if (!(method & IB_MGMT_METHOD_RESP)       &&
 	    method != IB_MGMT_METHOD_TRAP_REPRESS &&
 	    method != IB_MGMT_METHOD_SEND) {
-		tid = &packet->msg->mad->mad_hdr.tid;
+		tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid;
 		*tid = cpu_to_be64(((u64) agent->hi_tid) << 32 |
 				   (be64_to_cpup(tid) & 0xffffffff));
 	}
 
-	ret = ib_post_send_mad(agent, &packet->msg->send_wr, &bad_wr);
+	ret = ib_post_send_mad(packet->msg, NULL);
 	if (ret)
 		goto err_msg;
 
 	up_read(&file->agent_mutex);
 
-	return sizeof (struct ib_user_mad_hdr) + packet->length;
+	return count;
 
 err_msg:
 	ib_free_send_mad(packet->msg);
 
 err_ah:
-	ib_destroy_ah(packet->ah);
+	ib_destroy_ah(ah);
 
 err_up:
 	up_read(&file->agent_mutex);
@@ -565,15 +567,23 @@
 
 static int ib_umad_open(struct inode *inode, struct file *filp)
 {
-	struct ib_umad_port *port =
-		container_of(inode->i_cdev, struct ib_umad_port, dev);
+	struct ib_umad_port *port;
 	struct ib_umad_file *file;
 
-	file = kmalloc(sizeof *file, GFP_KERNEL);
-	if (!file)
-		return -ENOMEM;
+	spin_lock(&port_lock);
+	port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE];
+	if (port)
+		kref_get(&port->umad_dev->ref);
+	spin_unlock(&port_lock);
 
-	memset(file, 0, sizeof *file);
+	if (!port)
+		return -ENXIO;
+
+	file = kzalloc(sizeof *file, GFP_KERNEL);
+	if (!file) {
+		kref_put(&port->umad_dev->ref, ib_umad_release_dev);
+		return -ENOMEM;
+	}
 
 	spin_lock_init(&file->recv_lock);
 	init_rwsem(&file->agent_mutex);
@@ -589,6 +599,7 @@
 static int ib_umad_close(struct inode *inode, struct file *filp)
 {
 	struct ib_umad_file *file = filp->private_data;
+	struct ib_umad_device *dev = file->port->umad_dev;
 	struct ib_umad_packet *packet, *tmp;
 	int i;
 
@@ -603,6 +614,8 @@
 
 	kfree(file);
 
+	kref_put(&dev->ref, ib_umad_release_dev);
+
 	return 0;
 }
 
@@ -619,30 +632,46 @@
 
 static int ib_umad_sm_open(struct inode *inode, struct file *filp)
 {
-	struct ib_umad_port *port =
-		container_of(inode->i_cdev, struct ib_umad_port, sm_dev);
+	struct ib_umad_port *port;
 	struct ib_port_modify props = {
 		.set_port_cap_mask = IB_PORT_SM
 	};
 	int ret;
 
+	spin_lock(&port_lock);
+	port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE - IB_UMAD_MAX_PORTS];
+	if (port)
+		kref_get(&port->umad_dev->ref);
+	spin_unlock(&port_lock);
+
+	if (!port)
+		return -ENXIO;
+
 	if (filp->f_flags & O_NONBLOCK) {
-		if (down_trylock(&port->sm_sem))
-			return -EAGAIN;
+		if (down_trylock(&port->sm_sem)) {
+			ret = -EAGAIN;
+			goto fail;
+		}
 	} else {
-		if (down_interruptible(&port->sm_sem))
-			return -ERESTARTSYS;
+		if (down_interruptible(&port->sm_sem)) {
+			ret = -ERESTARTSYS;
+			goto fail;
+		}
 	}
 
 	ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
 	if (ret) {
 		up(&port->sm_sem);
-		return ret;
+		goto fail;
 	}
 
 	filp->private_data = port;
 
 	return 0;
+
+fail:
+	kref_put(&port->umad_dev->ref, ib_umad_release_dev);
+	return ret;
 }
 
 static int ib_umad_sm_close(struct inode *inode, struct file *filp)
@@ -656,6 +685,8 @@
 	ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props);
 	up(&port->sm_sem);
 
+	kref_put(&port->umad_dev->ref, ib_umad_release_dev);
+
 	return ret;
 }
 
@@ -671,21 +702,13 @@
 	.remove = ib_umad_remove_one
 };
 
-static ssize_t show_dev(struct class_device *class_dev, char *buf)
-{
-	struct ib_umad_port *port = class_get_devdata(class_dev);
-
-	if (class_dev == &port->class_dev)
-		return print_dev_t(buf, port->dev.dev);
-	else
-		return print_dev_t(buf, port->sm_dev.dev);
-}
-static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL);
-
 static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
 {
 	struct ib_umad_port *port = class_get_devdata(class_dev);
 
+	if (!port)
+		return -ENODEV;
+
 	return sprintf(buf, "%s\n", port->ib_dev->name);
 }
 static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
@@ -694,38 +717,13 @@
 {
 	struct ib_umad_port *port = class_get_devdata(class_dev);
 
+	if (!port)
+		return -ENODEV;
+
 	return sprintf(buf, "%d\n", port->port_num);
 }
 static CLASS_DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
 
-static void ib_umad_release_dev(struct kref *ref)
-{
-	struct ib_umad_device *dev =
-		container_of(ref, struct ib_umad_device, ref);
-
-	kfree(dev);
-}
-
-static void ib_umad_release_port(struct class_device *class_dev)
-{
-	struct ib_umad_port *port = class_get_devdata(class_dev);
-
-	if (class_dev == &port->class_dev) {
-		cdev_del(&port->dev);
-		clear_bit(port->devnum, dev_map);
-	} else {
-		cdev_del(&port->sm_dev);
-		clear_bit(port->sm_devnum, dev_map);
-	}
-
-	kref_put(&port->umad_dev->ref, ib_umad_release_dev);
-}
-
-static struct class umad_class = {
-	.name    = "infiniband_mad",
-	.release = ib_umad_release_port
-};
-
 static ssize_t show_abi_version(struct class *class, char *buf)
 {
 	return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION);
@@ -735,91 +733,102 @@
 static int ib_umad_init_port(struct ib_device *device, int port_num,
 			     struct ib_umad_port *port)
 {
-	spin_lock(&map_lock);
-	port->devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
-	if (port->devnum >= IB_UMAD_MAX_PORTS) {
-		spin_unlock(&map_lock);
+	spin_lock(&port_lock);
+	port->dev_num = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
+	if (port->dev_num >= IB_UMAD_MAX_PORTS) {
+		spin_unlock(&port_lock);
 		return -1;
 	}
-	port->sm_devnum = find_next_zero_bit(dev_map, IB_UMAD_MAX_PORTS * 2, IB_UMAD_MAX_PORTS);
-	if (port->sm_devnum >= IB_UMAD_MAX_PORTS * 2) {
-		spin_unlock(&map_lock);
-		return -1;
-	}
-	set_bit(port->devnum, dev_map);
-	set_bit(port->sm_devnum, dev_map);
-	spin_unlock(&map_lock);
+	set_bit(port->dev_num, dev_map);
+	spin_unlock(&port_lock);
 
 	port->ib_dev   = device;
 	port->port_num = port_num;
 	init_MUTEX(&port->sm_sem);
 
-	cdev_init(&port->dev, &umad_fops);
-	port->dev.owner = THIS_MODULE;
-	kobject_set_name(&port->dev.kobj, "umad%d", port->devnum);
-	if (cdev_add(&port->dev, base_dev + port->devnum, 1))
+	port->dev = cdev_alloc();
+	if (!port->dev)
 		return -1;
-
-	port->class_dev.class = &umad_class;
-	port->class_dev.dev   = device->dma_device;
-
-	snprintf(port->class_dev.class_id, BUS_ID_SIZE, "umad%d", port->devnum);
-
-	if (class_device_register(&port->class_dev))
+	port->dev->owner = THIS_MODULE;
+	port->dev->ops   = &umad_fops;
+	kobject_set_name(&port->dev->kobj, "umad%d", port->dev_num);
+	if (cdev_add(port->dev, base_dev + port->dev_num, 1))
 		goto err_cdev;
 
-	class_set_devdata(&port->class_dev, port);
-	kref_get(&port->umad_dev->ref);
+	port->class_dev = class_device_create(umad_class, port->dev->dev,
+					      device->dma_device,
+					      "umad%d", port->dev_num);
+	if (IS_ERR(port->class_dev))
+		goto err_cdev;
 
-	if (class_device_create_file(&port->class_dev, &class_device_attr_dev))
+	if (class_device_create_file(port->class_dev, &class_device_attr_ibdev))
 		goto err_class;
-	if (class_device_create_file(&port->class_dev, &class_device_attr_ibdev))
-		goto err_class;
-	if (class_device_create_file(&port->class_dev, &class_device_attr_port))
+	if (class_device_create_file(port->class_dev, &class_device_attr_port))
 		goto err_class;
 
-	cdev_init(&port->sm_dev, &umad_sm_fops);
-	port->sm_dev.owner = THIS_MODULE;
-	kobject_set_name(&port->dev.kobj, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS);
-	if (cdev_add(&port->sm_dev, base_dev + port->sm_devnum, 1))
-		return -1;
-
-	port->sm_class_dev.class = &umad_class;
-	port->sm_class_dev.dev   = device->dma_device;
-
-	snprintf(port->sm_class_dev.class_id, BUS_ID_SIZE, "issm%d", port->sm_devnum - IB_UMAD_MAX_PORTS);
-
-	if (class_device_register(&port->sm_class_dev))
+	port->sm_dev = cdev_alloc();
+	if (!port->sm_dev)
+		goto err_class;
+	port->sm_dev->owner = THIS_MODULE;
+	port->sm_dev->ops   = &umad_sm_fops;
+	kobject_set_name(&port->dev->kobj, "issm%d", port->dev_num);
+	if (cdev_add(port->sm_dev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1))
 		goto err_sm_cdev;
 
-	class_set_devdata(&port->sm_class_dev, port);
-	kref_get(&port->umad_dev->ref);
+	port->sm_class_dev = class_device_create(umad_class, port->sm_dev->dev,
+						 device->dma_device,
+						 "issm%d", port->dev_num);
+	if (IS_ERR(port->sm_class_dev))
+		goto err_sm_cdev;
 
-	if (class_device_create_file(&port->sm_class_dev, &class_device_attr_dev))
+	class_set_devdata(port->class_dev,    port);
+	class_set_devdata(port->sm_class_dev, port);
+
+	if (class_device_create_file(port->sm_class_dev, &class_device_attr_ibdev))
 		goto err_sm_class;
-	if (class_device_create_file(&port->sm_class_dev, &class_device_attr_ibdev))
+	if (class_device_create_file(port->sm_class_dev, &class_device_attr_port))
 		goto err_sm_class;
-	if (class_device_create_file(&port->sm_class_dev, &class_device_attr_port))
-		goto err_sm_class;
+
+	spin_lock(&port_lock);
+	umad_port[port->dev_num] = port;
+	spin_unlock(&port_lock);
 
 	return 0;
 
 err_sm_class:
-	class_device_unregister(&port->sm_class_dev);
+	class_device_destroy(umad_class, port->sm_dev->dev);
 
 err_sm_cdev:
-	cdev_del(&port->sm_dev);
+	cdev_del(port->sm_dev);
 
 err_class:
-	class_device_unregister(&port->class_dev);
+	class_device_destroy(umad_class, port->dev->dev);
 
 err_cdev:
-	cdev_del(&port->dev);
-	clear_bit(port->devnum, dev_map);
+	cdev_del(port->dev);
+	clear_bit(port->dev_num, dev_map);
 
 	return -1;
 }
 
+static void ib_umad_kill_port(struct ib_umad_port *port)
+{
+	class_set_devdata(port->class_dev,    NULL);
+	class_set_devdata(port->sm_class_dev, NULL);
+
+	class_device_destroy(umad_class, port->dev->dev);
+	class_device_destroy(umad_class, port->sm_dev->dev);
+
+	cdev_del(port->dev);
+	cdev_del(port->sm_dev);
+
+	spin_lock(&port_lock);
+	umad_port[port->dev_num] = NULL;
+	spin_unlock(&port_lock);
+
+	clear_bit(port->dev_num, dev_map);
+}
+
 static void ib_umad_add_one(struct ib_device *device)
 {
 	struct ib_umad_device *umad_dev;
@@ -832,15 +841,12 @@
 		e = device->phys_port_cnt;
 	}
 
-	umad_dev = kmalloc(sizeof *umad_dev +
+	umad_dev = kzalloc(sizeof *umad_dev +
 			   (e - s + 1) * sizeof (struct ib_umad_port),
 			   GFP_KERNEL);
 	if (!umad_dev)
 		return;
 
-	memset(umad_dev, 0, sizeof *umad_dev +
-	       (e - s + 1) * sizeof (struct ib_umad_port));
-
 	kref_init(&umad_dev->ref);
 
 	umad_dev->start_port = s;
@@ -858,10 +864,8 @@
 	return;
 
 err:
-	while (--i >= s) {
-		class_device_unregister(&umad_dev->port[i - s].class_dev);
-		class_device_unregister(&umad_dev->port[i - s].sm_class_dev);
-	}
+	while (--i >= s)
+		ib_umad_kill_port(&umad_dev->port[i]);
 
 	kref_put(&umad_dev->ref, ib_umad_release_dev);
 }
@@ -874,10 +878,8 @@
 	if (!umad_dev)
 		return;
 
-	for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) {
-		class_device_unregister(&umad_dev->port[i].class_dev);
-		class_device_unregister(&umad_dev->port[i].sm_class_dev);
-	}
+	for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i)
+		ib_umad_kill_port(&umad_dev->port[i]);
 
 	kref_put(&umad_dev->ref, ib_umad_release_dev);
 }
@@ -886,8 +888,6 @@
 {
 	int ret;
 
-	spin_lock_init(&map_lock);
-
 	ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2,
 				     "infiniband_mad");
 	if (ret) {
@@ -895,13 +895,14 @@
 		goto out;
 	}
 
-	ret = class_register(&umad_class);
-	if (ret) {
+	umad_class = class_create(THIS_MODULE, "infiniband_mad");
+	if (IS_ERR(umad_class)) {
+		ret = PTR_ERR(umad_class);
 		printk(KERN_ERR "user_mad: couldn't create class infiniband_mad\n");
 		goto out_chrdev;
 	}
 
-	ret = class_create_file(&umad_class, &class_attr_abi_version);
+	ret = class_create_file(umad_class, &class_attr_abi_version);
 	if (ret) {
 		printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n");
 		goto out_class;
@@ -916,7 +917,7 @@
 	return 0;
 
 out_class:
-	class_unregister(&umad_class);
+	class_destroy(umad_class);
 
 out_chrdev:
 	unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
@@ -928,7 +929,7 @@
 static void __exit ib_umad_cleanup(void)
 {
 	ib_unregister_client(&umad_client);
-	class_unregister(&umad_class);
+	class_destroy(umad_class);
 	unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
 }
 
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index cc12434..031cdf3 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -3,6 +3,7 @@
  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -38,29 +39,47 @@
 #ifndef UVERBS_H
 #define UVERBS_H
 
-/* Include device.h and fs.h until cdev.h is self-sufficient */
-#include <linux/fs.h>
-#include <linux/device.h>
-#include <linux/cdev.h>
 #include <linux/kref.h>
 #include <linux/idr.h>
 
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_user_verbs.h>
 
+/*
+ * Our lifetime rules for these structs are the following:
+ *
+ * struct ib_uverbs_device: One reference is held by the module and
+ * released in ib_uverbs_remove_one().  Another reference is taken by
+ * ib_uverbs_open() each time the character special file is opened,
+ * and released in ib_uverbs_release_file() when the file is released.
+ *
+ * struct ib_uverbs_file: One reference is held by the VFS and
+ * released when the file is closed.  Another reference is taken when
+ * an asynchronous event queue file is created and released when the
+ * event file is closed.
+ *
+ * struct ib_uverbs_event_file: One reference is held by the VFS and
+ * released when the file is closed.  For asynchronous event files,
+ * another reference is held by the corresponding main context file
+ * and released when that file is closed.  For completion event files,
+ * a reference is taken when a CQ is created that uses the file, and
+ * released when the CQ is destroyed.
+ */
+
 struct ib_uverbs_device {
+	struct kref				ref;
 	int					devnum;
-	struct cdev				dev;
-	struct class_device			class_dev;
+	struct cdev			       *dev;
+	struct class_device		       *class_dev;
 	struct ib_device		       *ib_dev;
-	int					num_comp;
+	int					num_comp_vectors;
 };
 
 struct ib_uverbs_event_file {
 	struct kref				ref;
+	struct file			       *file;
 	struct ib_uverbs_file		       *uverbs_file;
 	spinlock_t				lock;
-	int					fd;
 	int					is_async;
 	wait_queue_head_t			poll_wait;
 	struct fasync_struct		       *async_queue;
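Editorial sketch: the lifetime comment added above is plain kref counting: every holder (module, open file, async/completion consumer, CQ) takes a reference, and the final kref_put() runs a release callback that frees the object. A generic illustration of that pattern follows; the struct and function names here are hypothetical, not the ones introduced by this patch:

/* Hypothetical illustration of the kref lifetime pattern described above. */
#include <linux/kref.h>
#include <linux/slab.h>

struct foo_obj {
	struct kref ref;
	/* ... payload ... */
};

static void foo_release(struct kref *ref)
{
	struct foo_obj *obj = container_of(ref, struct foo_obj, ref);

	kfree(obj);			/* runs only when the last reference drops */
}

static struct foo_obj *foo_create(void)
{
	struct foo_obj *obj = kmalloc(sizeof *obj, GFP_KERNEL);

	if (obj)
		kref_init(&obj->ref);	/* refcount starts at 1 for the creator */
	return obj;
}

static void foo_get(struct foo_obj *obj)	/* each additional holder */
{
	kref_get(&obj->ref);
}

static void foo_put(struct foo_obj *obj)	/* each holder's teardown path */
{
	kref_put(&obj->ref, foo_release);
}

This is why the later hunks can destroy the character devices eagerly in ib_uverbs_remove_one()/ib_umad_kill_port() while open files keep the containing structure alive until their last kref_put().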
@@ -73,8 +92,7 @@
 	struct ib_uverbs_device		       *device;
 	struct ib_ucontext		       *ucontext;
 	struct ib_event_handler			event_handler;
-	struct ib_uverbs_event_file	        async_file;
-	struct ib_uverbs_event_file	        comp_file[1];
+	struct ib_uverbs_event_file	       *async_file;
 };
 
 struct ib_uverbs_event {
@@ -110,10 +128,23 @@
 extern struct idr ib_uverbs_qp_idr;
 extern struct idr ib_uverbs_srq_idr;
 
+struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
+					int is_async, int *fd);
+void ib_uverbs_release_event_file(struct kref *ref);
+struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd);
+
+void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
+			   struct ib_uverbs_event_file *ev_file,
+			   struct ib_ucq_object *uobj);
+void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
+			      struct ib_uevent_object *uobj);
+
 void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context);
 void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr);
 void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
 void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
+void ib_uverbs_event_handler(struct ib_event_handler *handler,
+			     struct ib_event *event);
 
 int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
 		void *addr, size_t size, int write);
@@ -125,21 +156,26 @@
 				 const char __user *buf, int in_len,	\
 				 int out_len)
 
-IB_UVERBS_DECLARE_CMD(query_params);
 IB_UVERBS_DECLARE_CMD(get_context);
 IB_UVERBS_DECLARE_CMD(query_device);
 IB_UVERBS_DECLARE_CMD(query_port);
-IB_UVERBS_DECLARE_CMD(query_gid);
-IB_UVERBS_DECLARE_CMD(query_pkey);
 IB_UVERBS_DECLARE_CMD(alloc_pd);
 IB_UVERBS_DECLARE_CMD(dealloc_pd);
 IB_UVERBS_DECLARE_CMD(reg_mr);
 IB_UVERBS_DECLARE_CMD(dereg_mr);
+IB_UVERBS_DECLARE_CMD(create_comp_channel);
 IB_UVERBS_DECLARE_CMD(create_cq);
+IB_UVERBS_DECLARE_CMD(poll_cq);
+IB_UVERBS_DECLARE_CMD(req_notify_cq);
 IB_UVERBS_DECLARE_CMD(destroy_cq);
 IB_UVERBS_DECLARE_CMD(create_qp);
 IB_UVERBS_DECLARE_CMD(modify_qp);
 IB_UVERBS_DECLARE_CMD(destroy_qp);
+IB_UVERBS_DECLARE_CMD(post_send);
+IB_UVERBS_DECLARE_CMD(post_recv);
+IB_UVERBS_DECLARE_CMD(post_srq_recv);
+IB_UVERBS_DECLARE_CMD(create_ah);
+IB_UVERBS_DECLARE_CMD(destroy_ah);
 IB_UVERBS_DECLARE_CMD(attach_mcast);
 IB_UVERBS_DECLARE_CMD(detach_mcast);
 IB_UVERBS_DECLARE_CMD(create_srq);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 5624451..8c89abc 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -33,6 +34,9 @@
  * $Id: uverbs_cmd.c 2708 2005-06-24 17:27:21Z roland $
  */
 
+#include <linux/file.h>
+#include <linux/fs.h>
+
 #include <asm/uaccess.h>
 
 #include "uverbs.h"
@@ -45,29 +49,6 @@
 		(udata)->outlen = (olen);				\
 	} while (0)
 
-ssize_t ib_uverbs_query_params(struct ib_uverbs_file *file,
-			       const char __user *buf,
-			       int in_len, int out_len)
-{
-	struct ib_uverbs_query_params      cmd;
-	struct ib_uverbs_query_params_resp resp;
-
-	if (out_len < sizeof resp)
-		return -ENOSPC;
-
-	if (copy_from_user(&cmd, buf, sizeof cmd))
-		return -EFAULT;
-
-	memset(&resp, 0, sizeof resp);
-
-	resp.num_cq_events = file->device->num_comp;
-
-	if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp))
-	    return -EFAULT;
-
-	return in_len;
-}
-
 ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 			      const char __user *buf,
 			      int in_len, int out_len)
@@ -77,7 +58,7 @@
 	struct ib_udata                   udata;
 	struct ib_device                 *ibdev = file->device->ib_dev;
 	struct ib_ucontext		 *ucontext;
-	int i;
+	struct file			 *filp;
 	int ret;
 
 	if (out_len < sizeof resp)
@@ -110,26 +91,42 @@
 	INIT_LIST_HEAD(&ucontext->srq_list);
 	INIT_LIST_HEAD(&ucontext->ah_list);
 
-	resp.async_fd = file->async_file.fd;
-	for (i = 0; i < file->device->num_comp; ++i)
-		if (copy_to_user((void __user *) (unsigned long) cmd.cq_fd_tab +
-				 i * sizeof (__u32),
-				 &file->comp_file[i].fd, sizeof (__u32))) {
-			ret = -EFAULT;
-			goto err_free;
-		}
+	resp.num_comp_vectors = file->device->num_comp_vectors;
+
+	filp = ib_uverbs_alloc_event_file(file, 1, &resp.async_fd);
+	if (IS_ERR(filp)) {
+		ret = PTR_ERR(filp);
+		goto err_free;
+	}
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp)) {
 		ret = -EFAULT;
-		goto err_free;
+		goto err_file;
 	}
 
+	file->async_file = filp->private_data;
+
+	INIT_IB_EVENT_HANDLER(&file->event_handler, file->device->ib_dev,
+			      ib_uverbs_event_handler);
+	ret = ib_register_event_handler(&file->event_handler);
+	if (ret)
+		goto err_file;
+
+	kref_get(&file->async_file->ref);
+	kref_get(&file->ref);
 	file->ucontext = ucontext;
+
+	fd_install(resp.async_fd, filp);
+
 	up(&file->mutex);
 
 	return in_len;
 
+err_file:
+	put_unused_fd(resp.async_fd);
+	fput(filp);
+
 err_free:
 	ibdev->dealloc_ucontext(ucontext);
 
@@ -255,62 +252,6 @@
 	return in_len;
 }
 
-ssize_t ib_uverbs_query_gid(struct ib_uverbs_file *file,
-			    const char __user *buf,
-			    int in_len, int out_len)
-{
-	struct ib_uverbs_query_gid      cmd;
-	struct ib_uverbs_query_gid_resp resp;
-	int                             ret;
-
-	if (out_len < sizeof resp)
-		return -ENOSPC;
-
-	if (copy_from_user(&cmd, buf, sizeof cmd))
-		return -EFAULT;
-
-	memset(&resp, 0, sizeof resp);
-
-	ret = ib_query_gid(file->device->ib_dev, cmd.port_num, cmd.index,
-			   (union ib_gid *) resp.gid);
-	if (ret)
-		return ret;
-
-	if (copy_to_user((void __user *) (unsigned long) cmd.response,
-			 &resp, sizeof resp))
-		return -EFAULT;
-
-	return in_len;
-}
-
-ssize_t ib_uverbs_query_pkey(struct ib_uverbs_file *file,
-			     const char __user *buf,
-			     int in_len, int out_len)
-{
-	struct ib_uverbs_query_pkey      cmd;
-	struct ib_uverbs_query_pkey_resp resp;
-	int                              ret;
-
-	if (out_len < sizeof resp)
-		return -ENOSPC;
-
-	if (copy_from_user(&cmd, buf, sizeof cmd))
-		return -EFAULT;
-
-	memset(&resp, 0, sizeof resp);
-
-	ret = ib_query_pkey(file->device->ib_dev, cmd.port_num, cmd.index,
-			    &resp.pkey);
-	if (ret)
-		return ret;
-
-	if (copy_to_user((void __user *) (unsigned long) cmd.response,
-			 &resp, sizeof resp))
-		return -EFAULT;
-
-	return in_len;
-}
-
 ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
 			   const char __user *buf,
 			   int in_len, int out_len)
@@ -349,24 +290,20 @@
 	pd->uobject = uobj;
 	atomic_set(&pd->usecnt, 0);
 
+	down(&ib_uverbs_idr_mutex);
+
 retry:
 	if (!idr_pre_get(&ib_uverbs_pd_idr, GFP_KERNEL)) {
 		ret = -ENOMEM;
-		goto err_pd;
+		goto err_up;
 	}
 
-	down(&ib_uverbs_idr_mutex);
 	ret = idr_get_new(&ib_uverbs_pd_idr, pd, &uobj->id);
-	up(&ib_uverbs_idr_mutex);
 
 	if (ret == -EAGAIN)
 		goto retry;
 	if (ret)
-		goto err_pd;
-
-	down(&file->mutex);
-	list_add_tail(&uobj->list, &file->ucontext->pd_list);
-	up(&file->mutex);
+		goto err_up;
 
 	memset(&resp, 0, sizeof resp);
 	resp.pd_handle = uobj->id;
@@ -374,21 +311,22 @@
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp)) {
 		ret = -EFAULT;
-		goto err_list;
+		goto err_idr;
 	}
 
+	down(&file->mutex);
+	list_add_tail(&uobj->list, &file->ucontext->pd_list);
+	up(&file->mutex);
+
+	up(&ib_uverbs_idr_mutex);
+
 	return in_len;
 
-err_list:
- 	down(&file->mutex);
-	list_del(&uobj->list);
-	up(&file->mutex);
-
-	down(&ib_uverbs_idr_mutex);
+err_idr:
 	idr_remove(&ib_uverbs_pd_idr, uobj->id);
-	up(&ib_uverbs_idr_mutex);
 
-err_pd:
+err_up:
+	up(&ib_uverbs_idr_mutex);
 	ib_dealloc_pd(pd);
 
 err:
@@ -459,6 +397,14 @@
 	if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
 		return -EINVAL;
 
+	/*
+	 * Local write permission is required if remote write or
+	 * remote atomic permission is also requested.
+	 */
+	if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
+	    !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
+		return -EINVAL;
+
 	obj = kmalloc(sizeof *obj, GFP_KERNEL);
 	if (!obj)
 		return -ENOMEM;
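Editorial sketch: the new check above enforces an IB rule at the uverbs boundary: a region that may be written remotely (RDMA write or remote atomics) must also be locally writable, otherwise registration fails with -EINVAL. Seen from userspace through libibverbs (library usage is an assumption here, not part of this patch), the accepted combination looks roughly like this:

/* Illustrative libibverbs sketch (assumed API, not part of this patch). */
#include <stddef.h>
#include <infiniband/verbs.h>

static struct ibv_mr *register_rdma_buffer(struct ibv_pd *pd, void *buf, size_t len)
{
	/* Requesting only IBV_ACCESS_REMOTE_WRITE would now be rejected with EINVAL
	 * by the check above; local write must accompany remote write/atomic. */
	return ibv_reg_mr(pd, buf, len,
			  IBV_ACCESS_LOCAL_WRITE |
			  IBV_ACCESS_REMOTE_WRITE |
			  IBV_ACCESS_REMOTE_ATOMIC);
}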
@@ -524,24 +470,22 @@
 
 	resp.mr_handle = obj->uobject.id;
 
-	down(&file->mutex);
-	list_add_tail(&obj->uobject.list, &file->ucontext->mr_list);
-	up(&file->mutex);
-
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp)) {
 		ret = -EFAULT;
-		goto err_list;
+		goto err_idr;
 	}
 
+	down(&file->mutex);
+	list_add_tail(&obj->uobject.list, &file->ucontext->mr_list);
+	up(&file->mutex);
+
 	up(&ib_uverbs_idr_mutex);
 
 	return in_len;
 
-err_list:
-	down(&file->mutex);
-	list_del(&obj->uobject.list);
-	up(&file->mutex);
+err_idr:
+	idr_remove(&ib_uverbs_mr_idr, obj->uobject.id);
 
 err_unreg:
 	ib_dereg_mr(mr);
@@ -595,6 +539,35 @@
 	return ret ? ret : in_len;
 }
 
+ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
+				      const char __user *buf, int in_len,
+				      int out_len)
+{
+	struct ib_uverbs_create_comp_channel	   cmd;
+	struct ib_uverbs_create_comp_channel_resp  resp;
+	struct file				  *filp;
+
+	if (out_len < sizeof resp)
+		return -ENOSPC;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	filp = ib_uverbs_alloc_event_file(file, 0, &resp.fd);
+	if (IS_ERR(filp))
+		return PTR_ERR(filp);
+
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp)) {
+		put_unused_fd(resp.fd);
+		fput(filp);
+		return -EFAULT;
+	}
+
+	fd_install(resp.fd, filp);
+	return in_len;
+}
+
 ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
 			    const char __user *buf, int in_len,
 			    int out_len)
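Editorial sketch: ib_uverbs_create_comp_channel() above hands userspace an anonymous file descriptor wrapping an ib_uverbs_event_file, and ib_uverbs_create_cq() later ties a CQ to one of those channels through cmd.comp_channel (with cmd.comp_vector bounded by num_comp_vectors). Through libibverbs (assumed here for illustration) the two commands correspond to:

/* Illustrative libibverbs sketch; 'ctx' is an already-opened struct ibv_context *. */
#include <infiniband/verbs.h>

static struct ibv_cq *create_notified_cq(struct ibv_context *ctx, int cqe)
{
	/* CREATE_COMP_CHANNEL: returns an fd backed by ib_uverbs_event_file */
	struct ibv_comp_channel *channel = ibv_create_comp_channel(ctx);
	if (!channel)
		return NULL;

	/* CREATE_CQ with comp_channel >= 0: completions are queued on 'channel' */
	return ibv_create_cq(ctx, cqe, NULL /* cq_context */, channel,
			     0 /* comp_vector, must be < num_comp_vectors */);
}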
@@ -603,6 +576,7 @@
 	struct ib_uverbs_create_cq_resp resp;
 	struct ib_udata                 udata;
 	struct ib_ucq_object           *uobj;
+	struct ib_uverbs_event_file    *ev_file = NULL;
 	struct ib_cq                   *cq;
 	int                             ret;
 
@@ -616,9 +590,12 @@
 		   (unsigned long) cmd.response + sizeof resp,
 		   in_len - sizeof cmd, out_len - sizeof resp);
 
-	if (cmd.event_handler >= file->device->num_comp)
+	if (cmd.comp_vector >= file->device->num_comp_vectors)
 		return -EINVAL;
 
+	if (cmd.comp_channel >= 0)
+		ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel);
+
 	uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
 	if (!uobj)
 		return -ENOMEM;
@@ -641,27 +618,23 @@
 	cq->uobject       = &uobj->uobject;
 	cq->comp_handler  = ib_uverbs_comp_handler;
 	cq->event_handler = ib_uverbs_cq_event_handler;
-	cq->cq_context    = file;
+	cq->cq_context    = ev_file;
 	atomic_set(&cq->usecnt, 0);
 
+	down(&ib_uverbs_idr_mutex);
+
 retry:
 	if (!idr_pre_get(&ib_uverbs_cq_idr, GFP_KERNEL)) {
 		ret = -ENOMEM;
-		goto err_cq;
+		goto err_up;
 	}
 
-	down(&ib_uverbs_idr_mutex);
 	ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->uobject.id);
-	up(&ib_uverbs_idr_mutex);
 
 	if (ret == -EAGAIN)
 		goto retry;
 	if (ret)
-		goto err_cq;
-
-	down(&file->mutex);
-	list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list);
-	up(&file->mutex);
+		goto err_up;
 
 	memset(&resp, 0, sizeof resp);
 	resp.cq_handle = uobj->uobject.id;
@@ -670,21 +643,22 @@
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp)) {
 		ret = -EFAULT;
-		goto err_list;
+		goto err_idr;
 	}
 
+	down(&file->mutex);
+	list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list);
+	up(&file->mutex);
+
+	up(&ib_uverbs_idr_mutex);
+
 	return in_len;
 
-err_list:
- 	down(&file->mutex);
-	list_del(&uobj->uobject.list);
-	up(&file->mutex);
-
-	down(&ib_uverbs_idr_mutex);
+err_idr:
 	idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id);
-	up(&ib_uverbs_idr_mutex);
 
-err_cq:
+err_up:
+	up(&ib_uverbs_idr_mutex);
 	ib_destroy_cq(cq);
 
 err:
@@ -692,6 +666,93 @@
 	return ret;
 }
 
+ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
+			  const char __user *buf, int in_len,
+			  int out_len)
+{
+	struct ib_uverbs_poll_cq       cmd;
+	struct ib_uverbs_poll_cq_resp *resp;
+	struct ib_cq                  *cq;
+	struct ib_wc                  *wc;
+	int                            ret = 0;
+	int                            i;
+	int                            rsize;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	wc = kmalloc(cmd.ne * sizeof *wc, GFP_KERNEL);
+	if (!wc)
+		return -ENOMEM;
+
+	rsize = sizeof *resp + cmd.ne * sizeof(struct ib_uverbs_wc);
+	resp = kmalloc(rsize, GFP_KERNEL);
+	if (!resp) {
+		ret = -ENOMEM;
+		goto out_wc;
+	}
+
+	down(&ib_uverbs_idr_mutex);
+	cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
+	if (!cq || cq->uobject->context != file->ucontext) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	resp->count = ib_poll_cq(cq, cmd.ne, wc);
+
+	for (i = 0; i < resp->count; i++) {
+		resp->wc[i].wr_id 	   = wc[i].wr_id;
+		resp->wc[i].status 	   = wc[i].status;
+		resp->wc[i].opcode 	   = wc[i].opcode;
+		resp->wc[i].vendor_err 	   = wc[i].vendor_err;
+		resp->wc[i].byte_len 	   = wc[i].byte_len;
+		resp->wc[i].imm_data 	   = wc[i].imm_data;
+		resp->wc[i].qp_num 	   = wc[i].qp_num;
+		resp->wc[i].src_qp 	   = wc[i].src_qp;
+		resp->wc[i].wc_flags 	   = wc[i].wc_flags;
+		resp->wc[i].pkey_index 	   = wc[i].pkey_index;
+		resp->wc[i].slid 	   = wc[i].slid;
+		resp->wc[i].sl 		   = wc[i].sl;
+		resp->wc[i].dlid_path_bits = wc[i].dlid_path_bits;
+		resp->wc[i].port_num 	   = wc[i].port_num;
+	}
+
+	if (copy_to_user((void __user *) (unsigned long) cmd.response, resp, rsize))
+		ret = -EFAULT;
+
+out:
+	up(&ib_uverbs_idr_mutex);
+	kfree(resp);
+
+out_wc:
+	kfree(wc);
+	return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
+				const char __user *buf, int in_len,
+				int out_len)
+{
+	struct ib_uverbs_req_notify_cq cmd;
+	struct ib_cq                  *cq;
+	int                            ret = -EINVAL;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	down(&ib_uverbs_idr_mutex);
+	cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
+	if (cq && cq->uobject->context == file->ucontext) {
+		ib_req_notify_cq(cq, cmd.solicited_only ?
+					IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
+		ret = in_len;
+	}
+	up(&ib_uverbs_idr_mutex);
+
+	return ret;
+}
+
 ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
 			     const char __user *buf, int in_len,
 			     int out_len)
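Editorial sketch: ib_uverbs_req_notify_cq() and ib_uverbs_poll_cq() above are the kernel half of the usual event-driven completion loop: arm the CQ, sleep on the channel fd, acknowledge the event, re-arm, then drain. A hedged userspace counterpart (libibverbs assumed, continuing from the create_notified_cq() sketch earlier):

/* Illustrative completion loop; not part of this patch. */
#include <infiniband/verbs.h>

static int wait_and_drain(struct ibv_comp_channel *channel, struct ibv_cq *cq)
{
	struct ibv_cq *ev_cq;
	struct ibv_wc  wc[16];
	void          *ev_ctx;
	int            n;

	/* REQ_NOTIFY_CQ: solicited_only = 0 maps to IB_CQ_NEXT_COMP above */
	if (ibv_req_notify_cq(cq, 0))
		return -1;

	/* Blocks on the event fd until ib_uverbs_comp_handler() queues an event */
	if (ibv_get_cq_event(channel, &ev_cq, &ev_ctx))
		return -1;
	ibv_ack_cq_events(ev_cq, 1);

	/* Re-arm before draining so no completion is missed */
	if (ibv_req_notify_cq(ev_cq, 0))
		return -1;

	/* POLL_CQ: the kernel copies up to 'ne' work completions back to us */
	while ((n = ibv_poll_cq(ev_cq, 16, wc)) > 0)
		;	/* process wc[0..n-1] here */

	return n;	/* 0 once drained, negative on error */
}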
@@ -700,7 +761,7 @@
 	struct ib_uverbs_destroy_cq_resp resp;
 	struct ib_cq               	*cq;
 	struct ib_ucq_object        	*uobj;
-	struct ib_uverbs_event		*evt, *tmp;
+	struct ib_uverbs_event_file	*ev_file;
 	u64				 user_handle;
 	int                        	 ret = -EINVAL;
 
@@ -716,7 +777,8 @@
 		goto out;
 
 	user_handle = cq->uobject->user_handle;
-	uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
+	uobj        = container_of(cq->uobject, struct ib_ucq_object, uobject);
+	ev_file     = cq->cq_context;
 
 	ret = ib_destroy_cq(cq);
 	if (ret)
@@ -728,19 +790,7 @@
 	list_del(&uobj->uobject.list);
 	up(&file->mutex);
 
-	spin_lock_irq(&file->comp_file[0].lock);
-	list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
-		list_del(&evt->list);
-		kfree(evt);
-	}
-	spin_unlock_irq(&file->comp_file[0].lock);
-
-	spin_lock_irq(&file->async_file.lock);
-	list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
-		list_del(&evt->list);
-		kfree(evt);
-	}
-	spin_unlock_irq(&file->async_file.lock);
+	ib_uverbs_release_ucq(file, ev_file, uobj);
 
 	resp.comp_events_reported  = uobj->comp_events_reported;
 	resp.async_events_reported = uobj->async_events_reported;
@@ -859,24 +909,22 @@
 
 	resp.qp_handle = uobj->uobject.id;
 
-	down(&file->mutex);
-	list_add_tail(&uobj->uobject.list, &file->ucontext->qp_list);
-	up(&file->mutex);
-
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp)) {
 		ret = -EFAULT;
-		goto err_list;
+		goto err_idr;
 	}
 
+	down(&file->mutex);
+	list_add_tail(&uobj->uobject.list, &file->ucontext->qp_list);
+	up(&file->mutex);
+
 	up(&ib_uverbs_idr_mutex);
 
 	return in_len;
 
-err_list:
-	down(&file->mutex);
-	list_del(&uobj->uobject.list);
-	up(&file->mutex);
+err_idr:
+	idr_remove(&ib_uverbs_qp_idr, uobj->uobject.id);
 
 err_destroy:
 	ib_destroy_qp(qp);
@@ -979,7 +1027,6 @@
 	struct ib_uverbs_destroy_qp_resp resp;
 	struct ib_qp               	*qp;
 	struct ib_uevent_object        	*uobj;
-	struct ib_uverbs_event		*evt, *tmp;
 	int                        	 ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -1005,12 +1052,7 @@
 	list_del(&uobj->uobject.list);
 	up(&file->mutex);
 
-	spin_lock_irq(&file->async_file.lock);
-	list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
-		list_del(&evt->list);
-		kfree(evt);
-	}
-	spin_unlock_irq(&file->async_file.lock);
+	ib_uverbs_release_uevent(file, uobj);
 
 	resp.events_reported = uobj->events_reported;
 
@@ -1026,6 +1068,468 @@
 	return ret ? ret : in_len;
 }
 
+ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
+                            const char __user *buf, int in_len,
+                            int out_len)
+{
+	struct ib_uverbs_post_send      cmd;
+	struct ib_uverbs_post_send_resp resp;
+	struct ib_uverbs_send_wr       *user_wr;
+	struct ib_send_wr              *wr = NULL, *last, *next, *bad_wr;
+	struct ib_qp                   *qp;
+	int                             i, sg_ind;
+	ssize_t                         ret = -EINVAL;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count +
+	    cmd.sge_count * sizeof (struct ib_uverbs_sge))
+		return -EINVAL;
+
+	if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr))
+		return -EINVAL;
+
+	user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL);
+	if (!user_wr)
+		return -ENOMEM;
+
+	down(&ib_uverbs_idr_mutex);
+
+	qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
+	if (!qp || qp->uobject->context != file->ucontext)
+		goto out;
+
+	sg_ind = 0;
+	last = NULL;
+	for (i = 0; i < cmd.wr_count; ++i) {
+		if (copy_from_user(user_wr,
+				   buf + sizeof cmd + i * cmd.wqe_size,
+				   cmd.wqe_size)) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		if (user_wr->num_sge + sg_ind > cmd.sge_count) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
+			       user_wr->num_sge * sizeof (struct ib_sge),
+			       GFP_KERNEL);
+		if (!next) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		if (!last)
+			wr = next;
+		else
+			last->next = next;
+		last = next;
+
+		next->next       = NULL;
+		next->wr_id      = user_wr->wr_id;
+		next->num_sge    = user_wr->num_sge;
+		next->opcode     = user_wr->opcode;
+		next->send_flags = user_wr->send_flags;
+		next->imm_data   = user_wr->imm_data;
+
+		if (qp->qp_type == IB_QPT_UD) {
+			next->wr.ud.ah = idr_find(&ib_uverbs_ah_idr,
+						  user_wr->wr.ud.ah);
+			if (!next->wr.ud.ah) {
+				ret = -EINVAL;
+				goto out;
+			}
+			next->wr.ud.remote_qpn  = user_wr->wr.ud.remote_qpn;
+			next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
+		} else {
+			switch (next->opcode) {
+			case IB_WR_RDMA_WRITE:
+			case IB_WR_RDMA_WRITE_WITH_IMM:
+			case IB_WR_RDMA_READ:
+				next->wr.rdma.remote_addr =
+					user_wr->wr.rdma.remote_addr;
+				next->wr.rdma.rkey        =
+					user_wr->wr.rdma.rkey;
+				break;
+			case IB_WR_ATOMIC_CMP_AND_SWP:
+			case IB_WR_ATOMIC_FETCH_AND_ADD:
+				next->wr.atomic.remote_addr =
+					user_wr->wr.atomic.remote_addr;
+				next->wr.atomic.compare_add =
+					user_wr->wr.atomic.compare_add;
+				next->wr.atomic.swap = user_wr->wr.atomic.swap;
+				next->wr.atomic.rkey = user_wr->wr.atomic.rkey;
+				break;
+			default:
+				break;
+			}
+		}
+
+		if (next->num_sge) {
+			next->sg_list = (void *) next +
+				ALIGN(sizeof *next, sizeof (struct ib_sge));
+			if (copy_from_user(next->sg_list,
+					   buf + sizeof cmd +
+					   cmd.wr_count * cmd.wqe_size +
+					   sg_ind * sizeof (struct ib_sge),
+					   next->num_sge * sizeof (struct ib_sge))) {
+				ret = -EFAULT;
+				goto out;
+			}
+			sg_ind += next->num_sge;
+		} else
+			next->sg_list = NULL;
+	}
+
+	resp.bad_wr = 0;
+	ret = qp->device->post_send(qp, wr, &bad_wr);
+	if (ret)
+		for (next = wr; next; next = next->next) {
+			++resp.bad_wr;
+			if (next == bad_wr)
+				break;
+		}
+
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp))
+		ret = -EFAULT;
+
+out:
+	up(&ib_uverbs_idr_mutex);
+
+	while (wr) {
+		next = wr->next;
+		kfree(wr);
+		wr = next;
+	}
+
+	kfree(user_wr);
+
+	return ret ? ret : in_len;
+}
+
+static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
+						    int in_len,
+						    u32 wr_count,
+						    u32 sge_count,
+						    u32 wqe_size)
+{
+	struct ib_uverbs_recv_wr *user_wr;
+	struct ib_recv_wr        *wr = NULL, *last, *next;
+	int                       sg_ind;
+	int                       i;
+	int                       ret;
+
+	if (in_len < wqe_size * wr_count +
+	    sge_count * sizeof (struct ib_uverbs_sge))
+		return ERR_PTR(-EINVAL);
+
+	if (wqe_size < sizeof (struct ib_uverbs_recv_wr))
+		return ERR_PTR(-EINVAL);
+
+	user_wr = kmalloc(wqe_size, GFP_KERNEL);
+	if (!user_wr)
+		return ERR_PTR(-ENOMEM);
+
+	sg_ind = 0;
+	last = NULL;
+	for (i = 0; i < wr_count; ++i) {
+		if (copy_from_user(user_wr, buf + i * wqe_size,
+				   wqe_size)) {
+			ret = -EFAULT;
+			goto err;
+		}
+
+		if (user_wr->num_sge + sg_ind > sge_count) {
+			ret = -EINVAL;
+			goto err;
+		}
+
+		next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
+			       user_wr->num_sge * sizeof (struct ib_sge),
+			       GFP_KERNEL);
+		if (!next) {
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		if (!last)
+			wr = next;
+		else
+			last->next = next;
+		last = next;
+
+		next->next       = NULL;
+		next->wr_id      = user_wr->wr_id;
+		next->num_sge    = user_wr->num_sge;
+
+		if (next->num_sge) {
+			next->sg_list = (void *) next +
+				ALIGN(sizeof *next, sizeof (struct ib_sge));
+			if (copy_from_user(next->sg_list,
+					   buf + wr_count * wqe_size +
+					   sg_ind * sizeof (struct ib_sge),
+					   next->num_sge * sizeof (struct ib_sge))) {
+				ret = -EFAULT;
+				goto err;
+			}
+			sg_ind += next->num_sge;
+		} else
+			next->sg_list = NULL;
+	}
+
+	kfree(user_wr);
+	return wr;
+
+err:
+	kfree(user_wr);
+
+	while (wr) {
+		next = wr->next;
+		kfree(wr);
+		wr = next;
+	}
+
+	return ERR_PTR(ret);
+}
+
+ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
+                            const char __user *buf, int in_len,
+                            int out_len)
+{
+	struct ib_uverbs_post_recv      cmd;
+	struct ib_uverbs_post_recv_resp resp;
+	struct ib_recv_wr              *wr, *next, *bad_wr;
+	struct ib_qp                   *qp;
+	ssize_t                         ret = -EINVAL;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
+				       in_len - sizeof cmd, cmd.wr_count,
+				       cmd.sge_count, cmd.wqe_size);
+	if (IS_ERR(wr))
+		return PTR_ERR(wr);
+
+	down(&ib_uverbs_idr_mutex);
+
+	qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
+	if (!qp || qp->uobject->context != file->ucontext)
+		goto out;
+
+	resp.bad_wr = 0;
+	ret = qp->device->post_recv(qp, wr, &bad_wr);
+	if (ret)
+		for (next = wr; next; next = next->next) {
+			++resp.bad_wr;
+			if (next == bad_wr)
+				break;
+		}
+
+
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp))
+		ret = -EFAULT;
+
+out:
+	up(&ib_uverbs_idr_mutex);
+
+	while (wr) {
+		next = wr->next;
+		kfree(wr);
+		wr = next;
+	}
+
+	return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
+                            const char __user *buf, int in_len,
+                            int out_len)
+{
+	struct ib_uverbs_post_srq_recv      cmd;
+	struct ib_uverbs_post_srq_recv_resp resp;
+	struct ib_recv_wr                  *wr, *next, *bad_wr;
+	struct ib_srq                      *srq;
+	ssize_t                             ret = -EINVAL;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
+				       in_len - sizeof cmd, cmd.wr_count,
+				       cmd.sge_count, cmd.wqe_size);
+	if (IS_ERR(wr))
+		return PTR_ERR(wr);
+
+	down(&ib_uverbs_idr_mutex);
+
+	srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle);
+	if (!srq || srq->uobject->context != file->ucontext)
+		goto out;
+
+	resp.bad_wr = 0;
+	ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
+	if (ret)
+		for (next = wr; next; next = next->next) {
+			++resp.bad_wr;
+			if (next == bad_wr)
+				break;
+		}
+
+
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp))
+		ret = -EFAULT;
+
+out:
+	up(&ib_uverbs_idr_mutex);
+
+	while (wr) {
+		next = wr->next;
+		kfree(wr);
+		wr = next;
+	}
+
+	return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
+			    const char __user *buf, int in_len,
+			    int out_len)
+{
+	struct ib_uverbs_create_ah	 cmd;
+	struct ib_uverbs_create_ah_resp	 resp;
+	struct ib_uobject		*uobj;
+	struct ib_pd			*pd;
+	struct ib_ah			*ah;
+	struct ib_ah_attr		attr;
+	int ret;
+
+	if (out_len < sizeof resp)
+		return -ENOSPC;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
+	if (!uobj)
+		return -ENOMEM;
+
+	down(&ib_uverbs_idr_mutex);
+
+	pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle);
+	if (!pd || pd->uobject->context != file->ucontext) {
+		ret = -EINVAL;
+		goto err_up;
+	}
+
+	uobj->user_handle = cmd.user_handle;
+	uobj->context     = file->ucontext;
+
+	attr.dlid 	       = cmd.attr.dlid;
+	attr.sl 	       = cmd.attr.sl;
+	attr.src_path_bits     = cmd.attr.src_path_bits;
+	attr.static_rate       = cmd.attr.static_rate;
+	attr.port_num 	       = cmd.attr.port_num;
+	attr.grh.flow_label    = cmd.attr.grh.flow_label;
+	attr.grh.sgid_index    = cmd.attr.grh.sgid_index;
+	attr.grh.hop_limit     = cmd.attr.grh.hop_limit;
+	attr.grh.traffic_class = cmd.attr.grh.traffic_class;
+	memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
+
+	ah = ib_create_ah(pd, &attr);
+	if (IS_ERR(ah)) {
+		ret = PTR_ERR(ah);
+		goto err_up;
+	}
+
+	ah->uobject = uobj;
+
+retry:
+	if (!idr_pre_get(&ib_uverbs_ah_idr, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto err_destroy;
+	}
+
+	ret = idr_get_new(&ib_uverbs_ah_idr, ah, &uobj->id);
+
+	if (ret == -EAGAIN)
+		goto retry;
+	if (ret)
+		goto err_destroy;
+
+	resp.ah_handle = uobj->id;
+
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp)) {
+		ret = -EFAULT;
+		goto err_idr;
+	}
+
+	down(&file->mutex);
+	list_add_tail(&uobj->list, &file->ucontext->ah_list);
+	up(&file->mutex);
+
+	up(&ib_uverbs_idr_mutex);
+
+	return in_len;
+
+err_idr:
+	idr_remove(&ib_uverbs_ah_idr, uobj->id);
+
+err_destroy:
+	ib_destroy_ah(ah);
+
+err_up:
+	up(&ib_uverbs_idr_mutex);
+
+	kfree(uobj);
+	return ret;
+}
+
+ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
+			     const char __user *buf, int in_len, int out_len)
+{
+	struct ib_uverbs_destroy_ah cmd;
+	struct ib_ah		   *ah;
+	struct ib_uobject	   *uobj;
+	int			    ret = -EINVAL;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	down(&ib_uverbs_idr_mutex);
+
+	ah = idr_find(&ib_uverbs_ah_idr, cmd.ah_handle);
+	if (!ah || ah->uobject->context != file->ucontext)
+		goto out;
+
+	uobj = ah->uobject;
+
+	ret = ib_destroy_ah(ah);
+	if (ret)
+		goto out;
+
+	idr_remove(&ib_uverbs_ah_idr, cmd.ah_handle);
+
+	down(&file->mutex);
+	list_del(&uobj->list);
+	up(&file->mutex);
+
+	kfree(uobj);
+
+out:
+	up(&ib_uverbs_idr_mutex);
+
+	return ret ? ret : in_len;
+}
+
 ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
 			       const char __user *buf, int in_len,
 			       int out_len)
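Editorial sketch: ib_uverbs_post_send() above unmarshals a packed array of wqe_size-byte work requests followed by sge_count scatter/gather entries and rebuilds the kernel's linked ib_send_wr chain, reporting via resp.bad_wr how far posting got. The userspace side (via libibverbs, assumed here) builds the same request as an ibv_send_wr list; roughly:

/* Illustrative RDMA-write post; qp, mr, rkey and raddr are assumed to be set up already. */
#include <stdint.h>
#include <string.h>
#include <infiniband/verbs.h>

static int post_rdma_write(struct ibv_qp *qp, struct ibv_mr *mr,
			   uint64_t raddr, uint32_t rkey, uint32_t len)
{
	struct ibv_sge sge = {
		.addr   = (uintptr_t) mr->addr,
		.length = len,
		.lkey   = mr->lkey,
	};
	struct ibv_send_wr wr, *bad_wr = NULL;

	memset(&wr, 0, sizeof wr);
	wr.wr_id               = 1;			/* echoed back in the completion */
	wr.sg_list             = &sge;
	wr.num_sge             = 1;			/* checked against sge_count in the kernel */
	wr.opcode              = IBV_WR_RDMA_WRITE;	/* taken through the RDMA case above */
	wr.send_flags          = IBV_SEND_SIGNALED;
	wr.wr.rdma.remote_addr = raddr;			/* copied into ib_send_wr.wr.rdma above */
	wr.wr.rdma.rkey        = rkey;

	return ibv_post_send(qp, &wr, &bad_wr);		/* bad_wr marks where posting stopped */
}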
@@ -1148,24 +1652,22 @@
 
 	resp.srq_handle = uobj->uobject.id;
 
-	down(&file->mutex);
-	list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list);
-	up(&file->mutex);
-
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp)) {
 		ret = -EFAULT;
-		goto err_list;
+		goto err_idr;
 	}
 
+	down(&file->mutex);
+	list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list);
+	up(&file->mutex);
+
 	up(&ib_uverbs_idr_mutex);
 
 	return in_len;
 
-err_list:
-	down(&file->mutex);
-	list_del(&uobj->uobject.list);
-	up(&file->mutex);
+err_idr:
+	idr_remove(&ib_uverbs_srq_idr, uobj->uobject.id);
 
 err_destroy:
 	ib_destroy_srq(srq);
@@ -1217,7 +1719,6 @@
 	struct ib_uverbs_destroy_srq_resp resp;
 	struct ib_srq               	 *srq;
 	struct ib_uevent_object        	 *uobj;
-	struct ib_uverbs_event		 *evt, *tmp;
 	int                         	  ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -1243,12 +1744,7 @@
 	list_del(&uobj->uobject.list);
 	up(&file->mutex);
 
-	spin_lock_irq(&file->async_file.lock);
-	list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
-		list_del(&evt->list);
-		kfree(evt);
-	}
-	spin_unlock_irq(&file->async_file.lock);
+	ib_uverbs_release_uevent(file, uobj);
 
 	resp.events_reported = uobj->events_reported;
 
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 1251180..ac08d2c 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -3,6 +3,7 @@
  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -43,6 +44,7 @@
 #include <linux/poll.h>
 #include <linux/file.h>
 #include <linux/mount.h>
+#include <linux/cdev.h>
 
 #include <asm/uaccess.h>
 
@@ -62,6 +64,8 @@
 
 #define IB_UVERBS_BASE_DEV	MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
 
+static struct class *uverbs_class;
+
 DECLARE_MUTEX(ib_uverbs_idr_mutex);
 DEFINE_IDR(ib_uverbs_pd_idr);
 DEFINE_IDR(ib_uverbs_mr_idr);
@@ -72,31 +76,37 @@
 DEFINE_IDR(ib_uverbs_srq_idr);
 
 static spinlock_t map_lock;
+static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES];
 static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
 
 static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
 				     const char __user *buf, int in_len,
 				     int out_len) = {
-	[IB_USER_VERBS_CMD_QUERY_PARAMS]  = ib_uverbs_query_params,
-	[IB_USER_VERBS_CMD_GET_CONTEXT]   = ib_uverbs_get_context,
-	[IB_USER_VERBS_CMD_QUERY_DEVICE]  = ib_uverbs_query_device,
-	[IB_USER_VERBS_CMD_QUERY_PORT]    = ib_uverbs_query_port,
-	[IB_USER_VERBS_CMD_QUERY_GID]     = ib_uverbs_query_gid,
-	[IB_USER_VERBS_CMD_QUERY_PKEY]    = ib_uverbs_query_pkey,
-	[IB_USER_VERBS_CMD_ALLOC_PD]      = ib_uverbs_alloc_pd,
-	[IB_USER_VERBS_CMD_DEALLOC_PD]    = ib_uverbs_dealloc_pd,
-	[IB_USER_VERBS_CMD_REG_MR]        = ib_uverbs_reg_mr,
-	[IB_USER_VERBS_CMD_DEREG_MR]      = ib_uverbs_dereg_mr,
-	[IB_USER_VERBS_CMD_CREATE_CQ]     = ib_uverbs_create_cq,
-	[IB_USER_VERBS_CMD_DESTROY_CQ]    = ib_uverbs_destroy_cq,
-	[IB_USER_VERBS_CMD_CREATE_QP]     = ib_uverbs_create_qp,
-	[IB_USER_VERBS_CMD_MODIFY_QP]     = ib_uverbs_modify_qp,
-	[IB_USER_VERBS_CMD_DESTROY_QP]    = ib_uverbs_destroy_qp,
-	[IB_USER_VERBS_CMD_ATTACH_MCAST]  = ib_uverbs_attach_mcast,
-	[IB_USER_VERBS_CMD_DETACH_MCAST]  = ib_uverbs_detach_mcast,
-	[IB_USER_VERBS_CMD_CREATE_SRQ]    = ib_uverbs_create_srq,
-	[IB_USER_VERBS_CMD_MODIFY_SRQ]    = ib_uverbs_modify_srq,
-	[IB_USER_VERBS_CMD_DESTROY_SRQ]   = ib_uverbs_destroy_srq,
+	[IB_USER_VERBS_CMD_GET_CONTEXT]   	= ib_uverbs_get_context,
+	[IB_USER_VERBS_CMD_QUERY_DEVICE]  	= ib_uverbs_query_device,
+	[IB_USER_VERBS_CMD_QUERY_PORT]    	= ib_uverbs_query_port,
+	[IB_USER_VERBS_CMD_ALLOC_PD]      	= ib_uverbs_alloc_pd,
+	[IB_USER_VERBS_CMD_DEALLOC_PD]    	= ib_uverbs_dealloc_pd,
+	[IB_USER_VERBS_CMD_REG_MR]        	= ib_uverbs_reg_mr,
+	[IB_USER_VERBS_CMD_DEREG_MR]      	= ib_uverbs_dereg_mr,
+	[IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
+	[IB_USER_VERBS_CMD_CREATE_CQ]     	= ib_uverbs_create_cq,
+	[IB_USER_VERBS_CMD_POLL_CQ]     	= ib_uverbs_poll_cq,
+	[IB_USER_VERBS_CMD_REQ_NOTIFY_CQ]     	= ib_uverbs_req_notify_cq,
+	[IB_USER_VERBS_CMD_DESTROY_CQ]    	= ib_uverbs_destroy_cq,
+	[IB_USER_VERBS_CMD_CREATE_QP]     	= ib_uverbs_create_qp,
+	[IB_USER_VERBS_CMD_MODIFY_QP]     	= ib_uverbs_modify_qp,
+	[IB_USER_VERBS_CMD_DESTROY_QP]    	= ib_uverbs_destroy_qp,
+	[IB_USER_VERBS_CMD_POST_SEND]    	= ib_uverbs_post_send,
+	[IB_USER_VERBS_CMD_POST_RECV]    	= ib_uverbs_post_recv,
+	[IB_USER_VERBS_CMD_POST_SRQ_RECV]    	= ib_uverbs_post_srq_recv,
+	[IB_USER_VERBS_CMD_CREATE_AH]    	= ib_uverbs_create_ah,
+	[IB_USER_VERBS_CMD_DESTROY_AH]    	= ib_uverbs_destroy_ah,
+	[IB_USER_VERBS_CMD_ATTACH_MCAST]  	= ib_uverbs_attach_mcast,
+	[IB_USER_VERBS_CMD_DETACH_MCAST]  	= ib_uverbs_detach_mcast,
+	[IB_USER_VERBS_CMD_CREATE_SRQ]    	= ib_uverbs_create_srq,
+	[IB_USER_VERBS_CMD_MODIFY_SRQ]    	= ib_uverbs_modify_srq,
+	[IB_USER_VERBS_CMD_DESTROY_SRQ]   	= ib_uverbs_destroy_srq,
 };
 
 static struct vfsmount *uverbs_event_mnt;
@@ -104,7 +114,54 @@
 static void ib_uverbs_add_one(struct ib_device *device);
 static void ib_uverbs_remove_one(struct ib_device *device);
 
-static int ib_dealloc_ucontext(struct ib_ucontext *context)
+static void ib_uverbs_release_dev(struct kref *ref)
+{
+	struct ib_uverbs_device *dev =
+		container_of(ref, struct ib_uverbs_device, ref);
+
+	kfree(dev);
+}
+
+void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
+			  struct ib_uverbs_event_file *ev_file,
+			  struct ib_ucq_object *uobj)
+{
+	struct ib_uverbs_event *evt, *tmp;
+
+	if (ev_file) {
+		spin_lock_irq(&ev_file->lock);
+		list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
+			list_del(&evt->list);
+			kfree(evt);
+		}
+		spin_unlock_irq(&ev_file->lock);
+
+		kref_put(&ev_file->ref, ib_uverbs_release_event_file);
+	}
+
+	spin_lock_irq(&file->async_file->lock);
+	list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
+		list_del(&evt->list);
+		kfree(evt);
+	}
+	spin_unlock_irq(&file->async_file->lock);
+}
+
+void ib_uverbs_release_uevent(struct ib_uverbs_file *file,
+			      struct ib_uevent_object *uobj)
+{
+	struct ib_uverbs_event *evt, *tmp;
+
+	spin_lock_irq(&file->async_file->lock);
+	list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
+		list_del(&evt->list);
+		kfree(evt);
+	}
+	spin_unlock_irq(&file->async_file->lock);
+}
+
+static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
+				      struct ib_ucontext *context)
 {
 	struct ib_uobject *uobj, *tmp;
 
@@ -113,30 +170,46 @@
 
 	down(&ib_uverbs_idr_mutex);
 
-	/* XXX Free AHs */
+	list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
+		struct ib_ah *ah = idr_find(&ib_uverbs_ah_idr, uobj->id);
+		idr_remove(&ib_uverbs_ah_idr, uobj->id);
+		ib_destroy_ah(ah);
+		list_del(&uobj->list);
+		kfree(uobj);
+	}
 
 	list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {
 		struct ib_qp *qp = idr_find(&ib_uverbs_qp_idr, uobj->id);
+		struct ib_uevent_object *uevent =
+			container_of(uobj, struct ib_uevent_object, uobject);
 		idr_remove(&ib_uverbs_qp_idr, uobj->id);
 		ib_destroy_qp(qp);
 		list_del(&uobj->list);
-		kfree(container_of(uobj, struct ib_uevent_object, uobject));
+		ib_uverbs_release_uevent(file, uevent);
+		kfree(uevent);
 	}
 
 	list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
 		struct ib_cq *cq = idr_find(&ib_uverbs_cq_idr, uobj->id);
+		struct ib_uverbs_event_file *ev_file = cq->cq_context;
+		struct ib_ucq_object *ucq =
+			container_of(uobj, struct ib_ucq_object, uobject);
 		idr_remove(&ib_uverbs_cq_idr, uobj->id);
 		ib_destroy_cq(cq);
 		list_del(&uobj->list);
-		kfree(container_of(uobj, struct ib_ucq_object, uobject));
+		ib_uverbs_release_ucq(file, ev_file, ucq);
+		kfree(ucq);
 	}
 
 	list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
 		struct ib_srq *srq = idr_find(&ib_uverbs_srq_idr, uobj->id);
+		struct ib_uevent_object *uevent =
+			container_of(uobj, struct ib_uevent_object, uobject);
 		idr_remove(&ib_uverbs_srq_idr, uobj->id);
 		ib_destroy_srq(srq);
 		list_del(&uobj->list);
-		kfree(container_of(uobj, struct ib_uevent_object, uobject));
+		ib_uverbs_release_uevent(file, uevent);
+		kfree(uevent);
 	}
 
 	/* XXX Free MWs */
@@ -175,6 +248,8 @@
 		container_of(ref, struct ib_uverbs_file, ref);
 
 	module_put(file->device->ib_dev->owner);
+	kref_put(&file->device->ref, ib_uverbs_release_dev);
+
 	kfree(file);
 }
 
@@ -188,25 +263,19 @@
 
 	spin_lock_irq(&file->lock);
 
-	while (list_empty(&file->event_list) && file->fd >= 0) {
+	while (list_empty(&file->event_list)) {
 		spin_unlock_irq(&file->lock);
 
 		if (filp->f_flags & O_NONBLOCK)
 			return -EAGAIN;
 
 		if (wait_event_interruptible(file->poll_wait,
-					     !list_empty(&file->event_list) ||
-					     file->fd < 0))
+					     !list_empty(&file->event_list)))
 			return -ERESTARTSYS;
 
 		spin_lock_irq(&file->lock);
 	}
 
-	if (file->fd < 0) {
-		spin_unlock_irq(&file->lock);
-		return -ENODEV;
-	}
-
 	event = list_entry(file->event_list.next, struct ib_uverbs_event, list);
 
 	if (file->is_async)
@@ -248,26 +317,19 @@
 	poll_wait(filp, &file->poll_wait, wait);
 
 	spin_lock_irq(&file->lock);
-	if (file->fd < 0)
-		pollflags = POLLERR;
-	else if (!list_empty(&file->event_list))
+	if (!list_empty(&file->event_list))
 		pollflags = POLLIN | POLLRDNORM;
 	spin_unlock_irq(&file->lock);
 
 	return pollflags;
 }
 
-static void ib_uverbs_event_release(struct ib_uverbs_event_file *file)
+void ib_uverbs_release_event_file(struct kref *ref)
 {
-	struct ib_uverbs_event *entry, *tmp;
+	struct ib_uverbs_event_file *file =
+		container_of(ref, struct ib_uverbs_event_file, ref);
 
-	spin_lock_irq(&file->lock);
-	if (file->fd != -1) {
-		file->fd = -1;
-		list_for_each_entry_safe(entry, tmp, &file->event_list, list)
-			kfree(entry);
-	}
-	spin_unlock_irq(&file->lock);
+	kfree(file);
 }
 
 static int ib_uverbs_event_fasync(int fd, struct file *filp, int on)
@@ -280,21 +342,30 @@
 static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
 {
 	struct ib_uverbs_event_file *file = filp->private_data;
+	struct ib_uverbs_event *entry, *tmp;
 
-	ib_uverbs_event_release(file);
+	spin_lock_irq(&file->lock);
+	file->file = NULL;
+	list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
+		if (entry->counter)
+			list_del(&entry->obj_list);
+		kfree(entry);
+	}
+	spin_unlock_irq(&file->lock);
+
 	ib_uverbs_event_fasync(-1, filp, 0);
-	kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
+
+	if (file->is_async) {
+		ib_unregister_event_handler(&file->uverbs_file->event_handler);
+		kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
+	}
+	kref_put(&file->ref, ib_uverbs_release_event_file);
 
 	return 0;
 }
 
 static struct file_operations uverbs_event_fops = {
-	/*
-	 * No .owner field since we artificially create event files,
-	 * so there is no increment to the module reference count in
-	 * the open path.  All event files come from a uverbs command
-	 * file, which already takes a module reference, so this is OK.
-	 */
+	.owner	 = THIS_MODULE,
 	.read 	 = ib_uverbs_event_read,
 	.poll    = ib_uverbs_event_poll,
 	.release = ib_uverbs_event_close,
@@ -303,27 +374,37 @@
 
 void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
 {
-	struct ib_uverbs_file  *file = cq_context;
-	struct ib_ucq_object   *uobj;
-	struct ib_uverbs_event *entry;
-	unsigned long           flags;
+	struct ib_uverbs_event_file    *file = cq_context;
+	struct ib_ucq_object	       *uobj;
+	struct ib_uverbs_event	       *entry;
+	unsigned long			flags;
+
+	if (!file)
+		return;
+
+	spin_lock_irqsave(&file->lock, flags);
+	if (!file->file) {
+		spin_unlock_irqrestore(&file->lock, flags);
+		return;
+	}
 
 	entry = kmalloc(sizeof *entry, GFP_ATOMIC);
-	if (!entry)
+	if (!entry) {
+		spin_unlock_irqrestore(&file->lock, flags);
 		return;
+	}
 
 	uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
 
 	entry->desc.comp.cq_handle = cq->uobject->user_handle;
 	entry->counter		   = &uobj->comp_events_reported;
 
-	spin_lock_irqsave(&file->comp_file[0].lock, flags);
-	list_add_tail(&entry->list, &file->comp_file[0].event_list);
+	list_add_tail(&entry->list, &file->event_list);
 	list_add_tail(&entry->obj_list, &uobj->comp_list);
-	spin_unlock_irqrestore(&file->comp_file[0].lock, flags);
+	spin_unlock_irqrestore(&file->lock, flags);
 
-	wake_up_interruptible(&file->comp_file[0].poll_wait);
-	kill_fasync(&file->comp_file[0].async_queue, SIGIO, POLL_IN);
+	wake_up_interruptible(&file->poll_wait);
+	kill_fasync(&file->async_queue, SIGIO, POLL_IN);
 }
 
 static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
@@ -334,32 +415,40 @@
 	struct ib_uverbs_event *entry;
 	unsigned long flags;
 
-	entry = kmalloc(sizeof *entry, GFP_ATOMIC);
-	if (!entry)
+	spin_lock_irqsave(&file->async_file->lock, flags);
+	if (!file->async_file->file) {
+		spin_unlock_irqrestore(&file->async_file->lock, flags);
 		return;
+	}
+
+	entry = kmalloc(sizeof *entry, GFP_ATOMIC);
+	if (!entry) {
+		spin_unlock_irqrestore(&file->async_file->lock, flags);
+		return;
+	}
 
 	entry->desc.async.element    = element;
 	entry->desc.async.event_type = event;
 	entry->counter               = counter;
 
-	spin_lock_irqsave(&file->async_file.lock, flags);
-	list_add_tail(&entry->list, &file->async_file.event_list);
+	list_add_tail(&entry->list, &file->async_file->event_list);
 	if (obj_list)
 		list_add_tail(&entry->obj_list, obj_list);
-	spin_unlock_irqrestore(&file->async_file.lock, flags);
+	spin_unlock_irqrestore(&file->async_file->lock, flags);
 
-	wake_up_interruptible(&file->async_file.poll_wait);
-	kill_fasync(&file->async_file.async_queue, SIGIO, POLL_IN);
+	wake_up_interruptible(&file->async_file->poll_wait);
+	kill_fasync(&file->async_file->async_queue, SIGIO, POLL_IN);
 }
 
 void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
 {
+	struct ib_uverbs_event_file *ev_file = context_ptr;
 	struct ib_ucq_object *uobj;
 
 	uobj = container_of(event->element.cq->uobject,
 			    struct ib_ucq_object, uobject);
 
-	ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
+	ib_uverbs_async_handler(ev_file->uverbs_file, uobj->uobject.user_handle,
 				event->event, &uobj->async_list,
 				&uobj->async_events_reported);
 				
@@ -389,8 +478,8 @@
 				&uobj->events_reported);
 }
 
-static void ib_uverbs_event_handler(struct ib_event_handler *handler,
-				    struct ib_event *event)
+void ib_uverbs_event_handler(struct ib_event_handler *handler,
+			     struct ib_event *event)
 {
 	struct ib_uverbs_file *file =
 		container_of(handler, struct ib_uverbs_file, event_handler);
@@ -399,38 +488,90 @@
 				NULL, NULL);
 }
 
-static int ib_uverbs_event_init(struct ib_uverbs_event_file *file,
-				struct ib_uverbs_file *uverbs_file)
+struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
+					int is_async, int *fd)
 {
+	struct ib_uverbs_event_file *ev_file;
 	struct file *filp;
+	int ret;
 
-	spin_lock_init(&file->lock);
-	INIT_LIST_HEAD(&file->event_list);
-	init_waitqueue_head(&file->poll_wait);
-	file->uverbs_file = uverbs_file;
-	file->async_queue = NULL;
+	ev_file = kmalloc(sizeof *ev_file, GFP_KERNEL);
+	if (!ev_file)
+		return ERR_PTR(-ENOMEM);
 
-	file->fd = get_unused_fd();
-	if (file->fd < 0)
-		return file->fd;
+	kref_init(&ev_file->ref);
+	spin_lock_init(&ev_file->lock);
+	INIT_LIST_HEAD(&ev_file->event_list);
+	init_waitqueue_head(&ev_file->poll_wait);
+	ev_file->uverbs_file = uverbs_file;
+	ev_file->async_queue = NULL;
+	ev_file->is_async    = is_async;
+
+	*fd = get_unused_fd();
+	if (*fd < 0) {
+		ret = *fd;
+		goto err;
+	}
 
 	filp = get_empty_filp();
 	if (!filp) {
-		put_unused_fd(file->fd);
-		return -ENFILE;
+		ret = -ENFILE;
+		goto err_fd;
 	}
 
-	filp->f_op 	   = &uverbs_event_fops;
+	ev_file->file      = filp;
+
+	/*
+	 * fops_get() can't fail here, because we're coming from a
+	 * system call on a uverbs file, which will already have a
+	 * module reference.
+	 */
+	filp->f_op 	   = fops_get(&uverbs_event_fops);
 	filp->f_vfsmnt 	   = mntget(uverbs_event_mnt);
 	filp->f_dentry 	   = dget(uverbs_event_mnt->mnt_root);
 	filp->f_mapping    = filp->f_dentry->d_inode->i_mapping;
 	filp->f_flags      = O_RDONLY;
 	filp->f_mode       = FMODE_READ;
-	filp->private_data = file;
+	filp->private_data = ev_file;
 
-	fd_install(file->fd, filp);
+	return filp;
 
-	return 0;
+err_fd:
+	put_unused_fd(*fd);
+
+err:
+	kfree(ev_file);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Look up a completion event file by FD.  If lookup is successful,
+ * takes a ref to the event file struct that it returns; if
+ * unsuccessful, returns NULL.
+ */
+struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
+{
+	struct ib_uverbs_event_file *ev_file = NULL;
+	struct file *filp;
+
+	filp = fget(fd);
+	if (!filp)
+		return NULL;
+
+	if (filp->f_op != &uverbs_event_fops)
+		goto out;
+
+	ev_file = filp->private_data;
+	if (ev_file->is_async) {
+		ev_file = NULL;
+		goto out;
+	}
+
+	kref_get(&ev_file->ref);
+
+out:
+	fput(filp);
+	return ev_file;
 }
 
 static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
@@ -450,11 +591,11 @@
 
 	if (hdr.command < 0				||
 	    hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
-	    !uverbs_cmd_table[hdr.command])
+	    !uverbs_cmd_table[hdr.command]		||
+	    !(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command)))
 		return -EINVAL;
 
-	if (!file->ucontext                               &&
-	    hdr.command != IB_USER_VERBS_CMD_QUERY_PARAMS &&
+	if (!file->ucontext &&
 	    hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT)
 		return -EINVAL;
 
@@ -474,84 +615,57 @@
 
 static int ib_uverbs_open(struct inode *inode, struct file *filp)
 {
-	struct ib_uverbs_device *dev =
-		container_of(inode->i_cdev, struct ib_uverbs_device, dev);
+	struct ib_uverbs_device *dev;
 	struct ib_uverbs_file *file;
-	int i = 0;
 	int ret;
 
-	if (!try_module_get(dev->ib_dev->owner))
-		return -ENODEV;
+	spin_lock(&map_lock);
+	dev = dev_table[iminor(inode) - IB_UVERBS_BASE_MINOR];
+	if (dev)
+		kref_get(&dev->ref);
+	spin_unlock(&map_lock);
 
-	file = kmalloc(sizeof *file +
-		       (dev->num_comp - 1) * sizeof (struct ib_uverbs_event_file),
-		       GFP_KERNEL);
-	if (!file) {
-		ret = -ENOMEM;
+	if (!dev)
+		return -ENXIO;
+
+	if (!try_module_get(dev->ib_dev->owner)) {
+		ret = -ENODEV;
 		goto err;
 	}
 
-	file->device = dev;
+	file = kmalloc(sizeof *file, GFP_KERNEL);
+	if (!file) {
+		ret = -ENOMEM;
+		goto err_module;
+	}
+
+	file->device	 = dev;
+	file->ucontext	 = NULL;
+	file->async_file = NULL;
 	kref_init(&file->ref);
 	init_MUTEX(&file->mutex);
 
-	file->ucontext = NULL;
-
-	kref_get(&file->ref);
-	ret = ib_uverbs_event_init(&file->async_file, file);
-	if (ret)
-		goto err_kref;
-
-	file->async_file.is_async = 1;
-
-	for (i = 0; i < dev->num_comp; ++i) {
-		kref_get(&file->ref);
-		ret = ib_uverbs_event_init(&file->comp_file[i], file);
-		if (ret)
-			goto err_async;
-		file->comp_file[i].is_async = 0;
-	}
-
-
 	filp->private_data = file;
 
-	INIT_IB_EVENT_HANDLER(&file->event_handler, dev->ib_dev,
-			      ib_uverbs_event_handler);
-	if (ib_register_event_handler(&file->event_handler))
-		goto err_async;
-
 	return 0;
 
-err_async:
-	while (i--)
-		ib_uverbs_event_release(&file->comp_file[i]);
-
-	ib_uverbs_event_release(&file->async_file);
-
-err_kref:
-	/*
-	 * One extra kref_put() because we took a reference before the
-	 * event file creation that failed and got us here.
-	 */
-	kref_put(&file->ref, ib_uverbs_release_file);
-	kref_put(&file->ref, ib_uverbs_release_file);
+err_module:
+	module_put(dev->ib_dev->owner);
 
 err:
-	module_put(dev->ib_dev->owner);
+	kref_put(&dev->ref, ib_uverbs_release_dev);
+
 	return ret;
 }
 
 static int ib_uverbs_close(struct inode *inode, struct file *filp)
 {
 	struct ib_uverbs_file *file = filp->private_data;
-	int i;
 
-	ib_unregister_event_handler(&file->event_handler);
-	ib_uverbs_event_release(&file->async_file);
-	ib_dealloc_ucontext(file->ucontext);
+	ib_uverbs_cleanup_ucontext(file, file->ucontext);
 
-	for (i = 0; i < file->device->num_comp; ++i)
-		ib_uverbs_event_release(&file->comp_file[i]);
+	if (file->async_file)
+		kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
 
 	kref_put(&file->ref, ib_uverbs_release_file);
 
@@ -581,27 +695,25 @@
 
 static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
 {
-	struct ib_uverbs_device *dev =
-		container_of(class_dev, struct ib_uverbs_device, class_dev);
+	struct ib_uverbs_device *dev = class_get_devdata(class_dev);
+
+	if (!dev)
+		return -ENODEV;
 
 	return sprintf(buf, "%s\n", dev->ib_dev->name);
 }
 static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
 
-static void ib_uverbs_release_class_dev(struct class_device *class_dev)
+static ssize_t show_dev_abi_version(struct class_device *class_dev, char *buf)
 {
-	struct ib_uverbs_device *dev =
-		container_of(class_dev, struct ib_uverbs_device, class_dev);
+	struct ib_uverbs_device *dev = class_get_devdata(class_dev);
 
-	cdev_del(&dev->dev);
-	clear_bit(dev->devnum, dev_map);
-	kfree(dev);
+	if (!dev)
+		return -ENODEV;
+
+	return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver);
 }
-
-static struct class uverbs_class = {
-	.name    = "infiniband_verbs",
-	.release = ib_uverbs_release_class_dev
-};
+static CLASS_DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
 
 static ssize_t show_abi_version(struct class *class, char *buf)
 {
@@ -622,6 +734,8 @@
 
 	memset(uverbs_dev, 0, sizeof *uverbs_dev);
 
+	kref_init(&uverbs_dev->ref);
+
 	spin_lock(&map_lock);
 	uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
 	if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) {
@@ -631,41 +745,48 @@
 	set_bit(uverbs_dev->devnum, dev_map);
 	spin_unlock(&map_lock);
 
-	uverbs_dev->ib_dev   = device;
-	uverbs_dev->num_comp = 1;
+	uverbs_dev->ib_dev           = device;
+	uverbs_dev->num_comp_vectors = 1;
 
-	if (device->mmap)
-		cdev_init(&uverbs_dev->dev, &uverbs_mmap_fops);
-	else
-		cdev_init(&uverbs_dev->dev, &uverbs_fops);
-	uverbs_dev->dev.owner = THIS_MODULE;
-	kobject_set_name(&uverbs_dev->dev.kobj, "uverbs%d", uverbs_dev->devnum);
-	if (cdev_add(&uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1))
+	uverbs_dev->dev = cdev_alloc();
+	if (!uverbs_dev->dev)
 		goto err;
-
-	uverbs_dev->class_dev.class = &uverbs_class;
-	uverbs_dev->class_dev.dev   = device->dma_device;
-	uverbs_dev->class_dev.devt  = uverbs_dev->dev.dev;
-	snprintf(uverbs_dev->class_dev.class_id, BUS_ID_SIZE, "uverbs%d", uverbs_dev->devnum);
-	if (class_device_register(&uverbs_dev->class_dev))
+	uverbs_dev->dev->owner = THIS_MODULE;
+	uverbs_dev->dev->ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
+	kobject_set_name(&uverbs_dev->dev->kobj, "uverbs%d", uverbs_dev->devnum);
+	if (cdev_add(uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1))
 		goto err_cdev;
 
-	if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_ibdev))
+	uverbs_dev->class_dev = class_device_create(uverbs_class, uverbs_dev->dev->dev,
+						    device->dma_device,
+						    "uverbs%d", uverbs_dev->devnum);
+	if (IS_ERR(uverbs_dev->class_dev))
+		goto err_cdev;
+
+	class_set_devdata(uverbs_dev->class_dev, uverbs_dev);
+
+	if (class_device_create_file(uverbs_dev->class_dev, &class_device_attr_ibdev))
 		goto err_class;
+	if (class_device_create_file(uverbs_dev->class_dev, &class_device_attr_abi_version))
+		goto err_class;
+
+	spin_lock(&map_lock);
+	dev_table[uverbs_dev->devnum] = uverbs_dev;
+	spin_unlock(&map_lock);
 
 	ib_set_client_data(device, &uverbs_client, uverbs_dev);
 
 	return;
 
 err_class:
-	class_device_unregister(&uverbs_dev->class_dev);
+	class_device_destroy(uverbs_class, uverbs_dev->dev->dev);
 
 err_cdev:
-	cdev_del(&uverbs_dev->dev);
+	cdev_del(uverbs_dev->dev);
 	clear_bit(uverbs_dev->devnum, dev_map);
 
 err:
-	kfree(uverbs_dev);
+	kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
 	return;
 }
 
@@ -676,7 +797,16 @@
 	if (!uverbs_dev)
 		return;
 
-	class_device_unregister(&uverbs_dev->class_dev);
+	class_set_devdata(uverbs_dev->class_dev, NULL);
+	class_device_destroy(uverbs_class, uverbs_dev->dev->dev);
+	cdev_del(uverbs_dev->dev);
+
+	spin_lock(&map_lock);
+	dev_table[uverbs_dev->devnum] = NULL;
+	spin_unlock(&map_lock);
+
+	clear_bit(uverbs_dev->devnum, dev_map);
+	kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
 }
 
 static struct super_block *uverbs_event_get_sb(struct file_system_type *fs_type, int flags,
@@ -706,13 +836,14 @@
 		goto out;
 	}
 
-	ret = class_register(&uverbs_class);
-	if (ret) {
+	uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
+	if (IS_ERR(uverbs_class)) {
+		ret = PTR_ERR(uverbs_class);
 		printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n");
 		goto out_chrdev;
 	}
 
-	ret = class_create_file(&uverbs_class, &class_attr_abi_version);
+	ret = class_create_file(uverbs_class, &class_attr_abi_version);
 	if (ret) {
 		printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n");
 		goto out_class;
@@ -746,7 +877,7 @@
 	unregister_filesystem(&uverbs_event_fs);
 
 out_class:
-	class_unregister(&uverbs_class);
+	class_destroy(uverbs_class);
 
 out_chrdev:
 	unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
@@ -760,8 +891,15 @@
 	ib_unregister_client(&uverbs_client);
 	mntput(uverbs_event_mnt);
 	unregister_filesystem(&uverbs_event_fs);
-	class_unregister(&uverbs_class);
+	class_destroy(uverbs_class);
 	unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
+	idr_destroy(&ib_uverbs_pd_idr);
+	idr_destroy(&ib_uverbs_mr_idr);
+	idr_destroy(&ib_uverbs_mw_idr);
+	idr_destroy(&ib_uverbs_ah_idr);
+	idr_destroy(&ib_uverbs_cq_idr);
+	idr_destroy(&ib_uverbs_qp_idr);
+	idr_destroy(&ib_uverbs_srq_idr);
 }
 
 module_init(ib_uverbs_init);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 5081d90..72d3ef7 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -523,16 +523,22 @@
 
 int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
 {
-	return qp->device->attach_mcast ?
-		qp->device->attach_mcast(qp, gid, lid) :
-		-ENOSYS;
+	if (!qp->device->attach_mcast)
+		return -ENOSYS;
+	if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
+		return -EINVAL;
+
+	return qp->device->attach_mcast(qp, gid, lid);
 }
 EXPORT_SYMBOL(ib_attach_mcast);
 
 int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
 {
-	return qp->device->detach_mcast ?
-		qp->device->detach_mcast(qp, gid, lid) :
-		-ENOSYS;
+	if (!qp->device->detach_mcast)
+		return -ENOSYS;
+	if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
+		return -EINVAL;
+
+	return qp->device->detach_mcast(qp, gid, lid);
 }
 EXPORT_SYMBOL(ib_detach_mcast);
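
ib_attach_mcast() and ib_detach_mcast() now sanity-check the request in the core before calling into the driver: IB multicast GIDs always carry the 0xff prefix in their first byte, and only UD QPs may be attached to a multicast group. A small standalone sketch of the same precondition; the type names are stand-ins, not kernel types:

#include <stdint.h>

enum example_qp_type { EX_QPT_RC, EX_QPT_UC, EX_QPT_UD };

struct example_gid {
	uint8_t raw[16];
};

/* Mirrors the new core check: multicast GID prefix plus UD-only attach. */
static int mcast_request_valid(const struct example_gid *gid,
			       enum example_qp_type qp_type)
{
	return gid->raw[0] == 0xff && qp_type == EX_QPT_UD;
}
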
diff --git a/drivers/infiniband/hw/mthca/Makefile b/drivers/infiniband/hw/mthca/Makefile
index c44f7ba..47ec5a7 100644
--- a/drivers/infiniband/hw/mthca/Makefile
+++ b/drivers/infiniband/hw/mthca/Makefile
@@ -7,4 +7,5 @@
 ib_mthca-y :=	mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \
 		mthca_allocator.o mthca_eq.o mthca_pd.o mthca_cq.o \
 		mthca_mr.o mthca_qp.o mthca_av.o mthca_mcg.o mthca_mad.o \
-		mthca_provider.o mthca_memfree.o mthca_uar.o mthca_srq.o
+		mthca_provider.o mthca_memfree.o mthca_uar.o mthca_srq.o \
+		mthca_catas.o
diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c
new file mode 100644
index 0000000..7ac52af
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#include "mthca_dev.h"
+
+enum {
+	MTHCA_CATAS_POLL_INTERVAL	= 5 * HZ,
+
+	MTHCA_CATAS_TYPE_INTERNAL	= 0,
+	MTHCA_CATAS_TYPE_UPLINK		= 3,
+	MTHCA_CATAS_TYPE_DDR		= 4,
+	MTHCA_CATAS_TYPE_PARITY		= 5,
+};
+
+static DEFINE_SPINLOCK(catas_lock);
+
+static void handle_catas(struct mthca_dev *dev)
+{
+	struct ib_event event;
+	const char *type;
+	int i;
+
+	event.device = &dev->ib_dev;
+	event.event  = IB_EVENT_DEVICE_FATAL;
+	event.element.port_num = 0;
+
+	ib_dispatch_event(&event);
+
+	switch (swab32(readl(dev->catas_err.map)) >> 24) {
+	case MTHCA_CATAS_TYPE_INTERNAL:
+		type = "internal error";
+		break;
+	case MTHCA_CATAS_TYPE_UPLINK:
+		type = "uplink bus error";
+		break;
+	case MTHCA_CATAS_TYPE_DDR:
+		type = "DDR data error";
+		break;
+	case MTHCA_CATAS_TYPE_PARITY:
+		type = "internal parity error";
+		break;
+	default:
+		type = "unknown error";
+		break;
+	}
+
+	mthca_err(dev, "Catastrophic error detected: %s\n", type);
+	for (i = 0; i < dev->catas_err.size; ++i)
+		mthca_err(dev, "  buf[%02x]: %08x\n",
+			  i, swab32(readl(dev->catas_err.map + i)));
+}
+
+static void poll_catas(unsigned long dev_ptr)
+{
+	struct mthca_dev *dev = (struct mthca_dev *) dev_ptr;
+	unsigned long flags;
+	int i;
+
+	for (i = 0; i < dev->catas_err.size; ++i)
+		if (readl(dev->catas_err.map + i)) {
+			handle_catas(dev);
+			return;
+		}
+
+	spin_lock_irqsave(&catas_lock, flags);
+	if (!dev->catas_err.stop)
+		mod_timer(&dev->catas_err.timer,
+			  jiffies + MTHCA_CATAS_POLL_INTERVAL);
+	spin_unlock_irqrestore(&catas_lock, flags);
+
+	return;
+}
+
+void mthca_start_catas_poll(struct mthca_dev *dev)
+{
+	unsigned long addr;
+
+	init_timer(&dev->catas_err.timer);
+	dev->catas_err.stop = 0;
+	dev->catas_err.map  = NULL;
+
+	addr = pci_resource_start(dev->pdev, 0) +
+		((pci_resource_len(dev->pdev, 0) - 1) &
+		 dev->catas_err.addr);
+
+	if (!request_mem_region(addr, dev->catas_err.size * 4,
+				DRV_NAME)) {
+		mthca_warn(dev, "couldn't request catastrophic error region "
+			   "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4);
+		return;
+	}
+
+	dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4);
+	if (!dev->catas_err.map) {
+		mthca_warn(dev, "couldn't map catastrophic error region "
+			   "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4);
+		release_mem_region(addr, dev->catas_err.size * 4);
+		return;
+	}
+
+	dev->catas_err.timer.data     = (unsigned long) dev;
+	dev->catas_err.timer.function = poll_catas;
+	dev->catas_err.timer.expires  = jiffies + MTHCA_CATAS_POLL_INTERVAL;
+	add_timer(&dev->catas_err.timer);
+}
+
+void mthca_stop_catas_poll(struct mthca_dev *dev)
+{
+	spin_lock_irq(&catas_lock);
+	dev->catas_err.stop = 1;
+	spin_unlock_irq(&catas_lock);
+
+	del_timer_sync(&dev->catas_err.timer);
+
+	if (dev->catas_err.map) {
+		iounmap(dev->catas_err.map);
+		release_mem_region(pci_resource_start(dev->pdev, 0) +
+				   ((pci_resource_len(dev->pdev, 0) - 1) &
+				    dev->catas_err.addr),
+				   dev->catas_err.size * 4);
+	}
+}
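
mthca_catas.c maps the catastrophic-error buffer advertised by QUERY_FW and polls it every five seconds with a self-rearming timer; a non-zero word reports IB_EVENT_DEVICE_FATAL and polling stops. A stripped-down sketch of the rearm-unless-stopped pattern used above; struct poller and check_hardware() are illustrative names, not part of the driver:

/* Sketch only: same shape as poll_catas()/mthca_stop_catas_poll() above. */
struct poller {
	struct timer_list timer;
	int               stop;   /* set (under a lock) before del_timer_sync() */
};

static void poll_once(unsigned long data)
{
	struct poller *p = (struct poller *) data;

	if (check_hardware(p))            /* error found: report it and stop polling */
		return;

	if (!p->stop)                     /* rearm only while polling is still active */
		mod_timer(&p->timer, jiffies + 5 * HZ);
}
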
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 378646b..49f211d 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -706,9 +707,13 @@
 
 	MTHCA_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET);
 	dev->cmd.max_cmds = 1 << lg;
+	MTHCA_GET(dev->catas_err.addr, outbox, QUERY_FW_ERR_START_OFFSET);
+	MTHCA_GET(dev->catas_err.size, outbox, QUERY_FW_ERR_SIZE_OFFSET);
 
 	mthca_dbg(dev, "FW version %012llx, max commands %d\n",
 		  (unsigned long long) dev->fw_ver, dev->cmd.max_cmds);
+	mthca_dbg(dev, "Catastrophic error buffer at 0x%llx, size 0x%x\n",
+		  (unsigned long long) dev->catas_err.addr, dev->catas_err.size);
 
 	if (mthca_is_memfree(dev)) {
 		MTHCA_GET(dev->fw.arbel.fw_pages,       outbox, QUERY_FW_SIZE_OFFSET);
@@ -933,9 +938,9 @@
 		goto out;
 
 	MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET);
-	dev_lim->max_srq_sz = 1 << field;
+	dev_lim->max_srq_sz = (1 << field) - 1;
 	MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET);
-	dev_lim->max_qp_sz = 1 << field;
+	dev_lim->max_qp_sz = (1 << field) - 1;
 	MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_QP_OFFSET);
 	dev_lim->reserved_qps = 1 << (field & 0xf);
 	MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_OFFSET);
@@ -1045,6 +1050,8 @@
 		  dev_lim->max_pds, dev_lim->reserved_pds, dev_lim->reserved_uars);
 	mthca_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n",
 		  dev_lim->max_pds, dev_lim->reserved_mgms);
+	mthca_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: %d\n",
+		  dev_lim->max_cq_sz, dev_lim->max_qp_sz, dev_lim->max_srq_sz);
 
 	mthca_dbg(dev, "Flags: %08x\n", dev_lim->flags);
 
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 7bff5a8..7e68bd4 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -83,6 +83,8 @@
 	/* Arbel FW gives us these, but we need them for Tavor */
 	MTHCA_MPT_ENTRY_SIZE  =  0x40,
 	MTHCA_MTT_SEG_SIZE    =  0x40,
+
+	MTHCA_QP_PER_MGM      = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2)
 };
 
 enum {
@@ -128,12 +130,16 @@
 	int      num_uars;
 	int      max_sg;
 	int      num_qps;
+	int      max_wqes;
+	int	 max_qp_init_rdma;
 	int      reserved_qps;
 	int      num_srqs;
+	int      max_srq_wqes;
 	int      reserved_srqs;
 	int      num_eecs;
 	int      reserved_eecs;
 	int      num_cqs;
+	int      max_cqes;
 	int      reserved_cqs;
 	int      num_eqs;
 	int      reserved_eqs;
@@ -148,6 +154,7 @@
 	int      reserved_mcgs;
 	int      num_pds;
 	int      reserved_pds;
+	u32      flags;
 	u8       port_width_cap;
 };
 
@@ -251,6 +258,14 @@
 	struct mthca_icm_table *table;
 };
 
+struct mthca_catas_err {
+	u64			addr;
+	u32 __iomem	       *map;
+	unsigned long		stop;
+	u32			size;
+	struct timer_list	timer;
+};
+
 struct mthca_dev {
 	struct ib_device  ib_dev;
 	struct pci_dev   *pdev;
@@ -311,6 +326,8 @@
 	struct mthca_av_table  av_table;
 	struct mthca_mcg_table mcg_table;
 
+	struct mthca_catas_err catas_err;
+
 	struct mthca_uar       driver_uar;
 	struct mthca_db_table *db_tab;
 	struct mthca_pd        driver_pd;
@@ -398,6 +415,9 @@
 int mthca_register_device(struct mthca_dev *dev);
 void mthca_unregister_device(struct mthca_dev *dev);
 
+void mthca_start_catas_poll(struct mthca_dev *dev);
+void mthca_stop_catas_poll(struct mthca_dev *dev);
+
 int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar);
 void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
 
@@ -447,6 +467,8 @@
 int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
 		    struct ib_srq_attr *attr, struct mthca_srq *srq);
 void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq);
+int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+		     enum ib_srq_attr_mask attr_mask);
 void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
 		     enum ib_event_type event_type);
 void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr);
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 8dfafda..e5a047a 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -83,7 +83,8 @@
 	MTHCA_EVENT_TYPE_PATH_MIG   	    = 0x01,
 	MTHCA_EVENT_TYPE_COMM_EST   	    = 0x02,
 	MTHCA_EVENT_TYPE_SQ_DRAINED 	    = 0x03,
-	MTHCA_EVENT_TYPE_SRQ_LAST_WQE       = 0x13,
+	MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE    = 0x13,
+	MTHCA_EVENT_TYPE_SRQ_LIMIT	    = 0x14,
 	MTHCA_EVENT_TYPE_CQ_ERROR   	    = 0x04,
 	MTHCA_EVENT_TYPE_WQ_CATAS_ERROR     = 0x05,
 	MTHCA_EVENT_TYPE_EEC_CATAS_ERROR    = 0x06,
@@ -110,8 +111,9 @@
 				(1ULL << MTHCA_EVENT_TYPE_LOCAL_CATAS_ERROR)  | \
 				(1ULL << MTHCA_EVENT_TYPE_PORT_CHANGE)        | \
 				(1ULL << MTHCA_EVENT_TYPE_ECC_DETECT))
-#define MTHCA_SRQ_EVENT_MASK    (1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR)    | \
-				(1ULL << MTHCA_EVENT_TYPE_SRQ_LAST_WQE)
+#define MTHCA_SRQ_EVENT_MASK   ((1ULL << MTHCA_EVENT_TYPE_SRQ_CATAS_ERROR)    | \
+				(1ULL << MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE)    | \
+				(1ULL << MTHCA_EVENT_TYPE_SRQ_LIMIT))
 #define MTHCA_CMD_EVENT_MASK    (1ULL << MTHCA_EVENT_TYPE_CMD)
 
 #define MTHCA_EQ_DB_INC_CI     (1 << 24)
@@ -142,6 +144,9 @@
 			__be32 qpn;
 		} __attribute__((packed)) qp;
 		struct {
+			__be32 srqn;
+		} __attribute__((packed)) srq;
+		struct {
 			__be32 cqn;
 			u32    reserved1;
 			u8     reserved2[3];
@@ -305,6 +310,16 @@
 				       IB_EVENT_SQ_DRAINED);
 			break;
 
+		case MTHCA_EVENT_TYPE_SRQ_QP_LAST_WQE:
+			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
+				       IB_EVENT_QP_LAST_WQE_REACHED);
+			break;
+
+		case MTHCA_EVENT_TYPE_SRQ_LIMIT:
+			mthca_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) & 0xffffff,
+					IB_EVENT_SRQ_LIMIT_REACHED);
+			break;
+
 		case MTHCA_EVENT_TYPE_WQ_CATAS_ERROR:
 			mthca_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) & 0xffffff,
 				       IB_EVENT_QP_FATAL);
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 9804174..8561b29 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -46,11 +46,6 @@
 	MTHCA_VENDOR_CLASS2 = 0xa
 };
 
-struct mthca_trap_mad {
-	struct ib_mad *mad;
-	DECLARE_PCI_UNMAP_ADDR(mapping)
-};
-
 static void update_sm_ah(struct mthca_dev *dev,
 			 u8 port_num, u16 lid, u8 sl)
 {
@@ -116,49 +111,14 @@
 			 struct ib_mad *mad)
 {
 	int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED;
-	struct mthca_trap_mad *tmad;
-	struct ib_sge      gather_list;
-	struct ib_send_wr *bad_wr, wr = {
-		.opcode      = IB_WR_SEND,
-		.sg_list     = &gather_list,
-		.num_sge     = 1,
-		.send_flags  = IB_SEND_SIGNALED,
-		.wr	     = {
-			 .ud = {
-				 .remote_qpn  = qpn,
-				 .remote_qkey = qpn ? IB_QP1_QKEY : 0,
-				 .timeout_ms  = 0
-			 }
-		 }
-	};
+	struct ib_mad_send_buf *send_buf;
 	struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
 	int ret;
 	unsigned long flags;
 
 	if (agent) {
-		tmad = kmalloc(sizeof *tmad, GFP_KERNEL);
-		if (!tmad)
-			return;
-
-		tmad->mad = kmalloc(sizeof *tmad->mad, GFP_KERNEL);
-		if (!tmad->mad) {
-			kfree(tmad);
-			return;
-		}
-
-		memcpy(tmad->mad, mad, sizeof *mad);
-
-		wr.wr.ud.mad_hdr = &tmad->mad->mad_hdr;
-		wr.wr_id         = (unsigned long) tmad;
-
-		gather_list.addr   = dma_map_single(agent->device->dma_device,
-						    tmad->mad,
-						    sizeof *tmad->mad,
-						    DMA_TO_DEVICE);
-		gather_list.length = sizeof *tmad->mad;
-		gather_list.lkey   = to_mpd(agent->qp->pd)->ntmr.ibmr.lkey;
-		pci_unmap_addr_set(tmad, mapping, gather_list.addr);
-
+		send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
+					      IB_MGMT_MAD_DATA, GFP_ATOMIC);
 		/*
 		 * We rely here on the fact that MLX QPs don't use the
 		 * address handle after the send is posted (this is
@@ -166,21 +126,15 @@
 		 * it's OK for our devices).
 		 */
 		spin_lock_irqsave(&dev->sm_lock, flags);
-		wr.wr.ud.ah      = dev->sm_ah[port_num - 1];
-		if (wr.wr.ud.ah)
-			ret = ib_post_send_mad(agent, &wr, &bad_wr);
+		memcpy(send_buf->mad, mad, sizeof *mad);
+		if ((send_buf->ah = dev->sm_ah[port_num - 1]))
+			ret = ib_post_send_mad(send_buf, NULL);
 		else
 			ret = -EINVAL;
 		spin_unlock_irqrestore(&dev->sm_lock, flags);
 
-		if (ret) {
-			dma_unmap_single(agent->device->dma_device,
-					 pci_unmap_addr(tmad, mapping),
-					 sizeof *tmad->mad,
-					 DMA_TO_DEVICE);
-			kfree(tmad->mad);
-			kfree(tmad);
-		}
+		if (ret)
+			ib_free_send_mad(send_buf);
 	}
 }
 
@@ -267,15 +221,7 @@
 static void send_handler(struct ib_mad_agent *agent,
 			 struct ib_mad_send_wc *mad_send_wc)
 {
-	struct mthca_trap_mad *tmad =
-		(void *) (unsigned long) mad_send_wc->wr_id;
-
-	dma_unmap_single(agent->device->dma_device,
-			 pci_unmap_addr(tmad, mapping),
-			 sizeof *tmad->mad,
-			 DMA_TO_DEVICE);
-	kfree(tmad->mad);
-	kfree(tmad);
+	ib_free_send_mad(mad_send_wc->send_buf);
 }
 
 int mthca_create_agents(struct mthca_dev *dev)
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 23a3f56..883d1e5 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -162,9 +162,18 @@
 	mdev->limits.pkey_table_len 	= dev_lim->max_pkeys;
 	mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay;
 	mdev->limits.max_sg             = dev_lim->max_sg;
+	mdev->limits.max_wqes           = dev_lim->max_qp_sz;
+	mdev->limits.max_qp_init_rdma   = dev_lim->max_requester_per_qp;
 	mdev->limits.reserved_qps       = dev_lim->reserved_qps;
+	mdev->limits.max_srq_wqes       = dev_lim->max_srq_sz;
 	mdev->limits.reserved_srqs      = dev_lim->reserved_srqs;
 	mdev->limits.reserved_eecs      = dev_lim->reserved_eecs;
+	/*
+	 * Subtract 1 from the limit because we need to allocate a
+	 * spare CQE so the HCA HW can tell the difference between an
+	 * empty CQ and a full CQ.
+	 */
+	mdev->limits.max_cqes           = dev_lim->max_cq_sz - 1;
 	mdev->limits.reserved_cqs       = dev_lim->reserved_cqs;
 	mdev->limits.reserved_eqs       = dev_lim->reserved_eqs;
 	mdev->limits.reserved_mtts      = dev_lim->reserved_mtts;
@@ -172,6 +181,7 @@
 	mdev->limits.reserved_uars      = dev_lim->reserved_uars;
 	mdev->limits.reserved_pds       = dev_lim->reserved_pds;
 	mdev->limits.port_width_cap     = dev_lim->max_port_width;
+	mdev->limits.flags              = dev_lim->flags;
 
 	/* IB_DEVICE_RESIZE_MAX_WR not supported by driver.
 	   May be doable since hardware supports it for SRQ.
@@ -1186,6 +1196,7 @@
 
 static struct pci_driver mthca_driver = {
 	.name		= DRV_NAME,
+	.owner		= THIS_MODULE,
 	.id_table	= mthca_pci_table,
 	.probe		= mthca_init_one,
 	.remove		= __devexit_p(mthca_remove_one)
diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c
index a270760..b47ea7d 100644
--- a/drivers/infiniband/hw/mthca/mthca_mcg.c
+++ b/drivers/infiniband/hw/mthca/mthca_mcg.c
@@ -37,10 +37,6 @@
 #include "mthca_dev.h"
 #include "mthca_cmd.h"
 
-enum {
-	MTHCA_QP_PER_MGM = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2)
-};
-
 struct mthca_mgm {
 	__be32 next_gid_index;
 	u32    reserved[3];
@@ -189,7 +185,12 @@
 	}
 
 	for (i = 0; i < MTHCA_QP_PER_MGM; ++i)
-		if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) {
+		if (mgm->qp[i] == cpu_to_be32(ibqp->qp_num | (1 << 31))) {
+			mthca_dbg(dev, "QP %06x already a member of MGM\n",
+				  ibqp->qp_num);
+			err = 0;
+			goto out;
+		} else if (!(mgm->qp[i] & cpu_to_be32(1 << 31))) {
 			mgm->qp[i] = cpu_to_be32(ibqp->qp_num | (1 << 31));
 			break;
 		}
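
Each slot in an MGM entry's QP list stores the QP number with bit 31 set as a "slot in use" marker, which is what lets the attach path above distinguish "this QP is already a member" (exact match) from "free slot" (high bit clear). A tiny sketch of that encoding, with the cpu_to_be32() byte-swapping of the real code left out for clarity; names are illustrative:

#include <stdint.h>

#define MGM_SLOT_VALID (1u << 31)

static uint32_t mgm_slot_encode(uint32_t qpn) { return qpn | MGM_SLOT_VALID; }
static int mgm_slot_free(uint32_t slot)       { return !(slot & MGM_SLOT_VALID); }
static int mgm_slot_holds(uint32_t slot, uint32_t qpn)
{
	return slot == (qpn | MGM_SLOT_VALID);
}
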
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 9ad8b3b..d72fe95 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -487,7 +487,8 @@
 	}
 }
 
-int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db)
+int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
+		   u32 qn, __be32 **db)
 {
 	int group;
 	int start, end, dir;
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h
index 29433f2..4fdca26 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.h
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.h
@@ -173,7 +173,8 @@
 
 int mthca_init_db_tab(struct mthca_dev *dev);
 void mthca_cleanup_db_tab(struct mthca_dev *dev);
-int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db);
+int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
+		   u32 qn, __be32 **db);
 void mthca_free_db(struct mthca_dev *dev, int type, int db_index);
 
 #endif /* MTHCA_MEMFREE_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 3f5319a..1b9477e 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -37,6 +37,7 @@
  */
 
 #include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
 #include <linux/mm.h>
 
 #include "mthca_dev.h"
@@ -90,15 +91,26 @@
 
 	props->max_mr_size         = ~0ull;
 	props->max_qp              = mdev->limits.num_qps - mdev->limits.reserved_qps;
-	props->max_qp_wr           = 0xffff;
+	props->max_qp_wr           = mdev->limits.max_wqes;
 	props->max_sge             = mdev->limits.max_sg;
 	props->max_cq              = mdev->limits.num_cqs - mdev->limits.reserved_cqs;
-	props->max_cqe             = 0xffff;
+	props->max_cqe             = mdev->limits.max_cqes;
 	props->max_mr              = mdev->limits.num_mpts - mdev->limits.reserved_mrws;
 	props->max_pd              = mdev->limits.num_pds - mdev->limits.reserved_pds;
 	props->max_qp_rd_atom      = 1 << mdev->qp_table.rdb_shift;
-	props->max_qp_init_rd_atom = 1 << mdev->qp_table.rdb_shift;
+	props->max_qp_init_rd_atom = mdev->limits.max_qp_init_rdma;
+	props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
+	props->max_srq             = mdev->limits.num_srqs - mdev->limits.reserved_srqs;
+	props->max_srq_wr          = mdev->limits.max_srq_wqes;
+	props->max_srq_sge         = mdev->limits.max_sg;
 	props->local_ca_ack_delay  = mdev->limits.local_ca_ack_delay;
+	props->atomic_cap          = mdev->limits.flags & DEV_LIM_FLAG_ATOMIC ?
+					IB_ATOMIC_HCA : IB_ATOMIC_NONE;
+	props->max_pkeys           = mdev->limits.pkey_table_len;
+	props->max_mcast_grp       = mdev->limits.num_mgms + mdev->limits.num_amgms;
+	props->max_mcast_qp_attach = MTHCA_QP_PER_MGM;
+	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
+					   props->max_mcast_grp;
 
 	err = 0;
  out:
@@ -150,9 +162,13 @@
 	props->gid_tbl_len       = to_mdev(ibdev)->limits.gid_table_len;
 	props->max_msg_sz        = 0x80000000;
 	props->pkey_tbl_len      = to_mdev(ibdev)->limits.pkey_table_len;
+	props->bad_pkey_cntr     = be16_to_cpup((__be16 *) (out_mad->data + 46));
 	props->qkey_viol_cntr    = be16_to_cpup((__be16 *) (out_mad->data + 48));
 	props->active_width      = out_mad->data[31] & 0xf;
 	props->active_speed      = out_mad->data[35] >> 4;
+	props->max_mtu           = out_mad->data[41] & 0xf;
+	props->active_mtu        = out_mad->data[36] >> 4;
+	props->subnet_timeout    = out_mad->data[51] & 0x1f;
 
  out:
 	kfree(in_mad);
@@ -634,6 +650,9 @@
 	int nent;
 	int err;
 
+	if (entries < 1 || entries > to_mdev(ibdev)->limits.max_cqes)
+		return ERR_PTR(-EINVAL);
+
 	if (context) {
 		if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
 			return ERR_PTR(-EFAULT);
@@ -1058,6 +1077,26 @@
 	strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
 	dev->ib_dev.owner                = THIS_MODULE;
 
+	dev->ib_dev.uverbs_abi_ver	 = MTHCA_UVERBS_ABI_VERSION;
+	dev->ib_dev.uverbs_cmd_mask	 =
+		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
+		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
+		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
+		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
+		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
+		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
+		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
+		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
+		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
+		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
+		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
+		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
+		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
+		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
+		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)	|
+		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)		|
+		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)		|
+		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
 	dev->ib_dev.node_type            = IB_NODE_CA;
 	dev->ib_dev.phys_port_cnt        = dev->limits.num_ports;
 	dev->ib_dev.dma_device           = &dev->pdev->dev;
@@ -1077,6 +1116,7 @@
 
 	if (dev->mthca_flags & MTHCA_FLAG_SRQ) {
 		dev->ib_dev.create_srq           = mthca_create_srq;
+		dev->ib_dev.modify_srq		 = mthca_modify_srq;
 		dev->ib_dev.destroy_srq          = mthca_destroy_srq;
 
 		if (mthca_is_memfree(dev))
@@ -1135,10 +1175,13 @@
 		}
 	}
 
+	mthca_start_catas_poll(dev);
+
 	return 0;
 }
 
 void mthca_unregister_device(struct mthca_dev *dev)
 {
+	mthca_stop_catas_poll(dev);
 	ib_unregister_device(&dev->ib_dev);
 }
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 5fa0066..62ff091 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -338,8 +338,7 @@
 				[UC]  = (IB_QP_AV                  |
 					 IB_QP_PATH_MTU            |
 					 IB_QP_DEST_QPN            |
-					 IB_QP_RQ_PSN              |
-					 IB_QP_MAX_DEST_RD_ATOMIC),
+					 IB_QP_RQ_PSN),
 				[RC]  = (IB_QP_AV                  |
 					 IB_QP_PATH_MTU            |
 					 IB_QP_DEST_QPN            |
@@ -368,8 +367,7 @@
 			.trans = MTHCA_TRANS_RTR2RTS,
 			.req_param = {
 				[UD]  = IB_QP_SQ_PSN,
-				[UC]  = (IB_QP_SQ_PSN            |
-					 IB_QP_MAX_QP_RD_ATOMIC),
+				[UC]  = IB_QP_SQ_PSN,
 				[RC]  = (IB_QP_TIMEOUT           |
 					 IB_QP_RETRY_CNT         |
 					 IB_QP_RNR_RETRY         |
@@ -446,8 +444,6 @@
 				[UD]  = (IB_QP_PKEY_INDEX            |
 					 IB_QP_QKEY),
 				[UC]  = (IB_QP_AV                    |
-					 IB_QP_MAX_QP_RD_ATOMIC      |
-					 IB_QP_MAX_DEST_RD_ATOMIC    |
 					 IB_QP_CUR_STATE             |
 					 IB_QP_ALT_PATH              |
 					 IB_QP_ACCESS_FLAGS          |
@@ -478,7 +474,7 @@
 			.opt_param = {
 				[UD]  = (IB_QP_CUR_STATE             |
 					 IB_QP_QKEY),
-				[UC]  = (IB_QP_CUR_STATE),
+				[UC]  = IB_QP_CUR_STATE,
 				[RC]  = (IB_QP_CUR_STATE             |
 					 IB_QP_MIN_RNR_TIMER),
 				[MLX] = (IB_QP_CUR_STATE             |
@@ -1112,8 +1108,10 @@
 			     struct mthca_qp *qp)
 {
 	/* Sanity check QP size before proceeding */
-	if (cap->max_send_wr  > 65536 || cap->max_recv_wr  > 65536 ||
-	    cap->max_send_sge > 64    || cap->max_recv_sge > 64)
+	if (cap->max_send_wr  > dev->limits.max_wqes ||
+	    cap->max_recv_wr  > dev->limits.max_wqes ||
+	    cap->max_send_sge > dev->limits.max_sg   ||
+	    cap->max_recv_sge > dev->limits.max_sg)
 		return -EINVAL;
 
 	if (mthca_is_memfree(dev)) {
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index 18998d4..64f70aa 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -186,7 +186,8 @@
 	int err;
 
 	/* Sanity check SRQ size before proceeding */
-	if (attr->max_wr > 16 << 20 || attr->max_sge > 64)
+	if (attr->max_wr  > dev->limits.max_srq_wqes ||
+	    attr->max_sge > dev->limits.max_sg)
 		return -EINVAL;
 
 	srq->max      = attr->max_wr;
@@ -332,6 +333,29 @@
 	mthca_free_mailbox(dev, mailbox);
 }
 
+int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+		     enum ib_srq_attr_mask attr_mask)
+{
+	struct mthca_dev *dev = to_mdev(ibsrq->device);
+	struct mthca_srq *srq = to_msrq(ibsrq);
+	int ret;
+	u8 status;
+
+	/* We don't support resizing SRQs (yet?) */
+	if (attr_mask & IB_SRQ_MAX_WR)
+		return -EINVAL;
+
+	if (attr_mask & IB_SRQ_LIMIT) {
+		ret = mthca_ARM_SRQ(dev, srq->srqn, attr->srq_limit, &status);
+		if (ret)
+			return ret;
+		if (status)
+			return -EINVAL;
+	}
+
+	return 0;
+}
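
mthca_modify_srq() deliberately supports only the IB_SRQ_LIMIT arm; resizing (IB_SRQ_MAX_WR) is refused. Once armed, the SRQ_LIMIT event decoded in mthca_eq.c above surfaces to the consumer as IB_EVENT_SRQ_LIMIT_REACHED when the number of posted receives drops to the limit. A hedged sketch of how a kernel consumer might arm it; the srq pointer and the limit value are assumed, and error handling is left to the caller:

static int arm_srq_limit(struct ib_srq *srq)
{
	struct ib_srq_attr attr = {
		.srq_limit = 16,	/* refill threshold; value chosen arbitrarily */
	};

	/* Passing IB_SRQ_MAX_WR here would get -EINVAL back from mthca. */
	return ib_modify_srq(srq, &attr, IB_SRQ_LIMIT);
}
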
+
 void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
 		     enum ib_event_type event_type)
 {
@@ -354,7 +378,7 @@
 
 	event.device      = &dev->ib_dev;
 	event.event       = event_type;
-	event.element.srq  = &srq->ibsrq;
+	event.element.srq = &srq->ibsrq;
 	srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context);
 
 out:
@@ -415,6 +439,14 @@
 
 		wqe       = get_wqe(srq, ind);
 		next_ind  = *wqe_to_link(wqe);
+
+		if (next_ind < 0) {
+			mthca_err(dev, "SRQ %06x full\n", srq->srqn);
+			err = -ENOMEM;
+			*bad_wr = wr;
+			break;
+		}
+
 		prev_wqe  = srq->last;
 		srq->last = wqe;
 
@@ -506,6 +538,13 @@
 		wqe       = get_wqe(srq, ind);
 		next_ind  = *wqe_to_link(wqe);
 
+		if (next_ind < 0) {
+			mthca_err(dev, "SRQ %06x full\n", srq->srqn);
+			err = -ENOMEM;
+			*bad_wr = wr;
+			break;
+		}
+
 		((struct mthca_next_seg *) wqe)->nda_op =
 			cpu_to_be32((next_ind << srq->wqe_shift) | 1);
 		((struct mthca_next_seg *) wqe)->ee_nds = 0;
diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h
index 41613ec..bb015c6 100644
--- a/drivers/infiniband/hw/mthca/mthca_user.h
+++ b/drivers/infiniband/hw/mthca/mthca_user.h
@@ -38,6 +38,12 @@
 #include <linux/types.h>
 
 /*
+ * Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+#define MTHCA_UVERBS_ABI_VERSION	1
+
+/*
  * Make sure that all structs defined in this file remain laid out so
  * that they pack the same way on 32-bit and 64-bit architectures (to
  * avoid incompatibility between 32-bit userspace and 64-bit kernels).
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 4ea1c1c..c994a91 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -100,7 +100,12 @@
 
 struct ipoib_mcast;
 
-struct ipoib_buf {
+struct ipoib_rx_buf {
+	struct sk_buff *skb;
+	dma_addr_t	mapping;
+};
+
+struct ipoib_tx_buf {
 	struct sk_buff *skb;
 	DECLARE_PCI_UNMAP_ADDR(mapping)
 };
@@ -150,14 +155,14 @@
 	unsigned int admin_mtu;
 	unsigned int mcast_mtu;
 
-	struct ipoib_buf *rx_ring;
+	struct ipoib_rx_buf *rx_ring;
 
-	spinlock_t        tx_lock;
-	struct ipoib_buf *tx_ring;
-	unsigned          tx_head;
-	unsigned          tx_tail;
-	struct ib_sge     tx_sge;
-	struct ib_send_wr tx_wr;
+	spinlock_t           tx_lock;
+	struct ipoib_tx_buf *tx_ring;
+	unsigned             tx_head;
+	unsigned             tx_tail;
+	struct ib_sge        tx_sge;
+	struct ib_send_wr    tx_wr;
 
 	struct ib_wc ibwc[IPOIB_NUM_WC];
 
@@ -277,7 +282,7 @@
 int ipoib_mcast_detach(struct net_device *dev, u16 mlid,
 		       union ib_gid *mgid);
 
-int ipoib_qp_create(struct net_device *dev);
+int ipoib_init_qp(struct net_device *dev);
 int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
 void ipoib_transport_dev_cleanup(struct net_device *dev);
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index f744009..192fef8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -95,65 +95,77 @@
 	}
 }
 
-static inline int ipoib_ib_receive(struct ipoib_dev_priv *priv,
-				   unsigned int wr_id,
-				   dma_addr_t addr)
-{
-	struct ib_sge list = {
-		.addr    = addr,
-		.length  = IPOIB_BUF_SIZE,
-		.lkey    = priv->mr->lkey,
-	};
-	struct ib_recv_wr param = {
-		.wr_id 	    = wr_id | IPOIB_OP_RECV,
-		.sg_list    = &list,
-		.num_sge    = 1,
-	};
-	struct ib_recv_wr *bad_wr;
-
-	return ib_post_recv(priv->qp, &param, &bad_wr);
-}
-
 static int ipoib_ib_post_receive(struct net_device *dev, int id)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	struct sk_buff *skb;
-	dma_addr_t addr;
+	struct ib_sge list;
+	struct ib_recv_wr param;
+	struct ib_recv_wr *bad_wr;
 	int ret;
 
-	skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4);
-	if (!skb) {
-		ipoib_warn(priv, "failed to allocate receive buffer\n");
+	list.addr     = priv->rx_ring[id].mapping;
+	list.length   = IPOIB_BUF_SIZE;
+	list.lkey     = priv->mr->lkey;
 
-		priv->rx_ring[id].skb = NULL;
-		return -ENOMEM;
-	}
-	skb_reserve(skb, 4);	/* 16 byte align IP header */
-	priv->rx_ring[id].skb = skb;
-	addr = dma_map_single(priv->ca->dma_device,
-			      skb->data, IPOIB_BUF_SIZE,
-			      DMA_FROM_DEVICE);
-	pci_unmap_addr_set(&priv->rx_ring[id], mapping, addr);
+	param.next    = NULL;
+	param.wr_id   = id | IPOIB_OP_RECV;
+	param.sg_list = &list;
+	param.num_sge = 1;
 
-	ret = ipoib_ib_receive(priv, id, addr);
-	if (ret) {
-		ipoib_warn(priv, "ipoib_ib_receive failed for buf %d (%d)\n",
-			   id, ret);
-		dma_unmap_single(priv->ca->dma_device, addr,
+	ret = ib_post_recv(priv->qp, &param, &bad_wr);
+	if (unlikely(ret)) {
+		ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
+		dma_unmap_single(priv->ca->dma_device,
+				 priv->rx_ring[id].mapping,
 				 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
-		dev_kfree_skb_any(skb);
+		dev_kfree_skb_any(priv->rx_ring[id].skb);
 		priv->rx_ring[id].skb = NULL;
 	}
 
 	return ret;
 }
 
+static int ipoib_alloc_rx_skb(struct net_device *dev, int id)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct sk_buff *skb;
+	dma_addr_t addr;
+
+	skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4);
+	if (!skb)
+		return -ENOMEM;
+
+	/*
+	 * IB will leave a 40 byte gap for a GRH and IPoIB adds a 4 byte
+	 * header.  So we need 4 more bytes to get to 48 and align the
+	 * IP header to a multiple of 16.
+	 */
+	skb_reserve(skb, 4);
+
+	addr = dma_map_single(priv->ca->dma_device,
+			      skb->data, IPOIB_BUF_SIZE,
+			      DMA_FROM_DEVICE);
+	if (unlikely(dma_mapping_error(addr))) {
+		dev_kfree_skb_any(skb);
+		return -EIO;
+	}
+
+	priv->rx_ring[id].skb     = skb;
+	priv->rx_ring[id].mapping = addr;
+
+	return 0;
+}
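
The 4-byte skb_reserve() is purely for alignment, as the comment says: the HCA writes a 40-byte GRH, IPoIB's encapsulation header is 4 bytes, and 4 + 40 + 4 = 48 puts the IP header on a 16-byte boundary. A trivial standalone check of that arithmetic:

#include <assert.h>

int main(void)
{
	const int reserve   = 4;    /* skb_reserve() above */
	const int grh_bytes = 40;   /* GRH deposited by the HCA */
	const int ipoib_hdr = 4;    /* IPoIB encapsulation header */

	assert((reserve + grh_bytes + ipoib_hdr) % 16 == 0);   /* IP header at offset 48 */
	return 0;
}
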
+
 static int ipoib_ib_post_receives(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int i;
 
 	for (i = 0; i < IPOIB_RX_RING_SIZE; ++i) {
+		if (ipoib_alloc_rx_skb(dev, i)) {
+			ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
+			return -ENOMEM;
+		}
 		if (ipoib_ib_post_receive(dev, i)) {
 			ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
 			return -EIO;
@@ -176,28 +188,36 @@
 		wr_id &= ~IPOIB_OP_RECV;
 
 		if (wr_id < IPOIB_RX_RING_SIZE) {
-			struct sk_buff *skb = priv->rx_ring[wr_id].skb;
+			struct sk_buff *skb  = priv->rx_ring[wr_id].skb;
+			dma_addr_t      addr = priv->rx_ring[wr_id].mapping;
 
-			priv->rx_ring[wr_id].skb = NULL;
-
-			dma_unmap_single(priv->ca->dma_device,
-					 pci_unmap_addr(&priv->rx_ring[wr_id],
-							mapping),
-					 IPOIB_BUF_SIZE,
-					 DMA_FROM_DEVICE);
-
-			if (wc->status != IB_WC_SUCCESS) {
+			if (unlikely(wc->status != IB_WC_SUCCESS)) {
 				if (wc->status != IB_WC_WR_FLUSH_ERR)
 					ipoib_warn(priv, "failed recv event "
 						   "(status=%d, wrid=%d vend_err %x)\n",
 						   wc->status, wr_id, wc->vendor_err);
+				dma_unmap_single(priv->ca->dma_device, addr,
+						 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
 				dev_kfree_skb_any(skb);
+				priv->rx_ring[wr_id].skb = NULL;
 				return;
 			}
 
+			/*
+			 * If we can't allocate a new RX buffer, dump
+			 * this packet and reuse the old buffer.
+			 */
+			if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
+				++priv->stats.rx_dropped;
+				goto repost;
+			}
+
 			ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
 				       wc->byte_len, wc->slid);
 
+			dma_unmap_single(priv->ca->dma_device, addr,
+					 IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+
 			skb_put(skb, wc->byte_len);
 			skb_pull(skb, IB_GRH_BYTES);
 
@@ -220,8 +240,8 @@
 				dev_kfree_skb_any(skb);
 			}
 
-			/* repost receive */
-			if (ipoib_ib_post_receive(dev, wr_id))
+		repost:
+			if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
 				ipoib_warn(priv, "ipoib_ib_post_receive failed "
 					   "for buf %d\n", wr_id);
 		} else
@@ -229,7 +249,7 @@
 				   wr_id);
 
 	} else {
-		struct ipoib_buf *tx_req;
+		struct ipoib_tx_buf *tx_req;
 		unsigned long flags;
 
 		if (wr_id >= IPOIB_TX_RING_SIZE) {
@@ -302,7 +322,7 @@
 		struct ipoib_ah *address, u32 qpn)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	struct ipoib_buf *tx_req;
+	struct ipoib_tx_buf *tx_req;
 	dma_addr_t addr;
 
 	if (skb->len > dev->mtu + INFINIBAND_ALEN) {
@@ -387,9 +407,9 @@
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int ret;
 
-	ret = ipoib_qp_create(dev);
+	ret = ipoib_init_qp(dev);
 	if (ret) {
-		ipoib_warn(priv, "ipoib_qp_create returned %d\n", ret);
+		ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
 		return -1;
 	}
 
@@ -468,7 +488,7 @@
 	struct ib_qp_attr qp_attr;
 	int attr_mask;
 	unsigned long begin;
-	struct ipoib_buf *tx_req;
+	struct ipoib_tx_buf *tx_req;
 	int i;
 
 	/* Kill the existing QP and allocate a new one */
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 6c5bf07..cd4f423 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -637,8 +637,11 @@
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-	ipoib_warn(priv, "transmit timeout: latency %ld\n",
-		   jiffies - dev->trans_start);
+	ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
+		   jiffies_to_msecs(jiffies - dev->trans_start));
+	ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n",
+		   netif_queue_stopped(dev),
+		   priv->tx_head, priv->tx_tail);
 	/* XXX reset QP, etc. */
 }
 
@@ -729,7 +732,7 @@
 
 	/* Allocate RX/TX "rings" to hold queued skbs */
 
-	priv->rx_ring =	kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf),
+	priv->rx_ring =	kmalloc(IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf),
 				GFP_KERNEL);
 	if (!priv->rx_ring) {
 		printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
@@ -737,9 +740,9 @@
 		goto out;
 	}
 	memset(priv->rx_ring, 0,
-	       IPOIB_RX_RING_SIZE * sizeof (struct ipoib_buf));
+	       IPOIB_RX_RING_SIZE * sizeof (struct ipoib_rx_buf));
 
-	priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf),
+	priv->tx_ring = kmalloc(IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf),
 				GFP_KERNEL);
 	if (!priv->tx_ring) {
 		printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
@@ -747,7 +750,7 @@
 		goto out_rx_ring_cleanup;
 	}
 	memset(priv->tx_ring, 0,
-	       IPOIB_TX_RING_SIZE * sizeof (struct ipoib_buf));
+	       IPOIB_TX_RING_SIZE * sizeof (struct ipoib_tx_buf));
 
 	/* priv->tx_head & tx_tail are already 0 */
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 79f59d0..b5902a7 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -92,7 +92,7 @@
 	return ret;
 }
 
-int ipoib_qp_create(struct net_device *dev)
+int ipoib_init_qp(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int ret;
@@ -149,10 +149,11 @@
 	return 0;
 
 out_fail:
-	ib_destroy_qp(priv->qp);
-	priv->qp = NULL;
+	qp_attr.qp_state = IB_QPS_RESET;
+	if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
+		ipoib_warn(priv, "Failed to modify QP to RESET state\n");
 
-	return -EINVAL;
+	return ret;
 }
 
 int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index 5308683..0a9fcd5 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
  * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
@@ -109,7 +109,6 @@
 
 struct ib_cm_req_event_param {
 	struct ib_cm_id		*listen_id;
-	struct ib_device	*device;
 	u8			port;
 
 	struct ib_sa_path_rec	*primary_path;
@@ -220,7 +219,6 @@
 
 struct ib_cm_sidr_req_event_param {
 	struct ib_cm_id		*listen_id;
-	struct ib_device	*device;
 	u8			port;
 	u16			pkey;
 };
@@ -284,6 +282,7 @@
 struct ib_cm_id {
 	ib_cm_handler		cm_handler;
 	void			*context;
+	struct ib_device	*device;
 	__be64			service_id;
 	__be64			service_mask;
 	enum ib_cm_state	state;		/* internal CM/debug use */
@@ -295,6 +294,8 @@
 
 /**
  * ib_create_cm_id - Allocate a communication identifier.
+ * @device: Device associated with the cm_id.  All related communication will
+ * be associated with the specified device.
  * @cm_handler: Callback invoked to notify the user of CM events.
  * @context: User specified context associated with the communication
  *   identifier.
@@ -302,7 +303,8 @@
  * Communication identifiers are used to track connection states, service
  * ID resolution requests, and listen requests.
  */
-struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler,
+struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
+				 ib_cm_handler cm_handler,
 				 void *context);
 
 /**
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 4172e68..2c13350 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -109,10 +109,14 @@
 #define IB_QP_SET_QKEY	0x80000000
 
 enum {
+	IB_MGMT_MAD_HDR = 24,
 	IB_MGMT_MAD_DATA = 232,
+	IB_MGMT_RMPP_HDR = 36,
 	IB_MGMT_RMPP_DATA = 220,
+	IB_MGMT_VENDOR_HDR = 40,
 	IB_MGMT_VENDOR_DATA = 216,
-	IB_MGMT_SA_DATA = 200
+	IB_MGMT_SA_HDR = 56,
+	IB_MGMT_SA_DATA = 200,
 };
 
 struct ib_mad_hdr {
@@ -203,26 +207,25 @@
 
 /**
  * ib_mad_send_buf - MAD data buffer and work request for sends.
- * @mad: References an allocated MAD data buffer.  The size of the data
- *   buffer is specified in the @send_wr.length field.
- * @mapping: DMA mapping information.
+ * @next: A pointer used to chain together MADs for posting.
+ * @mad: References an allocated MAD data buffer.
  * @mad_agent: MAD agent that allocated the buffer.
+ * @ah: The address handle to use when sending the MAD.
  * @context: User-controlled context fields.
- * @send_wr: An initialized work request structure used when sending the MAD.
- *   The wr_id field of the work request is initialized to reference this
- *   data structure.
- * @sge: A scatter-gather list referenced by the work request.
+ * @timeout_ms: Time to wait for a response.
+ * @retries: Number of times to retry a request for a response.
  *
  * Users are responsible for initializing the MAD buffer itself, with the
  * exception of specifying the payload length field in any RMPP MAD.
  */
 struct ib_mad_send_buf {
-	struct ib_mad		*mad;
-	DECLARE_PCI_UNMAP_ADDR(mapping)
+	struct ib_mad_send_buf	*next;
+	void			*mad;
 	struct ib_mad_agent	*mad_agent;
+	struct ib_ah		*ah;
 	void			*context[2];
-	struct ib_send_wr	send_wr;
-	struct ib_sge		sge;
+	int			timeout_ms;
+	int			retries;
 };
 
 /**
@@ -287,7 +290,7 @@
  * or @mad_send_wc.
  */
 typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
-				     struct ib_send_wr *send_wr,
+				     struct ib_mad_send_buf *send_buf,
 				     struct ib_mad_send_wc *mad_send_wc);
 
 /**
@@ -334,13 +337,13 @@
 
 /**
  * ib_mad_send_wc - MAD send completion information.
- * @wr_id: Work request identifier associated with the send MAD request.
+ * @send_buf: Send MAD data buffer associated with the send MAD request.
  * @status: Completion status.
  * @vendor_err: Optional vendor error information returned with a failed
  *   request.
  */
 struct ib_mad_send_wc {
-	u64			wr_id;
+	struct ib_mad_send_buf	*send_buf;
 	enum ib_wc_status	status;
 	u32			vendor_err;
 };
@@ -366,7 +369,7 @@
  * @rmpp_list: Specifies a list of RMPP reassembled received MAD buffers.
  * @mad_len: The length of the received MAD, without duplicated headers.
  *
- * For received response, the wr_id field of the wc is set to the wr_id
+ * For a received response, the wr_id contains a pointer to the ib_mad_send_buf
  *   for the corresponding send request.
  */
 struct ib_mad_recv_wc {
@@ -463,9 +466,9 @@
 /**
  * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
  *   with the registered client.
- * @mad_agent: Specifies the associated registration to post the send to.
- * @send_wr: Specifies the information needed to send the MAD(s).
- * @bad_send_wr: Specifies the MAD on which an error was encountered.
+ * @send_buf: Specifies the information needed to send the MAD(s).
+ * @bad_send_buf: Specifies the MAD on which an error was encountered.  This
+ *   parameter is optional if only a single MAD is posted.
  *
  * Sent MADs are not guaranteed to complete in the order that they were posted.
  *
@@ -479,9 +482,8 @@
  * defined data being transferred.  The paylen_newwin field should be
  * specified in network-byte order.
  */
-int ib_post_send_mad(struct ib_mad_agent *mad_agent,
-		     struct ib_send_wr *send_wr,
-		     struct ib_send_wr **bad_send_wr);
+int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
+		     struct ib_mad_send_buf **bad_send_buf);
 
 /**
  * ib_coalesce_recv_mad - Coalesces received MAD data into a single buffer.
@@ -507,23 +509,25 @@
 /**
  * ib_cancel_mad - Cancels an outstanding send MAD operation.
  * @mad_agent: Specifies the registration associated with sent MAD.
- * @wr_id: Indicates the work request identifier of the MAD to cancel.
+ * @send_buf: Indicates the MAD to cancel.
  *
  * MADs will be returned to the user through the corresponding
  * ib_mad_send_handler.
  */
-void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id);
+void ib_cancel_mad(struct ib_mad_agent *mad_agent,
+		   struct ib_mad_send_buf *send_buf);
 
 /**
  * ib_modify_mad - Modifies an outstanding send MAD operation.
  * @mad_agent: Specifies the registration associated with sent MAD.
- * @wr_id: Indicates the work request identifier of the MAD to modify.
+ * @send_buf: Indicates the MAD to modify.
  * @timeout_ms: New timeout value for sent MAD.
  *
  * This call will reset the timeout value for a sent MAD to the specified
  * value.
  */
-int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms);
+int ib_modify_mad(struct ib_mad_agent *mad_agent,
+		  struct ib_mad_send_buf *send_buf, u32 timeout_ms);
 
 /**
  * ib_redirect_mad_qp - Registers a QP for MAD services.
@@ -572,7 +576,6 @@
  * @remote_qpn: Specifies the QPN of the receiving node.
  * @pkey_index: Specifies which PKey the MAD will be sent using.  This field
  *   is valid only if the remote_qpn is QP 1.
- * @ah: References the address handle used to transfer to the remote node.
  * @rmpp_active: Indicates if the send will enable RMPP.
  * @hdr_len: Indicates the size of the data header of the MAD.  This length
  *   should include the common MAD header, RMPP header, plus any class
@@ -582,11 +585,10 @@
  *   additional padding that may be necessary.
  * @gfp_mask: GFP mask used for the memory allocation.
  *
- * This is a helper routine that may be used to allocate a MAD.  Users are
- * not required to allocate outbound MADs using this call.  The returned
- * MAD send buffer will reference a data buffer usable for sending a MAD, along
+ * This routine allocates a MAD for sending.  The returned MAD send buffer
+ * will reference a data buffer usable for sending a MAD, along
  * with an initialized work request structure.  Users may modify the returned
- * MAD data buffer or work request before posting the send.
+ * MAD data buffer before posting the send.
  *
  * The returned data buffer will be cleared.  Users are responsible for
  * initializing the common MAD and any class specific headers.  If @rmpp_active
@@ -594,7 +596,7 @@
  */
 struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
 					    u32 remote_qpn, u16 pkey_index,
-					    struct ib_ah *ah, int rmpp_active,
+					    int rmpp_active,
 					    int hdr_len, int data_len,
 					    gfp_t gfp_mask);
 
diff --git a/include/rdma/ib_user_cm.h b/include/rdma/ib_user_cm.h
index e4d1654..3037588 100644
--- a/include/rdma/ib_user_cm.h
+++ b/include/rdma/ib_user_cm.h
@@ -38,7 +38,7 @@
 
 #include <linux/types.h>
 
-#define IB_USER_CM_ABI_VERSION 2
+#define IB_USER_CM_ABI_VERSION 3
 
 enum {
 	IB_USER_CM_CMD_CREATE_ID,
@@ -299,8 +299,6 @@
 };
 
 struct ib_ucm_req_event_resp {
-	/* device */
-	/* port */
 	struct ib_ucm_path_rec primary_path;
 	struct ib_ucm_path_rec alternate_path;
 	__be64                 remote_ca_guid;
@@ -316,6 +314,7 @@
 	__u8  retry_count;
 	__u8  rnr_retry_count;
 	__u8  srq;
+	__u8  port;
 };
 
 struct ib_ucm_rep_event_resp {
@@ -353,10 +352,9 @@
 };
 
 struct ib_ucm_sidr_req_event_resp {
-	/* device */
-	/* port */
 	__u16 pkey;
-	__u8  reserved[2];
+	__u8  port;
+	__u8  reserved;
 };
 
 struct ib_ucm_sidr_rep_event_resp {
diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h
index fd85725..072f3a2 100644
--- a/include/rdma/ib_user_verbs.h
+++ b/include/rdma/ib_user_verbs.h
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -42,15 +43,12 @@
  * Increment this value if any changes that break userspace ABI
  * compatibility are made.
  */
-#define IB_USER_VERBS_ABI_VERSION	2
+#define IB_USER_VERBS_ABI_VERSION	3
 
 enum {
-	IB_USER_VERBS_CMD_QUERY_PARAMS,
 	IB_USER_VERBS_CMD_GET_CONTEXT,
 	IB_USER_VERBS_CMD_QUERY_DEVICE,
 	IB_USER_VERBS_CMD_QUERY_PORT,
-	IB_USER_VERBS_CMD_QUERY_GID,
-	IB_USER_VERBS_CMD_QUERY_PKEY,
 	IB_USER_VERBS_CMD_ALLOC_PD,
 	IB_USER_VERBS_CMD_DEALLOC_PD,
 	IB_USER_VERBS_CMD_CREATE_AH,
@@ -65,6 +63,7 @@
 	IB_USER_VERBS_CMD_ALLOC_MW,
 	IB_USER_VERBS_CMD_BIND_MW,
 	IB_USER_VERBS_CMD_DEALLOC_MW,
+	IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL,
 	IB_USER_VERBS_CMD_CREATE_CQ,
 	IB_USER_VERBS_CMD_RESIZE_CQ,
 	IB_USER_VERBS_CMD_DESTROY_CQ,
@@ -90,8 +89,11 @@
  * Make sure that all structs defined in this file remain laid out so
  * that they pack the same way on 32-bit and 64-bit architectures (to
  * avoid incompatibility between 32-bit userspace and 64-bit kernels).
- * In particular do not use pointer types -- pass pointers in __u64
- * instead.
+ * Specifically:
+ *  - Do not use pointer types -- pass pointers in __u64 instead.
+ *  - Make sure that any structure larger than 4 bytes is padded to a
+ *    multiple of 8 bytes.  Otherwise the structure size will be
+ *    different between 32-bit and 64-bit architectures.
  */
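
The expanded comment states the layout rule for every structure in this ABI: anything larger than 4 bytes is padded to a multiple of 8, because 32-bit and 64-bit compilers otherwise disagree on its size and on the offset of a trailing driver_data[] array. That is why commands such as ib_uverbs_create_cq below gain explicit reserved fields. A minimal illustration with a hypothetical command, not one taken from this file:

#include <linux/types.h>

struct example_cmd {
	__u64 response;
	__u32 handle;
	__u32 reserved;		/* pad to a multiple of 8 bytes: without it a
				 * 32-bit build places driver_data[] at offset
				 * 12 while a 64-bit build uses offset 16. */
	__u64 driver_data[0];
};
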
 
 struct ib_uverbs_async_event_desc {
@@ -118,27 +120,14 @@
 	__u16 out_words;
 };
 
-/*
- * No driver_data for "query params" command, since this is intended
- * to be a core function with no possible device dependence.
- */
-struct ib_uverbs_query_params {
-	__u64 response;
-};
-
-struct ib_uverbs_query_params_resp {
-	__u32 num_cq_events;
-};
-
 struct ib_uverbs_get_context {
 	__u64 response;
-	__u64 cq_fd_tab;
 	__u64 driver_data[0];
 };
 
 struct ib_uverbs_get_context_resp {
 	__u32 async_fd;
-	__u32 reserved;
+	__u32 num_comp_vectors;
 };
 
 struct ib_uverbs_query_device {
@@ -220,31 +209,6 @@
 	__u8  reserved[3];
 };
 
-struct ib_uverbs_query_gid {
-	__u64 response;
-	__u8  port_num;
-	__u8  index;
-	__u8  reserved[6];
-	__u64 driver_data[0];
-};
-
-struct ib_uverbs_query_gid_resp {
-	__u8  gid[16];
-};
-
-struct ib_uverbs_query_pkey {
-	__u64 response;
-	__u8  port_num;
-	__u8  index;
-	__u8  reserved[6];
-	__u64 driver_data[0];
-};
-
-struct ib_uverbs_query_pkey_resp {
-	__u16 pkey;
-	__u16 reserved;
-};
-
 struct ib_uverbs_alloc_pd {
 	__u64 response;
 	__u64 driver_data[0];
@@ -278,11 +242,21 @@
 	__u32 mr_handle;
 };
 
+struct ib_uverbs_create_comp_channel {
+	__u64 response;
+};
+
+struct ib_uverbs_create_comp_channel_resp {
+	__u32 fd;
+};
+
 struct ib_uverbs_create_cq {
 	__u64 response;
 	__u64 user_handle;
 	__u32 cqe;
-	__u32 event_handler;
+	__u32 comp_vector;
+	__s32 comp_channel;
+	__u32 reserved;
 	__u64 driver_data[0];
 };
 
@@ -291,6 +265,41 @@
 	__u32 cqe;
 };
 
+struct ib_uverbs_poll_cq {
+	__u64 response;
+	__u32 cq_handle;
+	__u32 ne;
+};
+
+struct ib_uverbs_wc {
+	__u64 wr_id;
+	__u32 status;
+	__u32 opcode;
+	__u32 vendor_err;
+	__u32 byte_len;
+	__u32 imm_data;
+	__u32 qp_num;
+	__u32 src_qp;
+	__u32 wc_flags;
+	__u16 pkey_index;
+	__u16 slid;
+	__u8 sl;
+	__u8 dlid_path_bits;
+	__u8 port_num;
+	__u8 reserved;
+};
+
+struct ib_uverbs_poll_cq_resp {
+	__u32 count;
+	__u32 reserved;
+	struct ib_uverbs_wc wc[0];
+};
+
+struct ib_uverbs_req_notify_cq {
+	__u32 cq_handle;
+	__u32 solicited_only;
+};
+
 struct ib_uverbs_destroy_cq {
 	__u64 response;
 	__u32 cq_handle;
@@ -388,6 +397,127 @@
 	__u32 events_reported;
 };
 
+/*
+ * The ib_uverbs_sge structure isn't used anywhere, since we assume
+ * the ib_sge structure is packed the same way on 32-bit and 64-bit
+ * architectures in both kernel and user space.  It's just here to
+ * document the ABI.
+ */
+struct ib_uverbs_sge {
+	__u64 addr;
+	__u32 length;
+	__u32 lkey;
+};
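
One way to back the assumption stated above with a compile-time check would be a sketch like the one below (not part of this patch). It assumes the kernel's BUILD_BUG_ON and offsetof helpers, and the checks would need to sit in an always-compiled path, such as module init, to actually be evaluated.

/* Sketch only: verify that the documented ABI layout matches the
 * in-kernel struct ib_sge.  Must be called from compiled code for the
 * BUILD_BUG_ON checks to fire. */
static inline void ib_uverbs_sge_layout_check(void)
{
	BUILD_BUG_ON(sizeof(struct ib_uverbs_sge) != sizeof(struct ib_sge));
	BUILD_BUG_ON(offsetof(struct ib_uverbs_sge, length) !=
		     offsetof(struct ib_sge, length));
	BUILD_BUG_ON(offsetof(struct ib_uverbs_sge, lkey) !=
		     offsetof(struct ib_sge, lkey));
}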
+
+struct ib_uverbs_send_wr {
+	__u64 wr_id;
+	__u32 num_sge;
+	__u32 opcode;
+	__u32 send_flags;
+	__u32 imm_data;
+	union {
+		struct {
+			__u64 remote_addr;
+			__u32 rkey;
+			__u32 reserved;
+		} rdma;
+		struct {
+			__u64 remote_addr;
+			__u64 compare_add;
+			__u64 swap;
+			__u32 rkey;
+			__u32 reserved;
+		} atomic;
+		struct {
+			__u32 ah;
+			__u32 remote_qpn;
+			__u32 remote_qkey;
+			__u32 reserved;
+		} ud;
+	} wr;
+};
+
+struct ib_uverbs_post_send {
+	__u64 response;
+	__u32 qp_handle;
+	__u32 wr_count;
+	__u32 sge_count;
+	__u32 wqe_size;
+	struct ib_uverbs_send_wr send_wr[0];
+};
+
+struct ib_uverbs_post_send_resp {
+	__u32 bad_wr;
+};
+
+struct ib_uverbs_recv_wr {
+	__u64 wr_id;
+	__u32 num_sge;
+	__u32 reserved;
+};
+
+struct ib_uverbs_post_recv {
+	__u64 response;
+	__u32 qp_handle;
+	__u32 wr_count;
+	__u32 sge_count;
+	__u32 wqe_size;
+	struct ib_uverbs_recv_wr recv_wr[0];
+};
+
+struct ib_uverbs_post_recv_resp {
+	__u32 bad_wr;
+};
+
+struct ib_uverbs_post_srq_recv {
+	__u64 response;
+	__u32 srq_handle;
+	__u32 wr_count;
+	__u32 sge_count;
+	__u32 wqe_size;
+	struct ib_uverbs_recv_wr recv[0];
+};
+
+struct ib_uverbs_post_srq_recv_resp {
+	__u32 bad_wr;
+};
+
+struct ib_uverbs_global_route {
+	__u8  dgid[16];
+	__u32 flow_label;
+	__u8  sgid_index;
+	__u8  hop_limit;
+	__u8  traffic_class;
+	__u8  reserved;
+};
+
+struct ib_uverbs_ah_attr {
+	struct ib_uverbs_global_route grh;
+	__u16 dlid;
+	__u8  sl;
+	__u8  src_path_bits;
+	__u8  static_rate;
+	__u8  is_global;
+	__u8  port_num;
+	__u8  reserved;
+};
+
+struct ib_uverbs_create_ah {
+	__u64 response;
+	__u64 user_handle;
+	__u32 pd_handle;
+	__u32 reserved;
+	struct ib_uverbs_ah_attr attr;
+};
+
+struct ib_uverbs_create_ah_resp {
+	__u32 ah_handle;
+};
+
+struct ib_uverbs_destroy_ah {
+	__u32 ah_handle;
+};
+
 struct ib_uverbs_attach_mcast {
 	__u8  gid[16];
 	__u32 qp_handle;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index e6f4c9e..f72d46d 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -595,11 +595,8 @@
 		} atomic;
 		struct {
 			struct ib_ah *ah;
-			struct ib_mad_hdr *mad_hdr;
 			u32	remote_qpn;
 			u32	remote_qkey;
-			int	timeout_ms; /* valid for MADs only */
-			int	retries;    /* valid for MADs only */
 			u16	pkey_index; /* valid for GSI only */
 			u8	port_num;   /* valid for DR SMPs on switch only */
 		} ud;
@@ -951,6 +948,9 @@
 		IB_DEV_UNREGISTERED
 	}                            reg_state;
 
+	u64			     uverbs_cmd_mask;
+	int			     uverbs_abi_ver;
+
 	u8                           node_type;
 	u8                           phys_port_cnt;
 };