Merge branches 'amso1100', 'cma', 'cxgb3', 'cxgb4', 'fdr', 'ipath', 'ipoib', 'misc', 'mlx4', 'misc', 'nes', 'qib' and 'xrc' into for-next
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index fc0f2bd..4104ea24 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -889,6 +889,8 @@
 		break;
 	case IB_CM_ESTABLISHED:
 		spin_unlock_irq(&cm_id_priv->lock);
+		if (cm_id_priv->qp_type == IB_QPT_XRC_TGT)
+			break;
 		ib_send_cm_dreq(cm_id, NULL, 0);
 		goto retest;
 	case IB_CM_DREQ_SENT:
@@ -1008,7 +1010,6 @@
 	req_msg->service_id = param->service_id;
 	req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
 	cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
-	cm_req_set_resp_res(req_msg, param->responder_resources);
 	cm_req_set_init_depth(req_msg, param->initiator_depth);
 	cm_req_set_remote_resp_timeout(req_msg,
 				       param->remote_cm_response_timeout);
@@ -1017,12 +1018,16 @@
 	cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
 	cm_req_set_local_resp_timeout(req_msg,
 				      param->local_cm_response_timeout);
-	cm_req_set_retry_count(req_msg, param->retry_count);
 	req_msg->pkey = param->primary_path->pkey;
 	cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
-	cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
 	cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);
-	cm_req_set_srq(req_msg, param->srq);
+
+	if (param->qp_type != IB_QPT_XRC_INI) {
+		cm_req_set_resp_res(req_msg, param->responder_resources);
+		cm_req_set_retry_count(req_msg, param->retry_count);
+		cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
+		cm_req_set_srq(req_msg, param->srq);
+	}
 
 	if (pri_path->hop_limit <= 1) {
 		req_msg->primary_local_lid = pri_path->slid;
@@ -1080,7 +1085,8 @@
 	if (!param->primary_path)
 		return -EINVAL;
 
-	if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC)
+	if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC &&
+	    param->qp_type != IB_QPT_XRC_INI)
 		return -EINVAL;
 
 	if (param->private_data &&
@@ -1601,18 +1607,24 @@
 	cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
 	rep_msg->local_comm_id = cm_id_priv->id.local_id;
 	rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
-	cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
 	cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
 	rep_msg->resp_resources = param->responder_resources;
-	rep_msg->initiator_depth = param->initiator_depth;
 	cm_rep_set_target_ack_delay(rep_msg,
 				    cm_id_priv->av.port->cm_dev->ack_delay);
 	cm_rep_set_failover(rep_msg, param->failover_accepted);
-	cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
 	cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
-	cm_rep_set_srq(rep_msg, param->srq);
 	rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
 
+	if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) {
+		rep_msg->initiator_depth = param->initiator_depth;
+		cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
+		cm_rep_set_srq(rep_msg, param->srq);
+		cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
+	} else {
+		cm_rep_set_srq(rep_msg, 1);
+		cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num));
+	}
+
 	if (param->private_data && param->private_data_len)
 		memcpy(rep_msg->private_data, param->private_data,
 		       param->private_data_len);
@@ -1660,7 +1672,7 @@
 	cm_id_priv->initiator_depth = param->initiator_depth;
 	cm_id_priv->responder_resources = param->responder_resources;
 	cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
-	cm_id_priv->local_qpn = cm_rep_get_local_qpn(rep_msg);
+	cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
 
 out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 	return ret;
@@ -1731,7 +1743,7 @@
 }
 EXPORT_SYMBOL(ib_send_cm_rtu);
 
-static void cm_format_rep_event(struct cm_work *work)
+static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
 {
 	struct cm_rep_msg *rep_msg;
 	struct ib_cm_rep_event_param *param;
@@ -1740,7 +1752,7 @@
 	param = &work->cm_event.param.rep_rcvd;
 	param->remote_ca_guid = rep_msg->local_ca_guid;
 	param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
-	param->remote_qpn = be32_to_cpu(cm_rep_get_local_qpn(rep_msg));
+	param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type));
 	param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
 	param->responder_resources = rep_msg->initiator_depth;
 	param->initiator_depth = rep_msg->resp_resources;
@@ -1808,7 +1820,7 @@
 		return -EINVAL;
 	}
 
-	cm_format_rep_event(work);
+	cm_format_rep_event(work, cm_id_priv->qp_type);
 
 	spin_lock_irq(&cm_id_priv->lock);
 	switch (cm_id_priv->id.state) {
@@ -1823,7 +1835,7 @@
 
 	cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
 	cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
-	cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg);
+	cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
 
 	spin_lock(&cm.lock);
 	/* Check for duplicate REP. */
@@ -1850,7 +1862,7 @@
 
 	cm_id_priv->id.state = IB_CM_REP_RCVD;
 	cm_id_priv->id.remote_id = rep_msg->local_comm_id;
-	cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg);
+	cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
 	cm_id_priv->initiator_depth = rep_msg->resp_resources;
 	cm_id_priv->responder_resources = rep_msg->initiator_depth;
 	cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
@@ -3492,7 +3504,8 @@
 		qp_attr->path_mtu = cm_id_priv->path_mtu;
 		qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
 		qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
-		if (cm_id_priv->qp_type == IB_QPT_RC) {
+		if (cm_id_priv->qp_type == IB_QPT_RC ||
+		    cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
 			*qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
 					 IB_QP_MIN_RNR_TIMER;
 			qp_attr->max_dest_rd_atomic =
@@ -3537,15 +3550,21 @@
 		if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
 			*qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
 			qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
-			if (cm_id_priv->qp_type == IB_QPT_RC) {
-				*qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
-						 IB_QP_RNR_RETRY |
+			switch (cm_id_priv->qp_type) {
+			case IB_QPT_RC:
+			case IB_QPT_XRC_INI:
+				*qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
 						 IB_QP_MAX_QP_RD_ATOMIC;
-				qp_attr->timeout = cm_id_priv->av.timeout;
 				qp_attr->retry_cnt = cm_id_priv->retry_count;
 				qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
-				qp_attr->max_rd_atomic =
-					cm_id_priv->initiator_depth;
+				qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
+				/* fall through */
+			case IB_QPT_XRC_TGT:
+				*qp_attr_mask |= IB_QP_TIMEOUT;
+				qp_attr->timeout = cm_id_priv->av.timeout;
+				break;
+			default:
+				break;
 			}
 			if (cm_id_priv->alt_av.ah_attr.dlid) {
 				*qp_attr_mask |= IB_QP_PATH_MIG_STATE;
diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h
index 7e63c08..505db2a 100644
--- a/drivers/infiniband/core/cm_msgs.h
+++ b/drivers/infiniband/core/cm_msgs.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2011 Intel Corporation.  All rights reserved.
  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
  * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
  *
@@ -86,7 +86,7 @@
 	__be16 pkey;
 	/* path MTU:4, RDC exists:1, RNR retry count:3. */
 	u8 offset50;
-	/* max CM Retries:4, SRQ:1, rsvd:3 */
+	/* max CM Retries:4, SRQ:1, extended transport type:3 */
 	u8 offset51;
 
 	__be16 primary_local_lid;
@@ -175,6 +175,11 @@
 	switch(transport_type) {
 	case 0: return IB_QPT_RC;
 	case 1: return IB_QPT_UC;
+	case 3:
+		switch (req_msg->offset51 & 0x7) {
+		case 1: return IB_QPT_XRC_TGT;
+		default: return 0;
+		}
 	default: return 0;
 	}
 }
@@ -188,6 +193,12 @@
 						  req_msg->offset40) &
 						   0xFFFFFFF9) | 0x2);
 		break;
+	case IB_QPT_XRC_INI:
+		req_msg->offset40 = cpu_to_be32((be32_to_cpu(
+						 req_msg->offset40) &
+						   0xFFFFFFF9) | 0x6);
+		req_msg->offset51 = (req_msg->offset51 & 0xF8) | 1;
+		break;
 	default:
 		req_msg->offset40 = cpu_to_be32(be32_to_cpu(
 						 req_msg->offset40) &
@@ -527,6 +538,23 @@
 			    (be32_to_cpu(rep_msg->offset12) & 0x000000FF));
 }
 
+static inline __be32 cm_rep_get_local_eecn(struct cm_rep_msg *rep_msg)
+{
+	return cpu_to_be32(be32_to_cpu(rep_msg->offset16) >> 8);
+}
+
+static inline void cm_rep_set_local_eecn(struct cm_rep_msg *rep_msg, __be32 eecn)
+{
+	rep_msg->offset16 = cpu_to_be32((be32_to_cpu(eecn) << 8) |
+			    (be32_to_cpu(rep_msg->offset16) & 0x000000FF));
+}
+
+static inline __be32 cm_rep_get_qpn(struct cm_rep_msg *rep_msg, enum ib_qp_type qp_type)
+{
+	return (qp_type == IB_QPT_XRC_INI) ?
+		cm_rep_get_local_eecn(rep_msg) : cm_rep_get_local_qpn(rep_msg);
+}
+
 static inline __be32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg)
 {
 	return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index ca4c5dc..872b184 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -81,6 +81,7 @@
 static DEFINE_IDR(tcp_ps);
 static DEFINE_IDR(udp_ps);
 static DEFINE_IDR(ipoib_ps);
+static DEFINE_IDR(ib_ps);
 
 struct cma_device {
 	struct list_head	list;
@@ -1179,6 +1180,15 @@
 	event->param.conn.qp_num = req_data->remote_qpn;
 }
 
+static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event)
+{
+	return (((ib_event->event == IB_CM_REQ_RECEIVED) ||
+		 (ib_event->param.req_rcvd.qp_type == id->qp_type)) ||
+		((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) &&
+		 (id->qp_type == IB_QPT_UD)) ||
+		(!id->qp_type));
+}
+
 static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 {
 	struct rdma_id_private *listen_id, *conn_id;
@@ -1186,13 +1196,16 @@
 	int offset, ret;
 
 	listen_id = cm_id->context;
+	if (!cma_check_req_qp_type(&listen_id->id, ib_event))
+		return -EINVAL;
+
 	if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))
 		return -ECONNABORTED;
 
 	memset(&event, 0, sizeof event);
 	offset = cma_user_data_offset(listen_id->id.ps);
 	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
-	if (listen_id->id.qp_type == IB_QPT_UD) {
+	if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
 		conn_id = cma_new_udp_id(&listen_id->id, ib_event);
 		event.param.ud.private_data = ib_event->private_data + offset;
 		event.param.ud.private_data_len =
@@ -1328,6 +1341,8 @@
 		switch (iw_event->status) {
 		case 0:
 			event.event = RDMA_CM_EVENT_ESTABLISHED;
+			event.param.conn.initiator_depth = iw_event->ird;
+			event.param.conn.responder_resources = iw_event->ord;
 			break;
 		case -ECONNRESET:
 		case -ECONNREFUSED:
@@ -1343,6 +1358,8 @@
 		break;
 	case IW_CM_EVENT_ESTABLISHED:
 		event.event = RDMA_CM_EVENT_ESTABLISHED;
+		event.param.conn.initiator_depth = iw_event->ird;
+		event.param.conn.responder_resources = iw_event->ord;
 		break;
 	default:
 		BUG_ON(1);
@@ -1433,8 +1450,8 @@
 	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
 	event.param.conn.private_data = iw_event->private_data;
 	event.param.conn.private_data_len = iw_event->private_data_len;
-	event.param.conn.initiator_depth = attr.max_qp_init_rd_atom;
-	event.param.conn.responder_resources = attr.max_qp_rd_atom;
+	event.param.conn.initiator_depth = iw_event->ird;
+	event.param.conn.responder_resources = iw_event->ord;
 
 	/*
 	 * Protect against the user destroying conn_id from another thread
@@ -2234,6 +2251,9 @@
 	case RDMA_PS_IPOIB:
 		ps = &ipoib_ps;
 		break;
+	case RDMA_PS_IB:
+		ps = &ib_ps;
+		break;
 	default:
 		return -EPROTONOSUPPORT;
 	}
@@ -2569,7 +2589,7 @@
 	req.service_id = cma_get_service_id(id_priv->id.ps,
 					    (struct sockaddr *) &route->addr.dst_addr);
 	req.qp_num = id_priv->qp_num;
-	req.qp_type = IB_QPT_RC;
+	req.qp_type = id_priv->id.qp_type;
 	req.starting_psn = id_priv->seq_num;
 	req.responder_resources = conn_param->responder_resources;
 	req.initiator_depth = conn_param->initiator_depth;
@@ -2616,14 +2636,16 @@
 	if (ret)
 		goto out;
 
-	iw_param.ord = conn_param->initiator_depth;
-	iw_param.ird = conn_param->responder_resources;
-	iw_param.private_data = conn_param->private_data;
-	iw_param.private_data_len = conn_param->private_data_len;
-	if (id_priv->id.qp)
+	if (conn_param) {
+		iw_param.ord = conn_param->initiator_depth;
+		iw_param.ird = conn_param->responder_resources;
+		iw_param.private_data = conn_param->private_data;
+		iw_param.private_data_len = conn_param->private_data_len;
+		iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num;
+	} else {
+		memset(&iw_param, 0, sizeof iw_param);
 		iw_param.qpn = id_priv->qp_num;
-	else
-		iw_param.qpn = conn_param->qp_num;
+	}
 	ret = iw_cm_connect(cm_id, &iw_param);
 out:
 	if (ret) {
@@ -2765,14 +2787,20 @@
 
 	switch (rdma_node_get_transport(id->device->node_type)) {
 	case RDMA_TRANSPORT_IB:
-		if (id->qp_type == IB_QPT_UD)
-			ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
-						conn_param->private_data,
-						conn_param->private_data_len);
-		else if (conn_param)
-			ret = cma_accept_ib(id_priv, conn_param);
-		else
-			ret = cma_rep_recv(id_priv);
+		if (id->qp_type == IB_QPT_UD) {
+			if (conn_param)
+				ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
+							conn_param->private_data,
+							conn_param->private_data_len);
+			else
+				ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
+							NULL, 0);
+		} else {
+			if (conn_param)
+				ret = cma_accept_ib(id_priv, conn_param);
+			else
+				ret = cma_rep_recv(id_priv);
+		}
 		break;
 	case RDMA_TRANSPORT_IWARP:
 		ret = cma_accept_iw(id_priv, conn_param);
@@ -3460,6 +3488,7 @@
 	idr_destroy(&tcp_ps);
 	idr_destroy(&udp_ps);
 	idr_destroy(&ipoib_ps);
+	idr_destroy(&ib_ps);
 }
 
 module_init(cma_init);
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index b4d8672..0563892 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -1596,6 +1596,9 @@
 					mad->mad_hdr.class_version].class;
 			if (!class)
 				goto out;
+			if (convert_mgmt_class(mad->mad_hdr.mgmt_class) >=
+			    IB_MGMT_MAX_METHODS)
+				goto out;
 			method = class->method_table[convert_mgmt_class(
 							mad->mad_hdr.mgmt_class)];
 			if (method)
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 9ab5df7..2b59b72 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -185,17 +185,35 @@
 	if (ret)
 		return ret;
 
+	rate = (25 * attr.active_speed) / 10;
+
 	switch (attr.active_speed) {
-	case 2: speed = " DDR"; break;
-	case 4: speed = " QDR"; break;
+	case 2:
+		speed = " DDR";
+		break;
+	case 4:
+		speed = " QDR";
+		break;
+	case 8:
+		speed = " FDR10";
+		rate = 10;
+		break;
+	case 16:
+		speed = " FDR";
+		rate = 14;
+		break;
+	case 32:
+		speed = " EDR";
+		rate = 25;
+		break;
 	}
 
-	rate = 25 * ib_width_enum_to_int(attr.active_width) * attr.active_speed;
+	rate *= ib_width_enum_to_int(attr.active_width);
 	if (rate < 0)
 		return -EINVAL;
 
 	return sprintf(buf, "%d%s Gb/sec (%dX%s)\n",
-		       rate / 10, rate % 10 ? ".5" : "",
+		       rate, (attr.active_speed == 1) ? ".5" : "",
 		       ib_width_enum_to_int(attr.active_width), speed);
 }
 
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 08f948d..b8a0b4a 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -1122,7 +1122,7 @@
 	if (copy_from_user(&hdr, buf, sizeof(hdr)))
 		return -EFAULT;
 
-	if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table))
+	if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table))
 		return -EINVAL;
 
 	if (hdr.in + sizeof(hdr) > len)
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 71be5ee..b69307f 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -276,7 +276,7 @@
 	ucma_set_event_context(ctx, event, uevent);
 	uevent->resp.event = event->event;
 	uevent->resp.status = event->status;
-	if (cm_id->ps == RDMA_PS_UDP || cm_id->ps == RDMA_PS_IPOIB)
+	if (cm_id->qp_type == IB_QPT_UD)
 		ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud);
 	else
 		ucma_copy_conn_event(&uevent->resp.param.conn,
@@ -377,6 +377,9 @@
 	case RDMA_PS_IPOIB:
 		*qp_type = IB_QPT_UD;
 		return 0;
+	case RDMA_PS_IB:
+		*qp_type = cmd->qp_type;
+		return 0;
 	default:
 		return -EINVAL;
 	}
@@ -1270,7 +1273,7 @@
 	if (copy_from_user(&hdr, buf, sizeof(hdr)))
 		return -EFAULT;
 
-	if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
+	if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
 		return -EINVAL;
 
 	if (hdr.in + sizeof(hdr) > len)
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 8d261b6..07db229 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -458,8 +458,7 @@
 		goto err;
 	}
 
-	if (packet->mad.hdr.id < 0 ||
-	    packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) {
+	if (packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) {
 		ret = -EINVAL;
 		goto err;
 	}
@@ -703,7 +702,7 @@
 	mutex_lock(&file->port->file_mutex);
 	mutex_lock(&file->mutex);
 
-	if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) {
+	if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) {
 		ret = -EINVAL;
 		goto out;
 	}
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index a078e56..5bcb2af 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -76,6 +76,8 @@
 	struct ib_device		       *ib_dev;
 	int					devnum;
 	struct cdev			        cdev;
+	struct rb_root				xrcd_tree;
+	struct mutex				xrcd_tree_mutex;
 };
 
 struct ib_uverbs_event_file {
@@ -120,6 +122,16 @@
 	u32			events_reported;
 };
 
+struct ib_uxrcd_object {
+	struct ib_uobject	uobject;
+	atomic_t		refcnt;
+};
+
+struct ib_usrq_object {
+	struct ib_uevent_object	uevent;
+	struct ib_uxrcd_object *uxrcd;
+};
+
 struct ib_uqp_object {
 	struct ib_uevent_object	uevent;
 	struct list_head 	mcast_list;
@@ -142,6 +154,7 @@
 extern struct idr ib_uverbs_cq_idr;
 extern struct idr ib_uverbs_qp_idr;
 extern struct idr ib_uverbs_srq_idr;
+extern struct idr ib_uverbs_xrcd_idr;
 
 void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
 
@@ -161,6 +174,7 @@
 void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
 void ib_uverbs_event_handler(struct ib_event_handler *handler,
 			     struct ib_event *event);
+void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
 
 #define IB_UVERBS_DECLARE_CMD(name)					\
 	ssize_t ib_uverbs_##name(struct ib_uverbs_file *file,		\
@@ -181,6 +195,7 @@
 IB_UVERBS_DECLARE_CMD(req_notify_cq);
 IB_UVERBS_DECLARE_CMD(destroy_cq);
 IB_UVERBS_DECLARE_CMD(create_qp);
+IB_UVERBS_DECLARE_CMD(open_qp);
 IB_UVERBS_DECLARE_CMD(query_qp);
 IB_UVERBS_DECLARE_CMD(modify_qp);
 IB_UVERBS_DECLARE_CMD(destroy_qp);
@@ -195,5 +210,8 @@
 IB_UVERBS_DECLARE_CMD(modify_srq);
 IB_UVERBS_DECLARE_CMD(query_srq);
 IB_UVERBS_DECLARE_CMD(destroy_srq);
+IB_UVERBS_DECLARE_CMD(create_xsrq);
+IB_UVERBS_DECLARE_CMD(open_xrcd);
+IB_UVERBS_DECLARE_CMD(close_xrcd);
 
 #endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index c426992..254f164 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -47,6 +47,7 @@
 static struct lock_class_key qp_lock_key;
 static struct lock_class_key ah_lock_key;
 static struct lock_class_key srq_lock_key;
+static struct lock_class_key xrcd_lock_key;
 
 #define INIT_UDATA(udata, ibuf, obuf, ilen, olen)			\
 	do {								\
@@ -255,6 +256,18 @@
 	put_uobj_read(srq->uobject);
 }
 
+static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context,
+				     struct ib_uobject **uobj)
+{
+	*uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0);
+	return *uobj ? (*uobj)->object : NULL;
+}
+
+static void put_xrcd_read(struct ib_uobject *uobj)
+{
+	put_uobj_read(uobj);
+}
+
 ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 			      const char __user *buf,
 			      int in_len, int out_len)
@@ -298,6 +311,7 @@
 	INIT_LIST_HEAD(&ucontext->qp_list);
 	INIT_LIST_HEAD(&ucontext->srq_list);
 	INIT_LIST_HEAD(&ucontext->ah_list);
+	INIT_LIST_HEAD(&ucontext->xrcd_list);
 	ucontext->closing = 0;
 
 	resp.num_comp_vectors = file->device->num_comp_vectors;
@@ -579,6 +593,310 @@
 	return in_len;
 }
 
+struct xrcd_table_entry {
+	struct rb_node  node;
+	struct ib_xrcd *xrcd;
+	struct inode   *inode;
+};
+
+static int xrcd_table_insert(struct ib_uverbs_device *dev,
+			    struct inode *inode,
+			    struct ib_xrcd *xrcd)
+{
+	struct xrcd_table_entry *entry, *scan;
+	struct rb_node **p = &dev->xrcd_tree.rb_node;
+	struct rb_node *parent = NULL;
+
+	entry = kmalloc(sizeof *entry, GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->xrcd  = xrcd;
+	entry->inode = inode;
+
+	while (*p) {
+		parent = *p;
+		scan = rb_entry(parent, struct xrcd_table_entry, node);
+
+		if (inode < scan->inode) {
+			p = &(*p)->rb_left;
+		} else if (inode > scan->inode) {
+			p = &(*p)->rb_right;
+		} else {
+			kfree(entry);
+			return -EEXIST;
+		}
+	}
+
+	rb_link_node(&entry->node, parent, p);
+	rb_insert_color(&entry->node, &dev->xrcd_tree);
+	igrab(inode);
+	return 0;
+}
+
+static struct xrcd_table_entry *xrcd_table_search(struct ib_uverbs_device *dev,
+						  struct inode *inode)
+{
+	struct xrcd_table_entry *entry;
+	struct rb_node *p = dev->xrcd_tree.rb_node;
+
+	while (p) {
+		entry = rb_entry(p, struct xrcd_table_entry, node);
+
+		if (inode < entry->inode)
+			p = p->rb_left;
+		else if (inode > entry->inode)
+			p = p->rb_right;
+		else
+			return entry;
+	}
+
+	return NULL;
+}
+
+static struct ib_xrcd *find_xrcd(struct ib_uverbs_device *dev, struct inode *inode)
+{
+	struct xrcd_table_entry *entry;
+
+	entry = xrcd_table_search(dev, inode);
+	if (!entry)
+		return NULL;
+
+	return entry->xrcd;
+}
+
+static void xrcd_table_delete(struct ib_uverbs_device *dev,
+			      struct inode *inode)
+{
+	struct xrcd_table_entry *entry;
+
+	entry = xrcd_table_search(dev, inode);
+	if (entry) {
+		iput(inode);
+		rb_erase(&entry->node, &dev->xrcd_tree);
+		kfree(entry);
+	}
+}
+
+ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
+			    const char __user *buf, int in_len,
+			    int out_len)
+{
+	struct ib_uverbs_open_xrcd	cmd;
+	struct ib_uverbs_open_xrcd_resp	resp;
+	struct ib_udata			udata;
+	struct ib_uxrcd_object         *obj;
+	struct ib_xrcd                 *xrcd = NULL;
+	struct file                    *f = NULL;
+	struct inode                   *inode = NULL;
+	int				ret = 0;
+	int				new_xrcd = 0;
+
+	if (out_len < sizeof resp)
+		return -ENOSPC;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	INIT_UDATA(&udata, buf + sizeof cmd,
+		   (unsigned long) cmd.response + sizeof resp,
+		   in_len - sizeof cmd, out_len - sizeof  resp);
+
+	mutex_lock(&file->device->xrcd_tree_mutex);
+
+	if (cmd.fd != -1) {
+		/* search for file descriptor */
+		f = fget(cmd.fd);
+		if (!f) {
+			ret = -EBADF;
+			goto err_tree_mutex_unlock;
+		}
+
+		inode = f->f_dentry->d_inode;
+		if (!inode) {
+			ret = -EBADF;
+			goto err_tree_mutex_unlock;
+		}
+
+		xrcd = find_xrcd(file->device, inode);
+		if (!xrcd && !(cmd.oflags & O_CREAT)) {
+			/* no file descriptor. Need CREATE flag */
+			ret = -EAGAIN;
+			goto err_tree_mutex_unlock;
+		}
+
+		if (xrcd && cmd.oflags & O_EXCL) {
+			ret = -EINVAL;
+			goto err_tree_mutex_unlock;
+		}
+	}
+
+	obj = kmalloc(sizeof *obj, GFP_KERNEL);
+	if (!obj) {
+		ret = -ENOMEM;
+		goto err_tree_mutex_unlock;
+	}
+
+	init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_key);
+
+	down_write(&obj->uobject.mutex);
+
+	if (!xrcd) {
+		xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev,
+							file->ucontext, &udata);
+		if (IS_ERR(xrcd)) {
+			ret = PTR_ERR(xrcd);
+			goto err;
+		}
+
+		xrcd->inode   = inode;
+		xrcd->device  = file->device->ib_dev;
+		atomic_set(&xrcd->usecnt, 0);
+		mutex_init(&xrcd->tgt_qp_mutex);
+		INIT_LIST_HEAD(&xrcd->tgt_qp_list);
+		new_xrcd = 1;
+	}
+
+	atomic_set(&obj->refcnt, 0);
+	obj->uobject.object = xrcd;
+	ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
+	if (ret)
+		goto err_idr;
+
+	memset(&resp, 0, sizeof resp);
+	resp.xrcd_handle = obj->uobject.id;
+
+	if (inode) {
+		if (new_xrcd) {
+			/* create new inode/xrcd table entry */
+			ret = xrcd_table_insert(file->device, inode, xrcd);
+			if (ret)
+				goto err_insert_xrcd;
+		}
+		atomic_inc(&xrcd->usecnt);
+	}
+
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp)) {
+		ret = -EFAULT;
+		goto err_copy;
+	}
+
+	if (f)
+		fput(f);
+
+	mutex_lock(&file->mutex);
+	list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list);
+	mutex_unlock(&file->mutex);
+
+	obj->uobject.live = 1;
+	up_write(&obj->uobject.mutex);
+
+	mutex_unlock(&file->device->xrcd_tree_mutex);
+	return in_len;
+
+err_copy:
+	if (inode) {
+		if (new_xrcd)
+			xrcd_table_delete(file->device, inode);
+		atomic_dec(&xrcd->usecnt);
+	}
+
+err_insert_xrcd:
+	idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
+
+err_idr:
+	ib_dealloc_xrcd(xrcd);
+
+err:
+	put_uobj_write(&obj->uobject);
+
+err_tree_mutex_unlock:
+	if (f)
+		fput(f);
+
+	mutex_unlock(&file->device->xrcd_tree_mutex);
+
+	return ret;
+}
+
+ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
+			     const char __user *buf, int in_len,
+			     int out_len)
+{
+	struct ib_uverbs_close_xrcd cmd;
+	struct ib_uobject           *uobj;
+	struct ib_xrcd              *xrcd = NULL;
+	struct inode                *inode = NULL;
+	struct ib_uxrcd_object      *obj;
+	int                         live;
+	int                         ret = 0;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	mutex_lock(&file->device->xrcd_tree_mutex);
+	uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext);
+	if (!uobj) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	xrcd  = uobj->object;
+	inode = xrcd->inode;
+	obj   = container_of(uobj, struct ib_uxrcd_object, uobject);
+	if (atomic_read(&obj->refcnt)) {
+		put_uobj_write(uobj);
+		ret = -EBUSY;
+		goto out;
+	}
+
+	if (!inode || atomic_dec_and_test(&xrcd->usecnt)) {
+		ret = ib_dealloc_xrcd(uobj->object);
+		if (!ret)
+			uobj->live = 0;
+	}
+
+	live = uobj->live;
+	if (inode && ret)
+		atomic_inc(&xrcd->usecnt);
+
+	put_uobj_write(uobj);
+
+	if (ret)
+		goto out;
+
+	if (inode && !live)
+		xrcd_table_delete(file->device, inode);
+
+	idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
+	mutex_lock(&file->mutex);
+	list_del(&uobj->list);
+	mutex_unlock(&file->mutex);
+
+	put_uobj(uobj);
+	ret = in_len;
+
+out:
+	mutex_unlock(&file->device->xrcd_tree_mutex);
+	return ret;
+}
+
+void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
+			    struct ib_xrcd *xrcd)
+{
+	struct inode *inode;
+
+	inode = xrcd->inode;
+	if (inode && !atomic_dec_and_test(&xrcd->usecnt))
+		return;
+
+	ib_dealloc_xrcd(xrcd);
+
+	if (inode)
+		xrcd_table_delete(dev, inode);
+}
+
 ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 			 const char __user *buf, int in_len,
 			 int out_len)
@@ -1052,9 +1370,12 @@
 	struct ib_uverbs_create_qp_resp resp;
 	struct ib_udata                 udata;
 	struct ib_uqp_object           *obj;
-	struct ib_pd                   *pd;
-	struct ib_cq                   *scq, *rcq;
-	struct ib_srq                  *srq;
+	struct ib_device	       *device;
+	struct ib_pd                   *pd = NULL;
+	struct ib_xrcd		       *xrcd = NULL;
+	struct ib_uobject	       *uninitialized_var(xrcd_uobj);
+	struct ib_cq                   *scq = NULL, *rcq = NULL;
+	struct ib_srq                  *srq = NULL;
 	struct ib_qp                   *qp;
 	struct ib_qp_init_attr          attr;
 	int ret;
@@ -1076,15 +1397,39 @@
 	init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key);
 	down_write(&obj->uevent.uobject.mutex);
 
-	srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL;
-	pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
-	scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0);
-	rcq = cmd.recv_cq_handle == cmd.send_cq_handle ?
-		scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1);
+	if (cmd.qp_type == IB_QPT_XRC_TGT) {
+		xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
+		if (!xrcd) {
+			ret = -EINVAL;
+			goto err_put;
+		}
+		device = xrcd->device;
+	} else {
+		pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
+		scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0);
+		if (!pd || !scq) {
+			ret = -EINVAL;
+			goto err_put;
+		}
 
-	if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) {
-		ret = -EINVAL;
-		goto err_put;
+		if (cmd.qp_type == IB_QPT_XRC_INI) {
+			cmd.max_recv_wr = cmd.max_recv_sge = 0;
+		} else {
+			if (cmd.is_srq) {
+				srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+				if (!srq || srq->srq_type != IB_SRQT_BASIC) {
+					ret = -EINVAL;
+					goto err_put;
+				}
+			}
+			rcq = (cmd.recv_cq_handle == cmd.send_cq_handle) ?
+			       scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1);
+			if (!rcq) {
+				ret = -EINVAL;
+				goto err_put;
+			}
+		}
+		device = pd->device;
 	}
 
 	attr.event_handler = ib_uverbs_qp_event_handler;
@@ -1092,6 +1437,7 @@
 	attr.send_cq       = scq;
 	attr.recv_cq       = rcq;
 	attr.srq           = srq;
+	attr.xrcd	   = xrcd;
 	attr.sq_sig_type   = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
 	attr.qp_type       = cmd.qp_type;
 	attr.create_flags  = 0;
@@ -1106,26 +1452,34 @@
 	INIT_LIST_HEAD(&obj->uevent.event_list);
 	INIT_LIST_HEAD(&obj->mcast_list);
 
-	qp = pd->device->create_qp(pd, &attr, &udata);
+	if (cmd.qp_type == IB_QPT_XRC_TGT)
+		qp = ib_create_qp(pd, &attr);
+	else
+		qp = device->create_qp(pd, &attr, &udata);
+
 	if (IS_ERR(qp)) {
 		ret = PTR_ERR(qp);
 		goto err_put;
 	}
 
-	qp->device     	  = pd->device;
-	qp->pd         	  = pd;
-	qp->send_cq    	  = attr.send_cq;
-	qp->recv_cq    	  = attr.recv_cq;
-	qp->srq	       	  = attr.srq;
-	qp->uobject       = &obj->uevent.uobject;
-	qp->event_handler = attr.event_handler;
-	qp->qp_context    = attr.qp_context;
-	qp->qp_type	  = attr.qp_type;
-	atomic_inc(&pd->usecnt);
-	atomic_inc(&attr.send_cq->usecnt);
-	atomic_inc(&attr.recv_cq->usecnt);
-	if (attr.srq)
-		atomic_inc(&attr.srq->usecnt);
+	if (cmd.qp_type != IB_QPT_XRC_TGT) {
+		qp->real_qp	  = qp;
+		qp->device	  = device;
+		qp->pd		  = pd;
+		qp->send_cq	  = attr.send_cq;
+		qp->recv_cq	  = attr.recv_cq;
+		qp->srq		  = attr.srq;
+		qp->event_handler = attr.event_handler;
+		qp->qp_context	  = attr.qp_context;
+		qp->qp_type	  = attr.qp_type;
+		atomic_inc(&pd->usecnt);
+		atomic_inc(&attr.send_cq->usecnt);
+		if (attr.recv_cq)
+			atomic_inc(&attr.recv_cq->usecnt);
+		if (attr.srq)
+			atomic_inc(&attr.srq->usecnt);
+	}
+	qp->uobject = &obj->uevent.uobject;
 
 	obj->uevent.uobject.object = qp;
 	ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
@@ -1147,9 +1501,13 @@
 		goto err_copy;
 	}
 
-	put_pd_read(pd);
-	put_cq_read(scq);
-	if (rcq != scq)
+	if (xrcd)
+		put_xrcd_read(xrcd_uobj);
+	if (pd)
+		put_pd_read(pd);
+	if (scq)
+		put_cq_read(scq);
+	if (rcq && rcq != scq)
 		put_cq_read(rcq);
 	if (srq)
 		put_srq_read(srq);
@@ -1171,6 +1529,8 @@
 	ib_destroy_qp(qp);
 
 err_put:
+	if (xrcd)
+		put_xrcd_read(xrcd_uobj);
 	if (pd)
 		put_pd_read(pd);
 	if (scq)
@@ -1184,6 +1544,98 @@
 	return ret;
 }
 
+ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
+			  const char __user *buf, int in_len, int out_len)
+{
+	struct ib_uverbs_open_qp        cmd;
+	struct ib_uverbs_create_qp_resp resp;
+	struct ib_udata                 udata;
+	struct ib_uqp_object           *obj;
+	struct ib_xrcd		       *xrcd;
+	struct ib_uobject	       *uninitialized_var(xrcd_uobj);
+	struct ib_qp                   *qp;
+	struct ib_qp_open_attr          attr;
+	int ret;
+
+	if (out_len < sizeof resp)
+		return -ENOSPC;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	INIT_UDATA(&udata, buf + sizeof cmd,
+		   (unsigned long) cmd.response + sizeof resp,
+		   in_len - sizeof cmd, out_len - sizeof resp);
+
+	obj = kmalloc(sizeof *obj, GFP_KERNEL);
+	if (!obj)
+		return -ENOMEM;
+
+	init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key);
+	down_write(&obj->uevent.uobject.mutex);
+
+	xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
+	if (!xrcd) {
+		ret = -EINVAL;
+		goto err_put;
+	}
+
+	attr.event_handler = ib_uverbs_qp_event_handler;
+	attr.qp_context    = file;
+	attr.qp_num        = cmd.qpn;
+	attr.qp_type       = cmd.qp_type;
+
+	obj->uevent.events_reported = 0;
+	INIT_LIST_HEAD(&obj->uevent.event_list);
+	INIT_LIST_HEAD(&obj->mcast_list);
+
+	qp = ib_open_qp(xrcd, &attr);
+	if (IS_ERR(qp)) {
+		ret = PTR_ERR(qp);
+		goto err_put;
+	}
+
+	qp->uobject = &obj->uevent.uobject;
+
+	obj->uevent.uobject.object = qp;
+	ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
+	if (ret)
+		goto err_destroy;
+
+	memset(&resp, 0, sizeof resp);
+	resp.qpn       = qp->qp_num;
+	resp.qp_handle = obj->uevent.uobject.id;
+
+	if (copy_to_user((void __user *) (unsigned long) cmd.response,
+			 &resp, sizeof resp)) {
+		ret = -EFAULT;
+		goto err_remove;
+	}
+
+	put_xrcd_read(xrcd_uobj);
+
+	mutex_lock(&file->mutex);
+	list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
+	mutex_unlock(&file->mutex);
+
+	obj->uevent.uobject.live = 1;
+
+	up_write(&obj->uevent.uobject.mutex);
+
+	return in_len;
+
+err_remove:
+	idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
+
+err_destroy:
+	ib_destroy_qp(qp);
+
+err_put:
+	put_xrcd_read(xrcd_uobj);
+	put_uobj_write(&obj->uevent.uobject);
+	return ret;
+}
+
 ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
 			   const char __user *buf, int in_len,
 			   int out_len)
@@ -1284,6 +1736,20 @@
 	return ret ? ret : in_len;
 }
 
+/* Remove ignored fields set in the attribute mask */
+static int modify_qp_mask(enum ib_qp_type qp_type, int mask)
+{
+	switch (qp_type) {
+	case IB_QPT_XRC_INI:
+		return mask & ~(IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER);
+	case IB_QPT_XRC_TGT:
+		return mask & ~(IB_QP_MAX_QP_RD_ATOMIC | IB_QP_RETRY_CNT |
+				IB_QP_RNR_RETRY);
+	default:
+		return mask;
+	}
+}
+
 ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
 			    const char __user *buf, int in_len,
 			    int out_len)
@@ -1356,7 +1822,12 @@
 	attr->alt_ah_attr.ah_flags 	    = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
 	attr->alt_ah_attr.port_num 	    = cmd.alt_dest.port_num;
 
-	ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata);
+	if (qp->real_qp == qp) {
+		ret = qp->device->modify_qp(qp, attr,
+			modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata);
+	} else {
+		ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask));
+	}
 
 	put_qp_read(qp);
 
@@ -1553,7 +2024,7 @@
 	}
 
 	resp.bad_wr = 0;
-	ret = qp->device->post_send(qp, wr, &bad_wr);
+	ret = qp->device->post_send(qp->real_qp, wr, &bad_wr);
 	if (ret)
 		for (next = wr; next; next = next->next) {
 			++resp.bad_wr;
@@ -1691,7 +2162,7 @@
 		goto out;
 
 	resp.bad_wr = 0;
-	ret = qp->device->post_recv(qp, wr, &bad_wr);
+	ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr);
 
 	put_qp_read(qp);
 
@@ -1975,17 +2446,180 @@
 	return ret ? ret : in_len;
 }
 
+int __uverbs_create_xsrq(struct ib_uverbs_file *file,
+			 struct ib_uverbs_create_xsrq *cmd,
+			 struct ib_udata *udata)
+{
+	struct ib_uverbs_create_srq_resp resp;
+	struct ib_usrq_object           *obj;
+	struct ib_pd                    *pd;
+	struct ib_srq                   *srq;
+	struct ib_uobject               *uninitialized_var(xrcd_uobj);
+	struct ib_srq_init_attr          attr;
+	int ret;
+
+	obj = kmalloc(sizeof *obj, GFP_KERNEL);
+	if (!obj)
+		return -ENOMEM;
+
+	init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_key);
+	down_write(&obj->uevent.uobject.mutex);
+
+	pd  = idr_read_pd(cmd->pd_handle, file->ucontext);
+	if (!pd) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (cmd->srq_type == IB_SRQT_XRC) {
+		attr.ext.xrc.cq  = idr_read_cq(cmd->cq_handle, file->ucontext, 0);
+		if (!attr.ext.xrc.cq) {
+			ret = -EINVAL;
+			goto err_put_pd;
+		}
+
+		attr.ext.xrc.xrcd  = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj);
+		if (!attr.ext.xrc.xrcd) {
+			ret = -EINVAL;
+			goto err_put_cq;
+		}
+
+		obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
+		atomic_inc(&obj->uxrcd->refcnt);
+	}
+
+	attr.event_handler  = ib_uverbs_srq_event_handler;
+	attr.srq_context    = file;
+	attr.srq_type       = cmd->srq_type;
+	attr.attr.max_wr    = cmd->max_wr;
+	attr.attr.max_sge   = cmd->max_sge;
+	attr.attr.srq_limit = cmd->srq_limit;
+
+	obj->uevent.events_reported = 0;
+	INIT_LIST_HEAD(&obj->uevent.event_list);
+
+	srq = pd->device->create_srq(pd, &attr, udata);
+	if (IS_ERR(srq)) {
+		ret = PTR_ERR(srq);
+		goto err_put;
+	}
+
+	srq->device        = pd->device;
+	srq->pd            = pd;
+	srq->srq_type	   = cmd->srq_type;
+	srq->uobject       = &obj->uevent.uobject;
+	srq->event_handler = attr.event_handler;
+	srq->srq_context   = attr.srq_context;
+
+	if (cmd->srq_type == IB_SRQT_XRC) {
+		srq->ext.xrc.cq   = attr.ext.xrc.cq;
+		srq->ext.xrc.xrcd = attr.ext.xrc.xrcd;
+		atomic_inc(&attr.ext.xrc.cq->usecnt);
+		atomic_inc(&attr.ext.xrc.xrcd->usecnt);
+	}
+
+	atomic_inc(&pd->usecnt);
+	atomic_set(&srq->usecnt, 0);
+
+	obj->uevent.uobject.object = srq;
+	ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
+	if (ret)
+		goto err_destroy;
+
+	memset(&resp, 0, sizeof resp);
+	resp.srq_handle = obj->uevent.uobject.id;
+	resp.max_wr     = attr.attr.max_wr;
+	resp.max_sge    = attr.attr.max_sge;
+	if (cmd->srq_type == IB_SRQT_XRC)
+		resp.srqn = srq->ext.xrc.srq_num;
+
+	if (copy_to_user((void __user *) (unsigned long) cmd->response,
+			 &resp, sizeof resp)) {
+		ret = -EFAULT;
+		goto err_copy;
+	}
+
+	if (cmd->srq_type == IB_SRQT_XRC) {
+		put_uobj_read(xrcd_uobj);
+		put_cq_read(attr.ext.xrc.cq);
+	}
+	put_pd_read(pd);
+
+	mutex_lock(&file->mutex);
+	list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list);
+	mutex_unlock(&file->mutex);
+
+	obj->uevent.uobject.live = 1;
+
+	up_write(&obj->uevent.uobject.mutex);
+
+	return 0;
+
+err_copy:
+	idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);
+
+err_destroy:
+	ib_destroy_srq(srq);
+
+err_put:
+	if (cmd->srq_type == IB_SRQT_XRC) {
+		atomic_dec(&obj->uxrcd->refcnt);
+		put_uobj_read(xrcd_uobj);
+	}
+
+err_put_cq:
+	if (cmd->srq_type == IB_SRQT_XRC)
+		put_cq_read(attr.ext.xrc.cq);
+
+err_put_pd:
+	put_pd_read(pd);
+
+err:
+	put_uobj_write(&obj->uevent.uobject);
+	return ret;
+}
+
 ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
 			     const char __user *buf, int in_len,
 			     int out_len)
 {
 	struct ib_uverbs_create_srq      cmd;
+	struct ib_uverbs_create_xsrq     xcmd;
 	struct ib_uverbs_create_srq_resp resp;
 	struct ib_udata                  udata;
-	struct ib_uevent_object         *obj;
-	struct ib_pd                    *pd;
-	struct ib_srq                   *srq;
-	struct ib_srq_init_attr          attr;
+	int ret;
+
+	if (out_len < sizeof resp)
+		return -ENOSPC;
+
+	if (copy_from_user(&cmd, buf, sizeof cmd))
+		return -EFAULT;
+
+	xcmd.response	 = cmd.response;
+	xcmd.user_handle = cmd.user_handle;
+	xcmd.srq_type	 = IB_SRQT_BASIC;
+	xcmd.pd_handle	 = cmd.pd_handle;
+	xcmd.max_wr	 = cmd.max_wr;
+	xcmd.max_sge	 = cmd.max_sge;
+	xcmd.srq_limit	 = cmd.srq_limit;
+
+	INIT_UDATA(&udata, buf + sizeof cmd,
+		   (unsigned long) cmd.response + sizeof resp,
+		   in_len - sizeof cmd, out_len - sizeof resp);
+
+	ret = __uverbs_create_xsrq(file, &xcmd, &udata);
+	if (ret)
+		return ret;
+
+	return in_len;
+}
+
+ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
+			      const char __user *buf, int in_len, int out_len)
+{
+	struct ib_uverbs_create_xsrq     cmd;
+	struct ib_uverbs_create_srq_resp resp;
+	struct ib_udata                  udata;
 	int ret;
 
 	if (out_len < sizeof resp)
@@ -1998,82 +2632,11 @@
 		   (unsigned long) cmd.response + sizeof resp,
 		   in_len - sizeof cmd, out_len - sizeof resp);
 
-	obj = kmalloc(sizeof *obj, GFP_KERNEL);
-	if (!obj)
-		return -ENOMEM;
-
-	init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &srq_lock_key);
-	down_write(&obj->uobject.mutex);
-
-	pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
-	if (!pd) {
-		ret = -EINVAL;
-		goto err;
-	}
-
-	attr.event_handler  = ib_uverbs_srq_event_handler;
-	attr.srq_context    = file;
-	attr.attr.max_wr    = cmd.max_wr;
-	attr.attr.max_sge   = cmd.max_sge;
-	attr.attr.srq_limit = cmd.srq_limit;
-
-	obj->events_reported     = 0;
-	INIT_LIST_HEAD(&obj->event_list);
-
-	srq = pd->device->create_srq(pd, &attr, &udata);
-	if (IS_ERR(srq)) {
-		ret = PTR_ERR(srq);
-		goto err_put;
-	}
-
-	srq->device    	   = pd->device;
-	srq->pd        	   = pd;
-	srq->uobject       = &obj->uobject;
-	srq->event_handler = attr.event_handler;
-	srq->srq_context   = attr.srq_context;
-	atomic_inc(&pd->usecnt);
-	atomic_set(&srq->usecnt, 0);
-
-	obj->uobject.object = srq;
-	ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uobject);
+	ret = __uverbs_create_xsrq(file, &cmd, &udata);
 	if (ret)
-		goto err_destroy;
-
-	memset(&resp, 0, sizeof resp);
-	resp.srq_handle = obj->uobject.id;
-	resp.max_wr     = attr.attr.max_wr;
-	resp.max_sge    = attr.attr.max_sge;
-
-	if (copy_to_user((void __user *) (unsigned long) cmd.response,
-			 &resp, sizeof resp)) {
-		ret = -EFAULT;
-		goto err_copy;
-	}
-
-	put_pd_read(pd);
-
-	mutex_lock(&file->mutex);
-	list_add_tail(&obj->uobject.list, &file->ucontext->srq_list);
-	mutex_unlock(&file->mutex);
-
-	obj->uobject.live = 1;
-
-	up_write(&obj->uobject.mutex);
+		return ret;
 
 	return in_len;
-
-err_copy:
-	idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uobject);
-
-err_destroy:
-	ib_destroy_srq(srq);
-
-err_put:
-	put_pd_read(pd);
-
-err:
-	put_uobj_write(&obj->uobject);
-	return ret;
 }
 
 ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 56898b6..8796367 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -72,6 +72,7 @@
 DEFINE_IDR(ib_uverbs_cq_idr);
 DEFINE_IDR(ib_uverbs_qp_idr);
 DEFINE_IDR(ib_uverbs_srq_idr);
+DEFINE_IDR(ib_uverbs_xrcd_idr);
 
 static DEFINE_SPINLOCK(map_lock);
 static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
@@ -107,6 +108,10 @@
 	[IB_USER_VERBS_CMD_MODIFY_SRQ]		= ib_uverbs_modify_srq,
 	[IB_USER_VERBS_CMD_QUERY_SRQ]		= ib_uverbs_query_srq,
 	[IB_USER_VERBS_CMD_DESTROY_SRQ]		= ib_uverbs_destroy_srq,
+	[IB_USER_VERBS_CMD_OPEN_XRCD]		= ib_uverbs_open_xrcd,
+	[IB_USER_VERBS_CMD_CLOSE_XRCD]		= ib_uverbs_close_xrcd,
+	[IB_USER_VERBS_CMD_CREATE_XSRQ]		= ib_uverbs_create_xsrq,
+	[IB_USER_VERBS_CMD_OPEN_QP]		= ib_uverbs_open_qp
 };
 
 static void ib_uverbs_add_one(struct ib_device *device);
@@ -202,8 +207,12 @@
 			container_of(uobj, struct ib_uqp_object, uevent.uobject);
 
 		idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
-		ib_uverbs_detach_umcast(qp, uqp);
-		ib_destroy_qp(qp);
+		if (qp != qp->real_qp) {
+			ib_close_qp(qp);
+		} else {
+			ib_uverbs_detach_umcast(qp, uqp);
+			ib_destroy_qp(qp);
+		}
 		ib_uverbs_release_uevent(file, &uqp->uevent);
 		kfree(uqp);
 	}
@@ -241,6 +250,18 @@
 		kfree(uobj);
 	}
 
+	mutex_lock(&file->device->xrcd_tree_mutex);
+	list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
+		struct ib_xrcd *xrcd = uobj->object;
+		struct ib_uxrcd_object *uxrcd =
+			container_of(uobj, struct ib_uxrcd_object, uobject);
+
+		idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
+		ib_uverbs_dealloc_xrcd(file->device, xrcd);
+		kfree(uxrcd);
+	}
+	mutex_unlock(&file->device->xrcd_tree_mutex);
+
 	list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
 		struct ib_pd *pd = uobj->object;
 
@@ -557,8 +578,7 @@
 	if (hdr.in_words * 4 != count)
 		return -EINVAL;
 
-	if (hdr.command < 0				||
-	    hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
+	if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
 	    !uverbs_cmd_table[hdr.command])
 		return -EINVAL;
 
@@ -741,6 +761,8 @@
 
 	kref_init(&uverbs_dev->ref);
 	init_completion(&uverbs_dev->comp);
+	uverbs_dev->xrcd_tree = RB_ROOT;
+	mutex_init(&uverbs_dev->xrcd_tree_mutex);
 
 	spin_lock(&map_lock);
 	devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index af7a8b0..4251750 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -39,6 +39,7 @@
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/string.h>
+#include <linux/slab.h>
 
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_cache.h>
@@ -77,6 +78,31 @@
 }
 EXPORT_SYMBOL(mult_to_ib_rate);
 
+int ib_rate_to_mbps(enum ib_rate rate)
+{
+	switch (rate) {
+	case IB_RATE_2_5_GBPS: return 2500;
+	case IB_RATE_5_GBPS:   return 5000;
+	case IB_RATE_10_GBPS:  return 10000;
+	case IB_RATE_20_GBPS:  return 20000;
+	case IB_RATE_30_GBPS:  return 30000;
+	case IB_RATE_40_GBPS:  return 40000;
+	case IB_RATE_60_GBPS:  return 60000;
+	case IB_RATE_80_GBPS:  return 80000;
+	case IB_RATE_120_GBPS: return 120000;
+	case IB_RATE_14_GBPS:  return 14062;
+	case IB_RATE_56_GBPS:  return 56250;
+	case IB_RATE_112_GBPS: return 112500;
+	case IB_RATE_168_GBPS: return 168750;
+	case IB_RATE_25_GBPS:  return 25781;
+	case IB_RATE_100_GBPS: return 103125;
+	case IB_RATE_200_GBPS: return 206250;
+	case IB_RATE_300_GBPS: return 309375;
+	default:	       return -1;
+	}
+}
+EXPORT_SYMBOL(ib_rate_to_mbps);
+
 enum rdma_transport_type
 rdma_node_get_transport(enum rdma_node_type node_type)
 {
@@ -250,6 +276,13 @@
 		srq->uobject       = NULL;
 		srq->event_handler = srq_init_attr->event_handler;
 		srq->srq_context   = srq_init_attr->srq_context;
+		srq->srq_type      = srq_init_attr->srq_type;
+		if (srq->srq_type == IB_SRQT_XRC) {
+			srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd;
+			srq->ext.xrc.cq   = srq_init_attr->ext.xrc.cq;
+			atomic_inc(&srq->ext.xrc.xrcd->usecnt);
+			atomic_inc(&srq->ext.xrc.cq->usecnt);
+		}
 		atomic_inc(&pd->usecnt);
 		atomic_set(&srq->usecnt, 0);
 	}
@@ -279,16 +312,29 @@
 int ib_destroy_srq(struct ib_srq *srq)
 {
 	struct ib_pd *pd;
+	enum ib_srq_type srq_type;
+	struct ib_xrcd *uninitialized_var(xrcd);
+	struct ib_cq *uninitialized_var(cq);
 	int ret;
 
 	if (atomic_read(&srq->usecnt))
 		return -EBUSY;
 
 	pd = srq->pd;
+	srq_type = srq->srq_type;
+	if (srq_type == IB_SRQT_XRC) {
+		xrcd = srq->ext.xrc.xrcd;
+		cq = srq->ext.xrc.cq;
+	}
 
 	ret = srq->device->destroy_srq(srq);
-	if (!ret)
+	if (!ret) {
 		atomic_dec(&pd->usecnt);
+		if (srq_type == IB_SRQT_XRC) {
+			atomic_dec(&xrcd->usecnt);
+			atomic_dec(&cq->usecnt);
+		}
+	}
 
 	return ret;
 }
@@ -296,28 +342,123 @@
 
 /* Queue pairs */
 
+static void __ib_shared_qp_event_handler(struct ib_event *event, void *context)
+{
+	struct ib_qp *qp = context;
+
+	list_for_each_entry(event->element.qp, &qp->open_list, open_list)
+		event->element.qp->event_handler(event, event->element.qp->qp_context);
+}
+
+static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp)
+{
+	mutex_lock(&xrcd->tgt_qp_mutex);
+	list_add(&qp->xrcd_list, &xrcd->tgt_qp_list);
+	mutex_unlock(&xrcd->tgt_qp_mutex);
+}
+
+static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp,
+				  void (*event_handler)(struct ib_event *, void *),
+				  void *qp_context)
+{
+	struct ib_qp *qp;
+	unsigned long flags;
+
+	qp = kzalloc(sizeof *qp, GFP_KERNEL);
+	if (!qp)
+		return ERR_PTR(-ENOMEM);
+
+	qp->real_qp = real_qp;
+	atomic_inc(&real_qp->usecnt);
+	qp->device = real_qp->device;
+	qp->event_handler = event_handler;
+	qp->qp_context = qp_context;
+	qp->qp_num = real_qp->qp_num;
+	qp->qp_type = real_qp->qp_type;
+
+	spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
+	list_add(&qp->open_list, &real_qp->open_list);
+	spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
+
+	return qp;
+}
+
+struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
+			 struct ib_qp_open_attr *qp_open_attr)
+{
+	struct ib_qp *qp, *real_qp;
+
+	if (qp_open_attr->qp_type != IB_QPT_XRC_TGT)
+		return ERR_PTR(-EINVAL);
+
+	qp = ERR_PTR(-EINVAL);
+	mutex_lock(&xrcd->tgt_qp_mutex);
+	list_for_each_entry(real_qp, &xrcd->tgt_qp_list, xrcd_list) {
+		if (real_qp->qp_num == qp_open_attr->qp_num) {
+			qp = __ib_open_qp(real_qp, qp_open_attr->event_handler,
+					  qp_open_attr->qp_context);
+			break;
+		}
+	}
+	mutex_unlock(&xrcd->tgt_qp_mutex);
+	return qp;
+}
+EXPORT_SYMBOL(ib_open_qp);
+
 struct ib_qp *ib_create_qp(struct ib_pd *pd,
 			   struct ib_qp_init_attr *qp_init_attr)
 {
-	struct ib_qp *qp;
+	struct ib_qp *qp, *real_qp;
+	struct ib_device *device;
 
-	qp = pd->device->create_qp(pd, qp_init_attr, NULL);
+	device = pd ? pd->device : qp_init_attr->xrcd->device;
+	qp = device->create_qp(pd, qp_init_attr, NULL);
 
 	if (!IS_ERR(qp)) {
-		qp->device     	  = pd->device;
-		qp->pd         	  = pd;
-		qp->send_cq    	  = qp_init_attr->send_cq;
-		qp->recv_cq    	  = qp_init_attr->recv_cq;
-		qp->srq	       	  = qp_init_attr->srq;
-		qp->uobject       = NULL;
-		qp->event_handler = qp_init_attr->event_handler;
-		qp->qp_context    = qp_init_attr->qp_context;
-		qp->qp_type	  = qp_init_attr->qp_type;
-		atomic_inc(&pd->usecnt);
-		atomic_inc(&qp_init_attr->send_cq->usecnt);
-		atomic_inc(&qp_init_attr->recv_cq->usecnt);
-		if (qp_init_attr->srq)
-			atomic_inc(&qp_init_attr->srq->usecnt);
+		qp->device     = device;
+		qp->real_qp    = qp;
+		qp->uobject    = NULL;
+		qp->qp_type    = qp_init_attr->qp_type;
+
+		if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) {
+			qp->event_handler = __ib_shared_qp_event_handler;
+			qp->qp_context = qp;
+			qp->pd = NULL;
+			qp->send_cq = qp->recv_cq = NULL;
+			qp->srq = NULL;
+			qp->xrcd = qp_init_attr->xrcd;
+			atomic_inc(&qp_init_attr->xrcd->usecnt);
+			INIT_LIST_HEAD(&qp->open_list);
+			atomic_set(&qp->usecnt, 0);
+
+			real_qp = qp;
+			qp = __ib_open_qp(real_qp, qp_init_attr->event_handler,
+					  qp_init_attr->qp_context);
+			if (!IS_ERR(qp))
+				__ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp);
+			else
+				real_qp->device->destroy_qp(real_qp);
+		} else {
+			qp->event_handler = qp_init_attr->event_handler;
+			qp->qp_context = qp_init_attr->qp_context;
+			if (qp_init_attr->qp_type == IB_QPT_XRC_INI) {
+				qp->recv_cq = NULL;
+				qp->srq = NULL;
+			} else {
+				qp->recv_cq = qp_init_attr->recv_cq;
+				atomic_inc(&qp_init_attr->recv_cq->usecnt);
+				qp->srq = qp_init_attr->srq;
+				if (qp->srq)
+					atomic_inc(&qp_init_attr->srq->usecnt);
+			}
+
+			qp->pd	    = pd;
+			qp->send_cq = qp_init_attr->send_cq;
+			qp->xrcd    = NULL;
+
+			atomic_inc(&pd->usecnt);
+			atomic_inc(&qp_init_attr->send_cq->usecnt);
+		}
 	}
 
 	return qp;
@@ -326,8 +467,8 @@
 
 static const struct {
 	int			valid;
-	enum ib_qp_attr_mask	req_param[IB_QPT_RAW_ETHERTYPE + 1];
-	enum ib_qp_attr_mask	opt_param[IB_QPT_RAW_ETHERTYPE + 1];
+	enum ib_qp_attr_mask	req_param[IB_QPT_MAX];
+	enum ib_qp_attr_mask	opt_param[IB_QPT_MAX];
 } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
 	[IB_QPS_RESET] = {
 		[IB_QPS_RESET] = { .valid = 1 },
@@ -343,6 +484,12 @@
 				[IB_QPT_RC]  = (IB_QP_PKEY_INDEX		|
 						IB_QP_PORT			|
 						IB_QP_ACCESS_FLAGS),
+				[IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX		|
+						IB_QP_PORT			|
+						IB_QP_ACCESS_FLAGS),
+				[IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX		|
+						IB_QP_PORT			|
+						IB_QP_ACCESS_FLAGS),
 				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX		|
 						IB_QP_QKEY),
 				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX		|
@@ -365,6 +512,12 @@
 				[IB_QPT_RC]  = (IB_QP_PKEY_INDEX		|
 						IB_QP_PORT			|
 						IB_QP_ACCESS_FLAGS),
+				[IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX		|
+						IB_QP_PORT			|
+						IB_QP_ACCESS_FLAGS),
+				[IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX		|
+						IB_QP_PORT			|
+						IB_QP_ACCESS_FLAGS),
 				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX		|
 						IB_QP_QKEY),
 				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX		|
@@ -384,6 +537,16 @@
 						IB_QP_RQ_PSN			|
 						IB_QP_MAX_DEST_RD_ATOMIC	|
 						IB_QP_MIN_RNR_TIMER),
+				[IB_QPT_XRC_INI] = (IB_QP_AV			|
+						IB_QP_PATH_MTU			|
+						IB_QP_DEST_QPN			|
+						IB_QP_RQ_PSN),
+				[IB_QPT_XRC_TGT] = (IB_QP_AV			|
+						IB_QP_PATH_MTU			|
+						IB_QP_DEST_QPN			|
+						IB_QP_RQ_PSN			|
+						IB_QP_MAX_DEST_RD_ATOMIC	|
+						IB_QP_MIN_RNR_TIMER),
 			},
 			.opt_param = {
 				 [IB_QPT_UD]  = (IB_QP_PKEY_INDEX		|
@@ -394,6 +557,12 @@
 				 [IB_QPT_RC]  = (IB_QP_ALT_PATH			|
 						 IB_QP_ACCESS_FLAGS		|
 						 IB_QP_PKEY_INDEX),
+				 [IB_QPT_XRC_INI] = (IB_QP_ALT_PATH		|
+						 IB_QP_ACCESS_FLAGS		|
+						 IB_QP_PKEY_INDEX),
+				 [IB_QPT_XRC_TGT] = (IB_QP_ALT_PATH		|
+						 IB_QP_ACCESS_FLAGS		|
+						 IB_QP_PKEY_INDEX),
 				 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX		|
 						 IB_QP_QKEY),
 				 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX		|
@@ -414,6 +583,13 @@
 						IB_QP_RNR_RETRY			|
 						IB_QP_SQ_PSN			|
 						IB_QP_MAX_QP_RD_ATOMIC),
+				[IB_QPT_XRC_INI] = (IB_QP_TIMEOUT		|
+						IB_QP_RETRY_CNT			|
+						IB_QP_RNR_RETRY			|
+						IB_QP_SQ_PSN			|
+						IB_QP_MAX_QP_RD_ATOMIC),
+				[IB_QPT_XRC_TGT] = (IB_QP_TIMEOUT		|
+						IB_QP_SQ_PSN),
 				[IB_QPT_SMI] = IB_QP_SQ_PSN,
 				[IB_QPT_GSI] = IB_QP_SQ_PSN,
 			},
@@ -429,6 +605,15 @@
 						 IB_QP_ACCESS_FLAGS		|
 						 IB_QP_MIN_RNR_TIMER		|
 						 IB_QP_PATH_MIG_STATE),
+				 [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE		|
+						 IB_QP_ALT_PATH			|
+						 IB_QP_ACCESS_FLAGS		|
+						 IB_QP_PATH_MIG_STATE),
+				 [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE		|
+						 IB_QP_ALT_PATH			|
+						 IB_QP_ACCESS_FLAGS		|
+						 IB_QP_MIN_RNR_TIMER		|
+						 IB_QP_PATH_MIG_STATE),
 				 [IB_QPT_SMI] = (IB_QP_CUR_STATE		|
 						 IB_QP_QKEY),
 				 [IB_QPT_GSI] = (IB_QP_CUR_STATE		|
@@ -453,6 +638,15 @@
 						IB_QP_ALT_PATH			|
 						IB_QP_PATH_MIG_STATE		|
 						IB_QP_MIN_RNR_TIMER),
+				[IB_QPT_XRC_INI] = (IB_QP_CUR_STATE		|
+						IB_QP_ACCESS_FLAGS		|
+						IB_QP_ALT_PATH			|
+						IB_QP_PATH_MIG_STATE),
+				[IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE		|
+						IB_QP_ACCESS_FLAGS		|
+						IB_QP_ALT_PATH			|
+						IB_QP_PATH_MIG_STATE		|
+						IB_QP_MIN_RNR_TIMER),
 				[IB_QPT_SMI] = (IB_QP_CUR_STATE			|
 						IB_QP_QKEY),
 				[IB_QPT_GSI] = (IB_QP_CUR_STATE			|
@@ -465,6 +659,8 @@
 				[IB_QPT_UD]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
 				[IB_QPT_UC]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
 				[IB_QPT_RC]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
+				[IB_QPT_XRC_INI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
+				[IB_QPT_XRC_TGT] = IB_QP_EN_SQD_ASYNC_NOTIFY, /* ??? */
 				[IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
 				[IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY
 			}
@@ -487,6 +683,15 @@
 						IB_QP_ACCESS_FLAGS		|
 						IB_QP_MIN_RNR_TIMER		|
 						IB_QP_PATH_MIG_STATE),
+				[IB_QPT_XRC_INI] = (IB_QP_CUR_STATE		|
+						IB_QP_ALT_PATH			|
+						IB_QP_ACCESS_FLAGS		|
+						IB_QP_PATH_MIG_STATE),
+				[IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE		|
+						IB_QP_ALT_PATH			|
+						IB_QP_ACCESS_FLAGS		|
+						IB_QP_MIN_RNR_TIMER		|
+						IB_QP_PATH_MIG_STATE),
 				[IB_QPT_SMI] = (IB_QP_CUR_STATE			|
 						IB_QP_QKEY),
 				[IB_QPT_GSI] = (IB_QP_CUR_STATE			|
@@ -515,6 +720,25 @@
 						IB_QP_PKEY_INDEX		|
 						IB_QP_MIN_RNR_TIMER		|
 						IB_QP_PATH_MIG_STATE),
+				[IB_QPT_XRC_INI] = (IB_QP_PORT			|
+						IB_QP_AV			|
+						IB_QP_TIMEOUT			|
+						IB_QP_RETRY_CNT			|
+						IB_QP_RNR_RETRY			|
+						IB_QP_MAX_QP_RD_ATOMIC		|
+						IB_QP_ALT_PATH			|
+						IB_QP_ACCESS_FLAGS		|
+						IB_QP_PKEY_INDEX		|
+						IB_QP_PATH_MIG_STATE),
+				[IB_QPT_XRC_TGT] = (IB_QP_PORT			|
+						IB_QP_AV			|
+						IB_QP_TIMEOUT			|
+						IB_QP_MAX_DEST_RD_ATOMIC	|
+						IB_QP_ALT_PATH			|
+						IB_QP_ACCESS_FLAGS		|
+						IB_QP_PKEY_INDEX		|
+						IB_QP_MIN_RNR_TIMER		|
+						IB_QP_PATH_MIG_STATE),
 				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX		|
 						IB_QP_QKEY),
 				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX		|
@@ -579,7 +803,7 @@
 		 struct ib_qp_attr *qp_attr,
 		 int qp_attr_mask)
 {
-	return qp->device->modify_qp(qp, qp_attr, qp_attr_mask, NULL);
+	return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
 }
 EXPORT_SYMBOL(ib_modify_qp);
 
@@ -589,11 +813,59 @@
 		struct ib_qp_init_attr *qp_init_attr)
 {
 	return qp->device->query_qp ?
-		qp->device->query_qp(qp, qp_attr, qp_attr_mask, qp_init_attr) :
+		qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) :
 		-ENOSYS;
 }
 EXPORT_SYMBOL(ib_query_qp);
 
+int ib_close_qp(struct ib_qp *qp)
+{
+	struct ib_qp *real_qp;
+	unsigned long flags;
+
+	real_qp = qp->real_qp;
+	if (real_qp == qp)
+		return -EINVAL;
+
+	spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
+	list_del(&qp->open_list);
+	spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
+
+	atomic_dec(&real_qp->usecnt);
+	kfree(qp);
+
+	return 0;
+}
+EXPORT_SYMBOL(ib_close_qp);
+
+static int __ib_destroy_shared_qp(struct ib_qp *qp)
+{
+	struct ib_xrcd *xrcd;
+	struct ib_qp *real_qp;
+	int ret;
+
+	real_qp = qp->real_qp;
+	xrcd = real_qp->xrcd;
+
+	mutex_lock(&xrcd->tgt_qp_mutex);
+	ib_close_qp(qp);
+	if (atomic_read(&real_qp->usecnt) == 0)
+		list_del(&real_qp->xrcd_list);
+	else
+		real_qp = NULL;
+	mutex_unlock(&xrcd->tgt_qp_mutex);
+
+	if (real_qp) {
+		ret = ib_destroy_qp(real_qp);
+		if (!ret)
+			atomic_dec(&xrcd->usecnt);
+		else
+			__ib_insert_xrcd_qp(xrcd, real_qp);
+	}
+
+	return 0;
+}
+
 int ib_destroy_qp(struct ib_qp *qp)
 {
 	struct ib_pd *pd;
@@ -601,16 +873,25 @@
 	struct ib_srq *srq;
 	int ret;
 
-	pd  = qp->pd;
-	scq = qp->send_cq;
-	rcq = qp->recv_cq;
-	srq = qp->srq;
+	if (atomic_read(&qp->usecnt))
+		return -EBUSY;
+
+	if (qp->real_qp != qp)
+		return __ib_destroy_shared_qp(qp);
+
+	pd   = qp->pd;
+	scq  = qp->send_cq;
+	rcq  = qp->recv_cq;
+	srq  = qp->srq;
 
 	ret = qp->device->destroy_qp(qp);
 	if (!ret) {
-		atomic_dec(&pd->usecnt);
-		atomic_dec(&scq->usecnt);
-		atomic_dec(&rcq->usecnt);
+		if (pd)
+			atomic_dec(&pd->usecnt);
+		if (scq)
+			atomic_dec(&scq->usecnt);
+		if (rcq)
+			atomic_dec(&rcq->usecnt);
 		if (srq)
 			atomic_dec(&srq->usecnt);
 	}
@@ -920,3 +1201,42 @@
 	return qp->device->detach_mcast(qp, gid, lid);
 }
 EXPORT_SYMBOL(ib_detach_mcast);
+
+struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device)
+{
+	struct ib_xrcd *xrcd;
+
+	if (!device->alloc_xrcd)
+		return ERR_PTR(-ENOSYS);
+
+	xrcd = device->alloc_xrcd(device, NULL, NULL);
+	if (!IS_ERR(xrcd)) {
+		xrcd->device = device;
+		xrcd->inode = NULL;
+		atomic_set(&xrcd->usecnt, 0);
+		mutex_init(&xrcd->tgt_qp_mutex);
+		INIT_LIST_HEAD(&xrcd->tgt_qp_list);
+	}
+
+	return xrcd;
+}
+EXPORT_SYMBOL(ib_alloc_xrcd);
+
+int ib_dealloc_xrcd(struct ib_xrcd *xrcd)
+{
+	struct ib_qp *qp;
+	int ret;
+
+	if (atomic_read(&xrcd->usecnt))
+		return -EBUSY;
+
+	while (!list_empty(&xrcd->tgt_qp_list)) {
+		qp = list_entry(xrcd->tgt_qp_list.next, struct ib_qp, xrcd_list);
+		ret = ib_destroy_qp(qp);
+		if (ret)
+			return ret;
+	}
+
+	return xrcd->device->dealloc_xrcd(xrcd);
+}
+EXPORT_SYMBOL(ib_dealloc_xrcd);
diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c
index 24f9e3a..32d34e8 100644
--- a/drivers/infiniband/hw/amso1100/c2_ae.c
+++ b/drivers/infiniband/hw/amso1100/c2_ae.c
@@ -288,6 +288,11 @@
 		cm_event.private_data_len =
 			be32_to_cpu(req->private_data_length);
 		cm_event.private_data = req->private_data;
+		/*
+		 * Until ird/ord negotiation via MPAv2 support is added, send
+		 * max supported values
+		 */
+		cm_event.ird = cm_event.ord = 128;
 
 		if (cm_id->event_handler)
 			cm_id->event_handler(cm_id, &cm_event);
diff --git a/drivers/infiniband/hw/amso1100/c2_intr.c b/drivers/infiniband/hw/amso1100/c2_intr.c
index 0ebe4e8..8951db4 100644
--- a/drivers/infiniband/hw/amso1100/c2_intr.c
+++ b/drivers/infiniband/hw/amso1100/c2_intr.c
@@ -183,6 +183,11 @@
 	case IW_CM_EVENT_ESTABLISHED:
 		c2_set_qp_state(req->qp,
 				C2_QP_STATE_RTS);
+		/*
+		 * Until ird/ord negotiation via MPAv2 support is added, send
+		 * max supported values
+		 */
+		cm_event.ird = cm_event.ord = 128;
 	case IW_CM_EVENT_CLOSE:
 
 		/*
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index f101bb7..12f923d 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -753,10 +753,7 @@
 	memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6);
 
 	/* Print out the MAC address */
-	pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X\n",
-		netdev->name,
-		netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
-		netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5]);
+	pr_debug("%s: MAC %pM\n", netdev->name, netdev->dev_addr);
 
 #if 0
 	/* Disable network packets */
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 6cd642a..de6d077 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -753,6 +753,11 @@
 	event.private_data_len = ep->plen;
 	event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
 	event.provider_data = ep;
+	/*
+	 * Until ird/ord negotiation via MPAv2 support is added, send max
+	 * supported values
+	 */
+	event.ird = event.ord = 8;
 	if (state_read(&ep->parent_ep->com) != DEAD) {
 		get_ep(&ep->com);
 		ep->parent_ep->com.cm_id->event_handler(
@@ -770,6 +775,11 @@
 	PDBG("%s ep %p\n", __func__, ep);
 	memset(&event, 0, sizeof(event));
 	event.event = IW_CM_EVENT_ESTABLISHED;
+	/*
+	 * Until ird/ord negotiation via MPAv2 support is added, send max
+	 * supported values
+	 */
+	event.ird = event.ord = 8;
 	if (ep->com.cm_id) {
 		PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c
index 71e0d84..abcc9e7 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_ev.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_ev.c
@@ -46,6 +46,7 @@
 	struct ib_event event;
 	struct iwch_qp_attributes attrs;
 	struct iwch_qp *qhp;
+	unsigned long flag;
 
 	spin_lock(&rnicp->lock);
 	qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
@@ -94,7 +95,9 @@
 	if (qhp->ibqp.event_handler)
 		(*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context);
 
+	spin_lock_irqsave(&chp->comp_handler_lock, flag);
 	(*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
+	spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
 
 	if (atomic_dec_and_test(&qhp->refcnt))
 		wake_up(&qhp->wait);
@@ -107,6 +110,7 @@
 	struct iwch_cq *chp;
 	struct iwch_qp *qhp;
 	u32 cqid = RSPQ_CQID(rsp_msg);
+	unsigned long flag;
 
 	rnicp = (struct iwch_dev *) rdev_p->ulp;
 	spin_lock(&rnicp->lock);
@@ -170,7 +174,9 @@
 		 */
 		if (qhp->ep && SQ_TYPE(rsp_msg->cqe))
 			dst_confirm(qhp->ep->dst);
+		spin_lock_irqsave(&chp->comp_handler_lock, flag);
 		(*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
+		spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
 		break;
 
 	case TPT_ERR_STAG:
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index c7d9411..37c224f 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -190,6 +190,7 @@
 	chp->rhp = rhp;
 	chp->ibcq.cqe = 1 << chp->cq.size_log2;
 	spin_lock_init(&chp->lock);
+	spin_lock_init(&chp->comp_handler_lock);
 	atomic_set(&chp->refcnt, 1);
 	init_waitqueue_head(&chp->wait);
 	if (insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid)) {
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 9a342c9..87c14b0 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -103,6 +103,7 @@
 	struct iwch_dev *rhp;
 	struct t3_cq cq;
 	spinlock_t lock;
+	spinlock_t comp_handler_lock;
 	atomic_t refcnt;
 	wait_queue_head_t wait;
 	u32 __user *user_rptr_addr;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index ecd313f..bea5839 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -822,8 +822,11 @@
 	flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count);
 	spin_unlock(&qhp->lock);
 	spin_unlock_irqrestore(&rchp->lock, *flag);
-	if (flushed)
+	if (flushed) {
+		spin_lock_irqsave(&rchp->comp_handler_lock, *flag);
 		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
+		spin_unlock_irqrestore(&rchp->comp_handler_lock, *flag);
+	}
 
 	/* locking hierarchy: cq lock first, then qp lock. */
 	spin_lock_irqsave(&schp->lock, *flag);
@@ -833,8 +836,11 @@
 	flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count);
 	spin_unlock(&qhp->lock);
 	spin_unlock_irqrestore(&schp->lock, *flag);
-	if (flushed)
+	if (flushed) {
+		spin_lock_irqsave(&schp->comp_handler_lock, *flag);
 		(*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
+		spin_unlock_irqrestore(&schp->comp_handler_lock, *flag);
+	}
 
 	/* deref */
 	if (atomic_dec_and_test(&qhp->refcnt))
@@ -853,11 +859,15 @@
 	if (qhp->ibqp.uobject) {
 		cxio_set_wq_in_error(&qhp->wq);
 		cxio_set_cq_in_error(&rchp->cq);
+		spin_lock_irqsave(&rchp->comp_handler_lock, *flag);
 		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
+		spin_unlock_irqrestore(&rchp->comp_handler_lock, *flag);
 		if (schp != rchp) {
 			cxio_set_cq_in_error(&schp->cq);
+			spin_lock_irqsave(&schp->comp_handler_lock, *flag);
 			(*schp->ibcq.comp_handler)(&schp->ibcq,
 						   schp->ibcq.cq_context);
+			spin_unlock_irqrestore(&schp->comp_handler_lock, *flag);
 		}
 		return;
 	}
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 77f769d..b36cdac 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -103,7 +103,8 @@
 static int mpa_rev = 1;
 module_param(mpa_rev, int, 0644);
 MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
-		 "1 is spec compliant. (default=1)");
+		"1 is RFC0544 spec compliant, 2 is IETF MPA Peer Connect Draft"
+		" compliant (default=1)");
 
 static int markers_enabled;
 module_param(markers_enabled, int, 0644);
@@ -497,17 +498,21 @@
 	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 }
 
-static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb)
+static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
+		u8 mpa_rev_to_use)
 {
 	int mpalen, wrlen;
 	struct fw_ofld_tx_data_wr *req;
 	struct mpa_message *mpa;
+	struct mpa_v2_conn_params mpa_v2_params;
 
 	PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
 
 	BUG_ON(skb_cloned(skb));
 
 	mpalen = sizeof(*mpa) + ep->plen;
+	if (mpa_rev_to_use == 2)
+		mpalen += sizeof(struct mpa_v2_conn_params);
 	wrlen = roundup(mpalen + sizeof *req, 16);
 	skb = get_skb(skb, wrlen, GFP_KERNEL);
 	if (!skb) {
@@ -533,12 +538,39 @@
 	mpa = (struct mpa_message *)(req + 1);
 	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
 	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
-		     (markers_enabled ? MPA_MARKERS : 0);
+		     (markers_enabled ? MPA_MARKERS : 0) |
+		     (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0);
 	mpa->private_data_size = htons(ep->plen);
-	mpa->revision = mpa_rev;
+	mpa->revision = mpa_rev_to_use;
+	if (mpa_rev_to_use == 1)
+		ep->tried_with_mpa_v1 = 1;
 
-	if (ep->plen)
-		memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);
+	if (mpa_rev_to_use == 2) {
+		mpa->private_data_size +=
+			htons(sizeof(struct mpa_v2_conn_params));
+		mpa_v2_params.ird = htons((u16)ep->ird);
+		mpa_v2_params.ord = htons((u16)ep->ord);
+
+		if (peer2peer) {
+			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
+			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
+				mpa_v2_params.ord |=
+					htons(MPA_V2_RDMA_WRITE_RTR);
+			else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
+				mpa_v2_params.ord |=
+					htons(MPA_V2_RDMA_READ_RTR);
+		}
+		memcpy(mpa->private_data, &mpa_v2_params,
+		       sizeof(struct mpa_v2_conn_params));
+
+		if (ep->plen)
+			memcpy(mpa->private_data +
+			       sizeof(struct mpa_v2_conn_params),
+			       ep->mpa_pkt + sizeof(*mpa), ep->plen);
+	} else
+		if (ep->plen)
+			memcpy(mpa->private_data,
+					ep->mpa_pkt + sizeof(*mpa), ep->plen);
 
 	/*
 	 * Reference the mpa skb.  This ensures the data area
@@ -562,10 +594,13 @@
 	struct fw_ofld_tx_data_wr *req;
 	struct mpa_message *mpa;
 	struct sk_buff *skb;
+	struct mpa_v2_conn_params mpa_v2_params;
 
 	PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
 
 	mpalen = sizeof(*mpa) + plen;
+	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
+		mpalen += sizeof(struct mpa_v2_conn_params);
 	wrlen = roundup(mpalen + sizeof *req, 16);
 
 	skb = get_skb(NULL, wrlen, GFP_KERNEL);
@@ -595,8 +630,29 @@
 	mpa->flags = MPA_REJECT;
 	mpa->revision = mpa_rev;
 	mpa->private_data_size = htons(plen);
-	if (plen)
-		memcpy(mpa->private_data, pdata, plen);
+
+	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
+		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
+		mpa->private_data_size +=
+			htons(sizeof(struct mpa_v2_conn_params));
+		mpa_v2_params.ird = htons(((u16)ep->ird) |
+					  (peer2peer ? MPA_V2_PEER2PEER_MODEL :
+					   0));
+		mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ?
+					  (p2p_type ==
+					   FW_RI_INIT_P2PTYPE_RDMA_WRITE ?
+					   MPA_V2_RDMA_WRITE_RTR : p2p_type ==
+					   FW_RI_INIT_P2PTYPE_READ_REQ ?
+					   MPA_V2_RDMA_READ_RTR : 0) : 0));
+		memcpy(mpa->private_data, &mpa_v2_params,
+		       sizeof(struct mpa_v2_conn_params));
+
+		if (ep->plen)
+			memcpy(mpa->private_data +
+			       sizeof(struct mpa_v2_conn_params), pdata, plen);
+	} else
+		if (plen)
+			memcpy(mpa->private_data, pdata, plen);
 
 	/*
 	 * Reference the mpa skb again.  This ensures the data area
@@ -617,10 +673,13 @@
 	struct fw_ofld_tx_data_wr *req;
 	struct mpa_message *mpa;
 	struct sk_buff *skb;
+	struct mpa_v2_conn_params mpa_v2_params;
 
 	PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
 
 	mpalen = sizeof(*mpa) + plen;
+	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
+		mpalen += sizeof(struct mpa_v2_conn_params);
 	wrlen = roundup(mpalen + sizeof *req, 16);
 
 	skb = get_skb(NULL, wrlen, GFP_KERNEL);
@@ -649,10 +708,36 @@
 	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
 	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
 		     (markers_enabled ? MPA_MARKERS : 0);
-	mpa->revision = mpa_rev;
+	mpa->revision = ep->mpa_attr.version;
 	mpa->private_data_size = htons(plen);
-	if (plen)
-		memcpy(mpa->private_data, pdata, plen);
+
+	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
+		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
+		mpa->private_data_size +=
+			htons(sizeof(struct mpa_v2_conn_params));
+		mpa_v2_params.ird = htons((u16)ep->ird);
+		mpa_v2_params.ord = htons((u16)ep->ord);
+		if (peer2peer && (ep->mpa_attr.p2p_type !=
+					FW_RI_INIT_P2PTYPE_DISABLED)) {
+			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
+
+			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
+				mpa_v2_params.ord |=
+					htons(MPA_V2_RDMA_WRITE_RTR);
+			else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
+				mpa_v2_params.ord |=
+					htons(MPA_V2_RDMA_READ_RTR);
+		}
+
+		memcpy(mpa->private_data, &mpa_v2_params,
+		       sizeof(struct mpa_v2_conn_params));
+
+		if (ep->plen)
+			memcpy(mpa->private_data +
+			       sizeof(struct mpa_v2_conn_params), pdata, plen);
+	} else
+		if (plen)
+			memcpy(mpa->private_data, pdata, plen);
 
 	/*
 	 * Reference the mpa skb.  This ensures the data area
@@ -695,7 +780,10 @@
 
 	/* start MPA negotiation */
 	send_flowc(ep, NULL);
-	send_mpa_req(ep, skb);
+	if (ep->retry_with_mpa_v1)
+		send_mpa_req(ep, skb, 1);
+	else
+		send_mpa_req(ep, skb, mpa_rev);
 
 	return 0;
 }
@@ -769,8 +857,19 @@
 	event.remote_addr = ep->com.remote_addr;
 
 	if ((status == 0) || (status == -ECONNREFUSED)) {
-		event.private_data_len = ep->plen;
-		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
+		if (!ep->tried_with_mpa_v1) {
+			/* this means MPA_v2 is used */
+			event.private_data_len = ep->plen -
+				sizeof(struct mpa_v2_conn_params);
+			event.private_data = ep->mpa_pkt +
+				sizeof(struct mpa_message) +
+				sizeof(struct mpa_v2_conn_params);
+		} else {
+			/* this means MPA_v1 is used */
+			event.private_data_len = ep->plen;
+			event.private_data = ep->mpa_pkt +
+				sizeof(struct mpa_message);
+		}
 	}
 
 	PDBG("%s ep %p tid %u status %d\n", __func__, ep,
@@ -793,9 +892,22 @@
 	event.event = IW_CM_EVENT_CONNECT_REQUEST;
 	event.local_addr = ep->com.local_addr;
 	event.remote_addr = ep->com.remote_addr;
-	event.private_data_len = ep->plen;
-	event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
 	event.provider_data = ep;
+	if (!ep->tried_with_mpa_v1) {
+		/* this means MPA_v2 is used */
+		event.ord = ep->ord;
+		event.ird = ep->ird;
+		event.private_data_len = ep->plen -
+			sizeof(struct mpa_v2_conn_params);
+		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) +
+			sizeof(struct mpa_v2_conn_params);
+	} else {
+		/* this means MPA_v1 is used. Send max supported */
+		event.ord = c4iw_max_read_depth;
+		event.ird = c4iw_max_read_depth;
+		event.private_data_len = ep->plen;
+		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
+	}
 	if (state_read(&ep->parent_ep->com) != DEAD) {
 		c4iw_get_ep(&ep->com);
 		ep->parent_ep->com.cm_id->event_handler(
@@ -813,6 +925,8 @@
 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
 	memset(&event, 0, sizeof(event));
 	event.event = IW_CM_EVENT_ESTABLISHED;
+	event.ird = ep->ird;
+	event.ord = ep->ord;
 	if (ep->com.cm_id) {
 		PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
@@ -848,7 +962,10 @@
 static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
 {
 	struct mpa_message *mpa;
+	struct mpa_v2_conn_params *mpa_v2_params;
 	u16 plen;
+	u16 resp_ird, resp_ord;
+	u8 rtr_mismatch = 0, insuff_ird = 0;
 	struct c4iw_qp_attributes attrs;
 	enum c4iw_qp_attr_mask mask;
 	int err;
@@ -888,7 +1005,9 @@
 	mpa = (struct mpa_message *) ep->mpa_pkt;
 
 	/* Validate MPA header. */
-	if (mpa->revision != mpa_rev) {
+	if (mpa->revision > mpa_rev) {
+		printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
+		       " Received = %d\n", __func__, mpa_rev, mpa->revision);
 		err = -EPROTO;
 		goto err;
 	}
@@ -938,13 +1057,66 @@
 	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
 	ep->mpa_attr.recv_marker_enabled = markers_enabled;
 	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
-	ep->mpa_attr.version = mpa_rev;
-	ep->mpa_attr.p2p_type = peer2peer ? p2p_type :
-					    FW_RI_INIT_P2PTYPE_DISABLED;
+	ep->mpa_attr.version = mpa->revision;
+	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
+
+	if (mpa->revision == 2) {
+		ep->mpa_attr.enhanced_rdma_conn =
+			mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
+		if (ep->mpa_attr.enhanced_rdma_conn) {
+			mpa_v2_params = (struct mpa_v2_conn_params *)
+				(ep->mpa_pkt + sizeof(*mpa));
+			resp_ird = ntohs(mpa_v2_params->ird) &
+				MPA_V2_IRD_ORD_MASK;
+			resp_ord = ntohs(mpa_v2_params->ord) &
+				MPA_V2_IRD_ORD_MASK;
+
+			/*
+			 * This is a double-check. Ideally, below checks are
+			 * not required since ird/ord stuff has been taken
+			 * care of in c4iw_accept_cr
+			 */
+			if ((ep->ird < resp_ord) || (ep->ord > resp_ird)) {
+				err = -ENOMEM;
+				ep->ird = resp_ord;
+				ep->ord = resp_ird;
+				insuff_ird = 1;
+			}
+
+			if (ntohs(mpa_v2_params->ird) &
+					MPA_V2_PEER2PEER_MODEL) {
+				if (ntohs(mpa_v2_params->ord) &
+						MPA_V2_RDMA_WRITE_RTR)
+					ep->mpa_attr.p2p_type =
+						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
+				else if (ntohs(mpa_v2_params->ord) &
+						MPA_V2_RDMA_READ_RTR)
+					ep->mpa_attr.p2p_type =
+						FW_RI_INIT_P2PTYPE_READ_REQ;
+			}
+		}
+	} else if (mpa->revision == 1)
+		if (peer2peer)
+			ep->mpa_attr.p2p_type = p2p_type;
+
 	PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
-	     "xmit_marker_enabled=%d, version=%d\n", __func__,
-	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
-	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
+	     "xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = "
+	     "%d\n", __func__, ep->mpa_attr.crc_enabled,
+	     ep->mpa_attr.recv_marker_enabled,
+	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
+	     ep->mpa_attr.p2p_type, p2p_type);
+
+	/*
+	 * If responder's RTR does not match with that of initiator, assign
+	 * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not
+	 * generated when moving QP to RTS state.
+	 * A TERM message will be sent after QP has moved to RTS state
+	 */
+	if ((ep->mpa_attr.version == 2) &&
+			(ep->mpa_attr.p2p_type != p2p_type)) {
+		ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
+		rtr_mismatch = 1;
+	}
 
 	attrs.mpa_attr = ep->mpa_attr;
 	attrs.max_ird = ep->ird;
@@ -961,6 +1133,39 @@
 			     ep->com.qp, mask, &attrs, 1);
 	if (err)
 		goto err;
+
+	/*
+	 * If responder's RTR requirement did not match with what initiator
+	 * supports, generate TERM message
+	 */
+	if (rtr_mismatch) {
+		printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__);
+		attrs.layer_etype = LAYER_MPA | DDP_LLP;
+		attrs.ecode = MPA_NOMATCH_RTR;
+		attrs.next_state = C4IW_QP_STATE_TERMINATE;
+		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+				C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * Generate TERM if initiator IRD is not sufficient for responder
+	 * provided ORD. Currently, we do the same behaviour even when
+	 * responder provided IRD is also not sufficient as regards to
+	 * initiator ORD.
+	 */
+	if (insuff_ird) {
+		printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n",
+				__func__);
+		attrs.layer_etype = LAYER_MPA | DDP_LLP;
+		attrs.ecode = MPA_INSUFF_IRD;
+		attrs.next_state = C4IW_QP_STATE_TERMINATE;
+		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+				C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
+		err = -ENOMEM;
+		goto out;
+	}
 	goto out;
 err:
 	state_set(&ep->com, ABORTING);
@@ -973,6 +1178,7 @@
 static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
 {
 	struct mpa_message *mpa;
+	struct mpa_v2_conn_params *mpa_v2_params;
 	u16 plen;
 
 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
@@ -1013,7 +1219,9 @@
 	/*
 	 * Validate MPA Header.
 	 */
-	if (mpa->revision != mpa_rev) {
+	if (mpa->revision > mpa_rev) {
+		printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
+		       " Received = %d\n", __func__, mpa_rev, mpa->revision);
 		abort_connection(ep, skb, GFP_KERNEL);
 		return;
 	}
@@ -1056,9 +1264,37 @@
 	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
 	ep->mpa_attr.recv_marker_enabled = markers_enabled;
 	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
-	ep->mpa_attr.version = mpa_rev;
-	ep->mpa_attr.p2p_type = peer2peer ? p2p_type :
-					    FW_RI_INIT_P2PTYPE_DISABLED;
+	ep->mpa_attr.version = mpa->revision;
+	if (mpa->revision == 1)
+		ep->tried_with_mpa_v1 = 1;
+	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
+
+	if (mpa->revision == 2) {
+		ep->mpa_attr.enhanced_rdma_conn =
+			mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
+		if (ep->mpa_attr.enhanced_rdma_conn) {
+			mpa_v2_params = (struct mpa_v2_conn_params *)
+				(ep->mpa_pkt + sizeof(*mpa));
+			ep->ird = ntohs(mpa_v2_params->ird) &
+				MPA_V2_IRD_ORD_MASK;
+			ep->ord = ntohs(mpa_v2_params->ord) &
+				MPA_V2_IRD_ORD_MASK;
+			if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
+				if (peer2peer) {
+					if (ntohs(mpa_v2_params->ord) &
+							MPA_V2_RDMA_WRITE_RTR)
+						ep->mpa_attr.p2p_type =
+						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
+					else if (ntohs(mpa_v2_params->ord) &
+							MPA_V2_RDMA_READ_RTR)
+						ep->mpa_attr.p2p_type =
+						FW_RI_INIT_P2PTYPE_READ_REQ;
+				}
+		}
+	} else if (mpa->revision == 1)
+		if (peer2peer)
+			ep->mpa_attr.p2p_type = p2p_type;
+
 	PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
 	     "xmit_marker_enabled=%d, version=%d p2p_type=%d\n", __func__,
 	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
@@ -1550,6 +1786,112 @@
 	       status == CPL_ERR_PERSIST_NEG_ADVICE;
 }
 
+static int c4iw_reconnect(struct c4iw_ep *ep)
+{
+	int err = 0;
+	struct rtable *rt;
+	struct net_device *pdev;
+	struct neighbour *neigh;
+	int step;
+
+	PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id);
+	init_timer(&ep->timer);
+
+	/*
+	 * Allocate an active TID to initiate a TCP connection.
+	 */
+	ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep);
+	if (ep->atid == -1) {
+		printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
+		err = -ENOMEM;
+		goto fail2;
+	}
+
+	/* find a route */
+	rt = find_route(ep->com.dev,
+			ep->com.cm_id->local_addr.sin_addr.s_addr,
+			ep->com.cm_id->remote_addr.sin_addr.s_addr,
+			ep->com.cm_id->local_addr.sin_port,
+			ep->com.cm_id->remote_addr.sin_port, 0);
+	if (!rt) {
+		printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
+		err = -EHOSTUNREACH;
+		goto fail3;
+	}
+	ep->dst = &rt->dst;
+
+	neigh = dst_get_neighbour(ep->dst);
+
+	/* get a l2t entry */
+	if (neigh->dev->flags & IFF_LOOPBACK) {
+		PDBG("%s LOOPBACK\n", __func__);
+		pdev = ip_dev_find(&init_net,
+				   ep->com.cm_id->remote_addr.sin_addr.s_addr);
+		ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t,
+					neigh, pdev, 0);
+		ep->mtu = pdev->mtu;
+		ep->tx_chan = cxgb4_port_chan(pdev);
+		ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
+		step = ep->com.dev->rdev.lldi.ntxq /
+			ep->com.dev->rdev.lldi.nchan;
+		ep->txq_idx = cxgb4_port_idx(pdev) * step;
+		step = ep->com.dev->rdev.lldi.nrxq /
+			ep->com.dev->rdev.lldi.nchan;
+		ep->ctrlq_idx = cxgb4_port_idx(pdev);
+		ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[
+			cxgb4_port_idx(pdev) * step];
+		dev_put(pdev);
+	} else {
+		ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t,
+					neigh, neigh->dev, 0);
+		ep->mtu = dst_mtu(ep->dst);
+		ep->tx_chan = cxgb4_port_chan(neigh->dev);
+		ep->smac_idx = (cxgb4_port_viid(neigh->dev) & 0x7F) << 1;
+		step = ep->com.dev->rdev.lldi.ntxq /
+			ep->com.dev->rdev.lldi.nchan;
+		ep->txq_idx = cxgb4_port_idx(neigh->dev) * step;
+		ep->ctrlq_idx = cxgb4_port_idx(neigh->dev);
+		step = ep->com.dev->rdev.lldi.nrxq /
+			ep->com.dev->rdev.lldi.nchan;
+		ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[
+			cxgb4_port_idx(neigh->dev) * step];
+	}
+	if (!ep->l2t) {
+		printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
+		err = -ENOMEM;
+		goto fail4;
+	}
+
+	PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
+	     __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
+	     ep->l2t->idx);
+
+	state_set(&ep->com, CONNECTING);
+	ep->tos = 0;
+
+	/* send connect request to rnic */
+	err = send_connect(ep);
+	if (!err)
+		goto out;
+
+	cxgb4_l2t_release(ep->l2t);
+fail4:
+	dst_release(ep->dst);
+fail3:
+	cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
+fail2:
+	/*
+	 * remember to send notification to upper layer.
+	 * We are in here so the upper layer is not aware that this is
+	 * re-connect attempt and so, upper layer is still waiting for
+	 * response of 1st connect request.
+	 */
+	connect_reply_upcall(ep, -ECONNRESET);
+	c4iw_put_ep(&ep->com);
+out:
+	return err;
+}
+
 static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 {
 	struct cpl_abort_req_rss *req = cplhdr(skb);
@@ -1573,8 +1915,11 @@
 
 	/*
 	 * Wake up any threads in rdma_init() or rdma_fini().
+	 * However, this is not needed if com state is just
+	 * MPA_REQ_SENT
 	 */
-	c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
+	if (ep->com.state != MPA_REQ_SENT)
+		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
 
 	mutex_lock(&ep->com.mutex);
 	switch (ep->com.state) {
@@ -1585,7 +1930,21 @@
 		break;
 	case MPA_REQ_SENT:
 		stop_ep_timer(ep);
-		connect_reply_upcall(ep, -ECONNRESET);
+		if (mpa_rev == 2 && ep->tried_with_mpa_v1)
+			connect_reply_upcall(ep, -ECONNRESET);
+		else {
+			/*
+			 * we just don't send notification upwards because we
+			 * want to retry with mpa_v1 without upper layers even
+			 * knowing it.
+			 *
+			 * do some housekeeping so as to re-initiate the
+			 * connection
+			 */
+			PDBG("%s: mpa_rev=%d. Retrying with mpav1\n", __func__,
+			     mpa_rev);
+			ep->retry_with_mpa_v1 = 1;
+		}
 		break;
 	case MPA_REP_SENT:
 		break;
@@ -1621,7 +1980,9 @@
 	dst_confirm(ep->dst);
 	if (ep->com.state != ABORTING) {
 		__state_set(&ep->com, DEAD);
-		release = 1;
+		/* we don't release if we want to retry with mpa_v1 */
+		if (!ep->retry_with_mpa_v1)
+			release = 1;
 	}
 	mutex_unlock(&ep->com.mutex);
 
@@ -1641,6 +2002,15 @@
 out:
 	if (release)
 		release_ep_resources(ep);
+
+	/* retry with mpa-v1 */
+	if (ep && ep->retry_with_mpa_v1) {
+		cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
+		dst_release(ep->dst);
+		cxgb4_l2t_release(ep->l2t);
+		c4iw_reconnect(ep);
+	}
+
 	return 0;
 }
 
@@ -1792,18 +2162,40 @@
 		goto err;
 	}
 
-	cm_id->add_ref(cm_id);
-	ep->com.cm_id = cm_id;
-	ep->com.qp = qp;
+	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
+		if (conn_param->ord > ep->ird) {
+			ep->ird = conn_param->ird;
+			ep->ord = conn_param->ord;
+			send_mpa_reject(ep, conn_param->private_data,
+					conn_param->private_data_len);
+			abort_connection(ep, NULL, GFP_KERNEL);
+			err = -ENOMEM;
+			goto err;
+		}
+		if (conn_param->ird > ep->ord) {
+			if (!ep->ord)
+				conn_param->ird = 1;
+			else {
+				abort_connection(ep, NULL, GFP_KERNEL);
+				err = -ENOMEM;
+				goto err;
+			}
+		}
 
+	}
 	ep->ird = conn_param->ird;
 	ep->ord = conn_param->ord;
 
-	if (peer2peer && ep->ird == 0)
-		ep->ird = 1;
+	if (ep->mpa_attr.version != 2)
+		if (peer2peer && ep->ird == 0)
+			ep->ird = 1;
 
 	PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
 
+	cm_id->add_ref(cm_id);
+	ep->com.cm_id = cm_id;
+	ep->com.qp = qp;
+
 	/* bind QP to EP and move to RTS */
 	attrs.mpa_attr = ep->mpa_attr;
 	attrs.max_ird = ep->ird;
@@ -1944,6 +2336,8 @@
 		       ep->com.dev->rdev.lldi.nchan;
 		ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[
 			      cxgb4_port_idx(neigh->dev) * step];
+		ep->retry_with_mpa_v1 = 0;
+		ep->tried_with_mpa_v1 = 0;
 	}
 	if (!ep->l2t) {
 		printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
@@ -2323,8 +2717,11 @@
 
 	/*
 	 * Wake up any threads in rdma_init() or rdma_fini().
+	 * However, this is not needed if com state is just
+	 * MPA_REQ_SENT
 	 */
-	c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
+	if (ep->com.state != MPA_REQ_SENT)
+		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
 	sched(dev, skb);
 	return 0;
 }
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 1720dc7..f35a935 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -185,7 +185,7 @@
 				 V_CQE_OPCODE(FW_RI_SEND) |
 				 V_CQE_TYPE(0) |
 				 V_CQE_SWCQE(1) |
-				 V_CQE_QPID(wq->rq.qid));
+				 V_CQE_QPID(wq->sq.qid));
 	cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen));
 	cq->sw_queue[cq->sw_pidx] = cqe;
 	t4_swcq_produce(cq);
@@ -818,6 +818,7 @@
 	chp->cq.size--;				/* status page */
 	chp->ibcq.cqe = entries - 2;
 	spin_lock_init(&chp->lock);
+	spin_lock_init(&chp->comp_handler_lock);
 	atomic_set(&chp->refcnt, 1);
 	init_waitqueue_head(&chp->wait);
 	ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 40a13cc..6d0df6e 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -376,10 +376,8 @@
 	struct c4iw_dev *dev;
 };
 
-static void c4iw_remove(struct uld_ctx *ctx)
+static void c4iw_dealloc(struct uld_ctx *ctx)
 {
-	PDBG("%s c4iw_dev %p\n", __func__,  ctx->dev);
-	c4iw_unregister_device(ctx->dev);
 	c4iw_rdev_close(&ctx->dev->rdev);
 	idr_destroy(&ctx->dev->cqidr);
 	idr_destroy(&ctx->dev->qpidr);
@@ -389,11 +387,30 @@
 	ctx->dev = NULL;
 }
 
+static void c4iw_remove(struct uld_ctx *ctx)
+{
+	PDBG("%s c4iw_dev %p\n", __func__,  ctx->dev);
+	c4iw_unregister_device(ctx->dev);
+	c4iw_dealloc(ctx);
+}
+
+static int rdma_supported(const struct cxgb4_lld_info *infop)
+{
+	return infop->vr->stag.size > 0 && infop->vr->pbl.size > 0 &&
+	       infop->vr->rq.size > 0 && infop->vr->qp.size > 0 &&
+	       infop->vr->cq.size > 0 && infop->vr->ocq.size > 0;
+}
+
 static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
 {
 	struct c4iw_dev *devp;
 	int ret;
 
+	if (!rdma_supported(infop)) {
+		printk(KERN_INFO MOD "%s: RDMA not supported on this device.\n",
+		       pci_name(infop->pdev));
+		return ERR_PTR(-ENOSYS);
+	}
 	devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
 	if (!devp) {
 		printk(KERN_ERR MOD "Cannot allocate ib device\n");
@@ -414,7 +431,6 @@
 
 	ret = c4iw_rdev_open(&devp->rdev);
 	if (ret) {
-		mutex_unlock(&dev_mutex);
 		printk(KERN_ERR MOD "Unable to open CXIO rdev err %d\n", ret);
 		ib_dealloc_device(&devp->ibdev);
 		return ERR_PTR(ret);
@@ -519,15 +535,24 @@
 	case CXGB4_STATE_UP:
 		printk(KERN_INFO MOD "%s: Up\n", pci_name(ctx->lldi.pdev));
 		if (!ctx->dev) {
-			int ret = 0;
+			int ret;
 
 			ctx->dev = c4iw_alloc(&ctx->lldi);
-			if (!IS_ERR(ctx->dev))
-				ret = c4iw_register_device(ctx->dev);
-			if (IS_ERR(ctx->dev) || ret)
+			if (IS_ERR(ctx->dev)) {
+				printk(KERN_ERR MOD
+				       "%s: initialization failed: %ld\n",
+				       pci_name(ctx->lldi.pdev),
+				       PTR_ERR(ctx->dev));
+				ctx->dev = NULL;
+				break;
+			}
+			ret = c4iw_register_device(ctx->dev);
+			if (ret) {
 				printk(KERN_ERR MOD
 				       "%s: RDMA registration failed: %d\n",
 				       pci_name(ctx->lldi.pdev), ret);
+				c4iw_dealloc(ctx);
+			}
 		}
 		break;
 	case CXGB4_STATE_DOWN:
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
index c13041a..397cb36 100644
--- a/drivers/infiniband/hw/cxgb4/ev.c
+++ b/drivers/infiniband/hw/cxgb4/ev.c
@@ -42,6 +42,7 @@
 {
 	struct ib_event event;
 	struct c4iw_qp_attributes attrs;
+	unsigned long flag;
 
 	if ((qhp->attr.state == C4IW_QP_STATE_ERROR) ||
 	    (qhp->attr.state == C4IW_QP_STATE_TERMINATE)) {
@@ -72,7 +73,9 @@
 	if (qhp->ibqp.event_handler)
 		(*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context);
 
+	spin_lock_irqsave(&chp->comp_handler_lock, flag);
 	(*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
+	spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
 }
 
 void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
@@ -183,11 +186,14 @@
 int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid)
 {
 	struct c4iw_cq *chp;
+	unsigned long flag;
 
 	chp = get_chp(dev, qid);
-	if (chp)
+	if (chp) {
+		spin_lock_irqsave(&chp->comp_handler_lock, flag);
 		(*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
-	else
+		spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
+	} else
 		PDBG("%s unknown cqid 0x%x\n", __func__, qid);
 	return 0;
 }
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 4f04537..1357c5b 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -309,6 +309,7 @@
 	struct c4iw_dev *rhp;
 	struct t4_cq cq;
 	spinlock_t lock;
+	spinlock_t comp_handler_lock;
 	atomic_t refcnt;
 	wait_queue_head_t wait;
 };
@@ -323,6 +324,7 @@
 	u8 recv_marker_enabled;
 	u8 xmit_marker_enabled;
 	u8 crc_enabled;
+	u8 enhanced_rdma_conn;
 	u8 version;
 	u8 p2p_type;
 };
@@ -349,6 +351,8 @@
 	u8 is_terminate_local;
 	struct c4iw_mpa_attributes mpa_attr;
 	struct c4iw_ep *llp_stream_handle;
+	u8 layer_etype;
+	u8 ecode;
 };
 
 struct c4iw_qp {
@@ -501,11 +505,18 @@
 #define MPA_KEY_REP "MPA ID Rep Frame"
 
 #define MPA_MAX_PRIVATE_DATA	256
+#define MPA_ENHANCED_RDMA_CONN	0x10
 #define MPA_REJECT		0x20
 #define MPA_CRC			0x40
 #define MPA_MARKERS		0x80
 #define MPA_FLAGS_MASK		0xE0
 
+#define MPA_V2_PEER2PEER_MODEL          0x8000
+#define MPA_V2_ZERO_LEN_FPDU_RTR        0x4000
+#define MPA_V2_RDMA_WRITE_RTR           0x8000
+#define MPA_V2_RDMA_READ_RTR            0x4000
+#define MPA_V2_IRD_ORD_MASK             0x3FFF
+
 #define c4iw_put_ep(ep) { \
 	PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__,  \
 	     ep, atomic_read(&((ep)->kref.refcount))); \
@@ -528,6 +539,11 @@
 	u8 private_data[0];
 };
 
+struct mpa_v2_conn_params {
+	__be16 ird;
+	__be16 ord;
+};
+
 struct terminate_message {
 	u8 layer_etype;
 	u8 ecode;
@@ -580,7 +596,10 @@
 
 enum c4iw_mpa_ecodes {
 	MPA_CRC_ERR		= 0x02,
-	MPA_MARKER_ERR		= 0x03
+	MPA_MARKER_ERR          = 0x03,
+	MPA_LOCAL_CATA          = 0x05,
+	MPA_INSUFF_IRD          = 0x06,
+	MPA_NOMATCH_RTR         = 0x07,
 };
 
 enum c4iw_ep_state {
@@ -651,6 +670,8 @@
 	u16 txq_idx;
 	u16 ctrlq_idx;
 	u8 tos;
+	u8 retry_with_mpa_v1;
+	u8 tried_with_mpa_v1;
 };
 
 static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id)
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index a41578e..d6ccc7e 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -917,7 +917,11 @@
 	wqe->u.terminate.type = FW_RI_TYPE_TERMINATE;
 	wqe->u.terminate.immdlen = cpu_to_be32(sizeof *term);
 	term = (struct terminate_message *)wqe->u.terminate.termmsg;
-	build_term_codes(err_cqe, &term->layer_etype, &term->ecode);
+	if (qhp->attr.layer_etype == (LAYER_MPA|DDP_LLP)) {
+		term->layer_etype = qhp->attr.layer_etype;
+		term->ecode = qhp->attr.ecode;
+	} else
+		build_term_codes(err_cqe, &term->layer_etype, &term->ecode);
 	c4iw_ofld_send(&qhp->rhp->rdev, skb);
 }
 
@@ -941,8 +945,11 @@
 	flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
 	spin_unlock(&qhp->lock);
 	spin_unlock_irqrestore(&rchp->lock, flag);
-	if (flushed)
+	if (flushed) {
+		spin_lock_irqsave(&rchp->comp_handler_lock, flag);
 		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
+		spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
+	}
 
 	/* locking hierarchy: cq lock first, then qp lock. */
 	spin_lock_irqsave(&schp->lock, flag);
@@ -952,13 +959,17 @@
 	flushed = c4iw_flush_sq(&qhp->wq, &schp->cq, count);
 	spin_unlock(&qhp->lock);
 	spin_unlock_irqrestore(&schp->lock, flag);
-	if (flushed)
+	if (flushed) {
+		spin_lock_irqsave(&schp->comp_handler_lock, flag);
 		(*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
+		spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
+	}
 }
 
 static void flush_qp(struct c4iw_qp *qhp)
 {
 	struct c4iw_cq *rchp, *schp;
+	unsigned long flag;
 
 	rchp = get_chp(qhp->rhp, qhp->attr.rcq);
 	schp = get_chp(qhp->rhp, qhp->attr.scq);
@@ -966,8 +977,16 @@
 	if (qhp->ibqp.uobject) {
 		t4_set_wq_in_error(&qhp->wq);
 		t4_set_cq_in_error(&rchp->cq);
-		if (schp != rchp)
+		spin_lock_irqsave(&rchp->comp_handler_lock, flag);
+		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
+		spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
+		if (schp != rchp) {
 			t4_set_cq_in_error(&schp->cq);
+			spin_lock_irqsave(&schp->comp_handler_lock, flag);
+			(*schp->ibcq.comp_handler)(&schp->ibcq,
+					schp->ibcq.cq_context);
+			spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
+		}
 		return;
 	}
 	__flush_qp(qhp, rchp, schp);
@@ -1012,6 +1031,7 @@
 
 static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init)
 {
+	PDBG("%s p2p_type = %d\n", __func__, p2p_type);
 	memset(&init->u, 0, sizeof init->u);
 	switch (p2p_type) {
 	case FW_RI_INIT_P2PTYPE_RDMA_WRITE:
@@ -1206,12 +1226,16 @@
 				disconnect = 1;
 				c4iw_get_ep(&qhp->ep->com);
 			}
+			if (qhp->ibqp.uobject)
+				t4_set_wq_in_error(&qhp->wq);
 			ret = rdma_fini(rhp, qhp, ep);
 			if (ret)
 				goto err;
 			break;
 		case C4IW_QP_STATE_TERMINATE:
 			set_state(qhp, C4IW_QP_STATE_TERMINATE);
+			qhp->attr.layer_etype = attrs->layer_etype;
+			qhp->attr.ecode = attrs->ecode;
 			if (qhp->ibqp.uobject)
 				t4_set_wq_in_error(&qhp->wq);
 			ep = qhp->ep;
@@ -1222,6 +1246,8 @@
 			break;
 		case C4IW_QP_STATE_ERROR:
 			set_state(qhp, C4IW_QP_STATE_ERROR);
+			if (qhp->ibqp.uobject)
+				t4_set_wq_in_error(&qhp->wq);
 			if (!internal) {
 				abort = 1;
 				disconnect = 1;
@@ -1334,7 +1360,10 @@
 	rhp = qhp->rhp;
 
 	attrs.next_state = C4IW_QP_STATE_ERROR;
-	c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
+	if (qhp->attr.state == C4IW_QP_STATE_TERMINATE)
+		c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+	else
+		c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
 	wait_event(qhp->wait, !qhp->ep);
 
 	remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
index d9b1bb4..818d721 100644
--- a/drivers/infiniband/hw/ehca/ehca_eq.c
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -125,7 +125,7 @@
 		tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca);
 
 		ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq,
-					  IRQF_DISABLED, "ehca_eq",
+					  0, "ehca_eq",
 					  (void *)shca);
 		if (ret < 0)
 			ehca_err(ib_dev, "Can't map interrupt handler.");
@@ -133,7 +133,7 @@
 		tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca);
 
 		ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq,
-					  IRQF_DISABLED, "ehca_neq",
+					  0, "ehca_neq",
 					  (void *)shca);
 		if (ret < 0)
 			ehca_err(ib_dev, "Can't map interrupt handler.");
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 32fb342..964f855 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -977,6 +977,9 @@
 	struct hcp_modify_qp_control_block *mqpcb;
 	u64 hret, update_mask;
 
+	if (srq_init_attr->srq_type != IB_SRQT_BASIC)
+		return ERR_PTR(-ENOSYS);
+
 	/* For common attributes, internal_create_qp() takes its info
 	 * out of qp_init_attr, so copy all common attrs there.
 	 */
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 7c1eebe..824a4d5 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -34,6 +34,7 @@
 #include <linux/pci.h>
 #include <linux/netdevice.h>
 #include <linux/slab.h>
+#include <linux/stat.h>
 #include <linux/vmalloc.h>
 
 #include "ipath_kernel.h"
diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c
index 386e2c7..2627198 100644
--- a/drivers/infiniband/hw/ipath/ipath_srq.c
+++ b/drivers/infiniband/hw/ipath/ipath_srq.c
@@ -107,6 +107,11 @@
 	u32 sz;
 	struct ib_srq *ret;
 
+	if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
+		ret = ERR_PTR(-ENOSYS);
+		goto done;
+	}
+
 	if (srq_init_attr->attr.max_wr == 0) {
 		ret = ERR_PTR(-EINVAL);
 		goto done;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index fa643f4..77f3dbc 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -128,6 +128,8 @@
 	    (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
 	    (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
 		props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)
+		props->device_cap_flags |= IB_DEVICE_XRC;
 
 	props->vendor_id	   = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
 		0xffffff;
@@ -181,8 +183,12 @@
 
 static int ib_link_query_port(struct ib_device *ibdev, u8 port,
 			      struct ib_port_attr *props,
+			      struct ib_smp *in_mad,
 			      struct ib_smp *out_mad)
 {
+	int ext_active_speed;
+	int err;
+
 	props->lid		= be16_to_cpup((__be16 *) (out_mad->data + 16));
 	props->lmc		= out_mad->data[34] & 0x7;
 	props->sm_lid		= be16_to_cpup((__be16 *) (out_mad->data + 18));
@@ -203,6 +209,39 @@
 	props->max_vl_num	= out_mad->data[37] >> 4;
 	props->init_type_reply	= out_mad->data[41] >> 4;
 
+	/* Check if extended speeds (EDR/FDR/...) are supported */
+	if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
+		ext_active_speed = out_mad->data[62] >> 4;
+
+		switch (ext_active_speed) {
+		case 1:
+			props->active_speed = 16; /* FDR */
+			break;
+		case 2:
+			props->active_speed = 32; /* EDR */
+			break;
+		}
+	}
+
+	/* If reported active speed is QDR, check if is FDR-10 */
+	if (props->active_speed == 4) {
+		if (to_mdev(ibdev)->dev->caps.ext_port_cap[port] &
+		    MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) {
+			init_query_mad(in_mad);
+			in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
+			in_mad->attr_mod = cpu_to_be32(port);
+
+			err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port,
+					   NULL, NULL, in_mad, out_mad);
+			if (err)
+				return err;
+
+			/* Checking LinkSpeedActive for FDR-10 */
+			if (out_mad->data[15] & 0x1)
+				props->active_speed = 8;
+		}
+	}
+
 	return 0;
 }
 
@@ -227,7 +266,7 @@
 	props->pkey_tbl_len	= 1;
 	props->bad_pkey_cntr	= be16_to_cpup((__be16 *) (out_mad->data + 46));
 	props->qkey_viol_cntr	= be16_to_cpup((__be16 *) (out_mad->data + 48));
-	props->max_mtu		= IB_MTU_2048;
+	props->max_mtu		= IB_MTU_4096;
 	props->subnet_timeout	= 0;
 	props->max_vl_num	= out_mad->data[37] >> 4;
 	props->init_type_reply	= 0;
@@ -274,7 +313,7 @@
 		goto out;
 
 	err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
-		ib_link_query_port(ibdev, port, props, out_mad) :
+		ib_link_query_port(ibdev, port, props, in_mad, out_mad) :
 		eth_link_query_port(ibdev, port, props, out_mad);
 
 out:
@@ -566,6 +605,57 @@
 	return 0;
 }
 
+static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
+					  struct ib_ucontext *context,
+					  struct ib_udata *udata)
+{
+	struct mlx4_ib_xrcd *xrcd;
+	int err;
+
+	if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
+		return ERR_PTR(-ENOSYS);
+
+	xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL);
+	if (!xrcd)
+		return ERR_PTR(-ENOMEM);
+
+	err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn);
+	if (err)
+		goto err1;
+
+	xrcd->pd = ib_alloc_pd(ibdev);
+	if (IS_ERR(xrcd->pd)) {
+		err = PTR_ERR(xrcd->pd);
+		goto err2;
+	}
+
+	xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, 1, 0);
+	if (IS_ERR(xrcd->cq)) {
+		err = PTR_ERR(xrcd->cq);
+		goto err3;
+	}
+
+	return &xrcd->ibxrcd;
+
+err3:
+	ib_dealloc_pd(xrcd->pd);
+err2:
+	mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn);
+err1:
+	kfree(xrcd);
+	return ERR_PTR(err);
+}
+
+static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
+{
+	ib_destroy_cq(to_mxrcd(xrcd)->cq);
+	ib_dealloc_pd(to_mxrcd(xrcd)->pd);
+	mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
+	kfree(xrcd);
+
+	return 0;
+}
+
 static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
 {
 	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
@@ -1044,7 +1134,9 @@
 		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)		|
 		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)		|
 		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)		|
-		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
+		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)		|
+		(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)		|
+		(1ull << IB_USER_VERBS_CMD_OPEN_QP);
 
 	ibdev->ib_dev.query_device	= mlx4_ib_query_device;
 	ibdev->ib_dev.query_port	= mlx4_ib_query_port;
@@ -1093,6 +1185,14 @@
 	ibdev->ib_dev.unmap_fmr		= mlx4_ib_unmap_fmr;
 	ibdev->ib_dev.dealloc_fmr	= mlx4_ib_fmr_dealloc;
 
+	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
+		ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
+		ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd;
+		ibdev->ib_dev.uverbs_cmd_mask |=
+			(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
+			(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
+	}
+
 	spin_lock_init(&iboe->lock);
 
 	if (init_node_data(ibdev))
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index e4bf2cf..ed80345 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -56,6 +56,13 @@
 	u32			pdn;
 };
 
+struct mlx4_ib_xrcd {
+	struct ib_xrcd		ibxrcd;
+	u32			xrcdn;
+	struct ib_pd	       *pd;
+	struct ib_cq	       *cq;
+};
+
 struct mlx4_ib_cq_buf {
 	struct mlx4_buf		buf;
 	struct mlx4_mtt		mtt;
@@ -138,6 +145,7 @@
 	struct mlx4_mtt		mtt;
 	int			buf_size;
 	struct mutex		mutex;
+	u16			xrcdn;
 	u32			flags;
 	u8			port;
 	u8			alt_port;
@@ -211,6 +219,11 @@
 	return container_of(ibpd, struct mlx4_ib_pd, ibpd);
 }
 
+static inline struct mlx4_ib_xrcd *to_mxrcd(struct ib_xrcd *ibxrcd)
+{
+	return container_of(ibxrcd, struct mlx4_ib_xrcd, ibxrcd);
+}
+
 static inline struct mlx4_ib_cq *to_mcq(struct ib_cq *ibcq)
 {
 	return container_of(ibcq, struct mlx4_ib_cq, ibcq);
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 3a91d9d..a16f0c8 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -302,15 +302,14 @@
 }
 
 static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
-		       int is_user, int has_srq, struct mlx4_ib_qp *qp)
+		       int is_user, int has_rq, struct mlx4_ib_qp *qp)
 {
 	/* Sanity check RQ size before proceeding */
 	if (cap->max_recv_wr  > dev->dev->caps.max_wqes  ||
 	    cap->max_recv_sge > dev->dev->caps.max_rq_sg)
 		return -EINVAL;
 
-	if (has_srq) {
-		/* QPs attached to an SRQ should have no RQ */
+	if (!has_rq) {
 		if (cap->max_recv_wr)
 			return -EINVAL;
 
@@ -463,6 +462,14 @@
 	return 0;
 }
 
+static int qp_has_rq(struct ib_qp_init_attr *attr)
+{
+	if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT)
+		return 0;
+
+	return !attr->srq;
+}
+
 static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 			    struct ib_qp_init_attr *init_attr,
 			    struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp)
@@ -479,7 +486,7 @@
 	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
 		qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
 
-	err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp);
+	err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp);
 	if (err)
 		goto err;
 
@@ -513,7 +520,7 @@
 		if (err)
 			goto err_mtt;
 
-		if (!init_attr->srq) {
+		if (qp_has_rq(init_attr)) {
 			err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
 						  ucmd.db_addr, &qp->db);
 			if (err)
@@ -532,7 +539,7 @@
 		if (err)
 			goto err;
 
-		if (!init_attr->srq) {
+		if (qp_has_rq(init_attr)) {
 			err = mlx4_db_alloc(dev->dev, &qp->db, 0);
 			if (err)
 				goto err;
@@ -575,6 +582,9 @@
 	if (err)
 		goto err_qpn;
 
+	if (init_attr->qp_type == IB_QPT_XRC_TGT)
+		qp->mqp.qpn |= (1 << 23);
+
 	/*
 	 * Hardware wants QPN written in big-endian order (after
 	 * shifting) for send doorbell.  Precompute this value to save
@@ -592,9 +602,8 @@
 
 err_wrid:
 	if (pd->uobject) {
-		if (!init_attr->srq)
-			mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context),
-					      &qp->db);
+		if (qp_has_rq(init_attr))
+			mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
 	} else {
 		kfree(qp->sq.wrid);
 		kfree(qp->rq.wrid);
@@ -610,7 +619,7 @@
 		mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
 
 err_db:
-	if (!pd->uobject && !init_attr->srq)
+	if (!pd->uobject && qp_has_rq(init_attr))
 		mlx4_db_free(dev->dev, &qp->db);
 
 err:
@@ -671,6 +680,33 @@
 	}
 }
 
+static struct mlx4_ib_pd *get_pd(struct mlx4_ib_qp *qp)
+{
+	if (qp->ibqp.qp_type == IB_QPT_XRC_TGT)
+		return to_mpd(to_mxrcd(qp->ibqp.xrcd)->pd);
+	else
+		return to_mpd(qp->ibqp.pd);
+}
+
+static void get_cqs(struct mlx4_ib_qp *qp,
+		    struct mlx4_ib_cq **send_cq, struct mlx4_ib_cq **recv_cq)
+{
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_XRC_TGT:
+		*send_cq = to_mcq(to_mxrcd(qp->ibqp.xrcd)->cq);
+		*recv_cq = *send_cq;
+		break;
+	case IB_QPT_XRC_INI:
+		*send_cq = to_mcq(qp->ibqp.send_cq);
+		*recv_cq = *send_cq;
+		break;
+	default:
+		*send_cq = to_mcq(qp->ibqp.send_cq);
+		*recv_cq = to_mcq(qp->ibqp.recv_cq);
+		break;
+	}
+}
+
 static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
 			      int is_user)
 {
@@ -682,8 +718,7 @@
 			printk(KERN_WARNING "mlx4_ib: modify QP %06x to RESET failed.\n",
 			       qp->mqp.qpn);
 
-	send_cq = to_mcq(qp->ibqp.send_cq);
-	recv_cq = to_mcq(qp->ibqp.recv_cq);
+	get_cqs(qp, &send_cq, &recv_cq);
 
 	mlx4_ib_lock_cqs(send_cq, recv_cq);
 
@@ -706,7 +741,7 @@
 	mlx4_mtt_cleanup(dev->dev, &qp->mtt);
 
 	if (is_user) {
-		if (!qp->ibqp.srq)
+		if (qp->rq.wqe_cnt)
 			mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context),
 					      &qp->db);
 		ib_umem_release(qp->umem);
@@ -714,7 +749,7 @@
 		kfree(qp->sq.wrid);
 		kfree(qp->rq.wrid);
 		mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
-		if (!qp->ibqp.srq)
+		if (qp->rq.wqe_cnt)
 			mlx4_db_free(dev->dev, &qp->db);
 	}
 
@@ -725,10 +760,10 @@
 				struct ib_qp_init_attr *init_attr,
 				struct ib_udata *udata)
 {
-	struct mlx4_ib_dev *dev = to_mdev(pd->device);
 	struct mlx4_ib_sqp *sqp;
 	struct mlx4_ib_qp *qp;
 	int err;
+	u16 xrcdn = 0;
 
 	/*
 	 * We only support LSO and multicast loopback blocking, and
@@ -739,10 +774,20 @@
 		return ERR_PTR(-EINVAL);
 
 	if (init_attr->create_flags &&
-	    (pd->uobject || init_attr->qp_type != IB_QPT_UD))
+	    (udata || init_attr->qp_type != IB_QPT_UD))
 		return ERR_PTR(-EINVAL);
 
 	switch (init_attr->qp_type) {
+	case IB_QPT_XRC_TGT:
+		pd = to_mxrcd(init_attr->xrcd)->pd;
+		xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
+		init_attr->send_cq = to_mxrcd(init_attr->xrcd)->cq;
+		/* fall through */
+	case IB_QPT_XRC_INI:
+		if (!(to_mdev(pd->device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
+			return ERR_PTR(-ENOSYS);
+		init_attr->recv_cq = init_attr->send_cq;
+		/* fall through */
 	case IB_QPT_RC:
 	case IB_QPT_UC:
 	case IB_QPT_UD:
@@ -751,13 +796,14 @@
 		if (!qp)
 			return ERR_PTR(-ENOMEM);
 
-		err = create_qp_common(dev, pd, init_attr, udata, 0, qp);
+		err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, 0, qp);
 		if (err) {
 			kfree(qp);
 			return ERR_PTR(err);
 		}
 
 		qp->ibqp.qp_num = qp->mqp.qpn;
+		qp->xrcdn = xrcdn;
 
 		break;
 	}
@@ -765,7 +811,7 @@
 	case IB_QPT_GSI:
 	{
 		/* Userspace is not allowed to create special QPs: */
-		if (pd->uobject)
+		if (udata)
 			return ERR_PTR(-EINVAL);
 
 		sqp = kzalloc(sizeof *sqp, GFP_KERNEL);
@@ -774,8 +820,8 @@
 
 		qp = &sqp->qp;
 
-		err = create_qp_common(dev, pd, init_attr, udata,
-				       dev->dev->caps.sqp_start +
+		err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
+				       to_mdev(pd->device)->dev->caps.sqp_start +
 				       (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) +
 				       init_attr->port_num - 1,
 				       qp);
@@ -801,11 +847,13 @@
 {
 	struct mlx4_ib_dev *dev = to_mdev(qp->device);
 	struct mlx4_ib_qp *mqp = to_mqp(qp);
+	struct mlx4_ib_pd *pd;
 
 	if (is_qp0(dev, mqp))
 		mlx4_CLOSE_PORT(dev->dev, mqp->port);
 
-	destroy_qp_common(dev, mqp, !!qp->pd->uobject);
+	pd = get_pd(mqp);
+	destroy_qp_common(dev, mqp, !!pd->ibpd.uobject);
 
 	if (is_sqp(dev, mqp))
 		kfree(to_msqp(mqp));
@@ -821,6 +869,8 @@
 	case IB_QPT_RC:		return MLX4_QP_ST_RC;
 	case IB_QPT_UC:		return MLX4_QP_ST_UC;
 	case IB_QPT_UD:		return MLX4_QP_ST_UD;
+	case IB_QPT_XRC_INI:
+	case IB_QPT_XRC_TGT:	return MLX4_QP_ST_XRC;
 	case IB_QPT_SMI:
 	case IB_QPT_GSI:	return MLX4_QP_ST_MLX;
 	default:		return -1;
@@ -959,6 +1009,8 @@
 {
 	struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
 	struct mlx4_ib_qp *qp = to_mqp(ibqp);
+	struct mlx4_ib_pd *pd;
+	struct mlx4_ib_cq *send_cq, *recv_cq;
 	struct mlx4_qp_context *context;
 	enum mlx4_qp_optpar optpar = 0;
 	int sqd_event;
@@ -1014,8 +1066,10 @@
 		context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3;
 	context->sq_size_stride |= qp->sq.wqe_shift - 4;
 
-	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
 		context->sq_size_stride |= !!qp->sq_no_prefetch << 7;
+		context->xrcd = cpu_to_be32((u32) qp->xrcdn);
+	}
 
 	if (qp->ibqp.uobject)
 		context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index);
@@ -1079,8 +1133,12 @@
 		optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
 	}
 
-	context->pd	    = cpu_to_be32(to_mpd(ibqp->pd)->pdn);
-	context->params1    = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
+	pd = get_pd(qp);
+	get_cqs(qp, &send_cq, &recv_cq);
+	context->pd       = cpu_to_be32(pd->pdn);
+	context->cqn_send = cpu_to_be32(send_cq->mcq.cqn);
+	context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn);
+	context->params1  = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
 
 	/* Set "fast registration enabled" for all kernel QPs */
 	if (!qp->ibqp.uobject)
@@ -1106,8 +1164,6 @@
 	if (attr_mask & IB_QP_SQ_PSN)
 		context->next_send_psn = cpu_to_be32(attr->sq_psn);
 
-	context->cqn_send = cpu_to_be32(to_mcq(ibqp->send_cq)->mcq.cqn);
-
 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
 		if (attr->max_dest_rd_atomic)
 			context->params2 |=
@@ -1130,8 +1186,6 @@
 	if (attr_mask & IB_QP_RQ_PSN)
 		context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
 
-	context->cqn_recv = cpu_to_be32(to_mcq(ibqp->recv_cq)->mcq.cqn);
-
 	if (attr_mask & IB_QP_QKEY) {
 		context->qkey = cpu_to_be32(attr->qkey);
 		optpar |= MLX4_QP_OPTPAR_Q_KEY;
@@ -1140,7 +1194,7 @@
 	if (ibqp->srq)
 		context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);
 
-	if (!ibqp->srq && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+	if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
 		context->db_rec_addr = cpu_to_be64(qp->db.dma);
 
 	if (cur_state == IB_QPS_INIT &&
@@ -1225,17 +1279,17 @@
 	 * entries and reinitialize the QP.
 	 */
 	if (new_state == IB_QPS_RESET && !ibqp->uobject) {
-		mlx4_ib_cq_clean(to_mcq(ibqp->recv_cq), qp->mqp.qpn,
+		mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
 				 ibqp->srq ? to_msrq(ibqp->srq): NULL);
-		if (ibqp->send_cq != ibqp->recv_cq)
-			mlx4_ib_cq_clean(to_mcq(ibqp->send_cq), qp->mqp.qpn, NULL);
+		if (send_cq != recv_cq)
+			mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
 
 		qp->rq.head = 0;
 		qp->rq.tail = 0;
 		qp->sq.head = 0;
 		qp->sq.tail = 0;
 		qp->sq_next_wqe = 0;
-		if (!ibqp->srq)
+		if (qp->rq.wqe_cnt)
 			*qp->db.db  = 0;
 	}
 
@@ -1547,14 +1601,13 @@
 }
 
 static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
-			     struct ib_send_wr *wr, __be16 *vlan)
+			     struct ib_send_wr *wr)
 {
 	memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
 	dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
 	dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
 	dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan;
 	memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6);
-	*vlan = dseg->vlan;
 }
 
 static void set_mlx_icrc_seg(void *dseg)
@@ -1657,7 +1710,6 @@
 	__be32 uninitialized_var(lso_hdr_sz);
 	__be32 blh;
 	int i;
-	__be16 vlan = cpu_to_be16(0xffff);
 
 	spin_lock_irqsave(&qp->sq.lock, flags);
 
@@ -1761,7 +1813,7 @@
 			break;
 
 		case IB_QPT_UD:
-			set_datagram_seg(wqe, wr, &vlan);
+			set_datagram_seg(wqe, wr);
 			wqe  += sizeof (struct mlx4_wqe_datagram_seg);
 			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
 
@@ -1824,11 +1876,6 @@
 		ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
 				    MLX4_WQE_CTRL_FENCE : 0) | size;
 
-		if (be16_to_cpu(vlan) < 0x1000) {
-			ctrl->ins_vlan = 1 << 6;
-			ctrl->vlan_tag = vlan;
-		}
-
 		/*
 		 * Make sure descriptor is fully written before
 		 * setting ownership bit (because HW can start
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 818b7ec..39542f3 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -76,6 +76,8 @@
 	struct mlx4_ib_srq *srq;
 	struct mlx4_wqe_srq_next_seg *next;
 	struct mlx4_wqe_data_seg *scatter;
+	u32 cqn;
+	u16 xrcdn;
 	int desc_size;
 	int buf_size;
 	int err;
@@ -174,12 +176,18 @@
 		}
 	}
 
-	err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, &srq->mtt,
+	cqn = (init_attr->srq_type == IB_SRQT_XRC) ?
+		to_mcq(init_attr->ext.xrc.cq)->mcq.cqn : 0;
+	xrcdn = (init_attr->srq_type == IB_SRQT_XRC) ?
+		to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn :
+		(u16) dev->dev->caps.reserved_xrcds;
+	err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, cqn, xrcdn, &srq->mtt,
 			     srq->db.dma, &srq->msrq);
 	if (err)
 		goto err_wrid;
 
 	srq->msrq.event = mlx4_ib_srq_event;
+	srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
 
 	if (pd->uobject)
 		if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) {
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 365fe0e..cb9a0b9 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -438,6 +438,9 @@
 	struct mthca_srq *srq;
 	int err;
 
+	if (init_attr->srq_type != IB_SRQT_BASIC)
+		return ERR_PTR(-ENOSYS);
+
 	srq = kmalloc(sizeof *srq, GFP_KERNEL);
 	if (!srq)
 		return ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/hw/nes/Makefile b/drivers/infiniband/hw/nes/Makefile
index 3514851..97820c2 100644
--- a/drivers/infiniband/hw/nes/Makefile
+++ b/drivers/infiniband/hw/nes/Makefile
@@ -1,3 +1,3 @@
 obj-$(CONFIG_INFINIBAND_NES) += iw_nes.o
 
-iw_nes-objs := nes.o nes_hw.o nes_nic.o nes_utils.o nes_verbs.o nes_cm.o
+iw_nes-objs := nes.o nes_hw.o nes_nic.o nes_utils.o nes_verbs.o nes_cm.o nes_mgt.o
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index 2d668c6..5965b3d 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -84,7 +84,7 @@
 MODULE_PARM_DESC(send_first, "Send RDMA Message First on Active Connection");
 
 
-unsigned int nes_drv_opt = 0;
+unsigned int nes_drv_opt = NES_DRV_OPT_DISABLE_INT_MOD | NES_DRV_OPT_ENABLE_PAU;
 module_param(nes_drv_opt, int, 0644);
 MODULE_PARM_DESC(nes_drv_opt, "Driver option parameters");
 
@@ -130,9 +130,6 @@
 	.notifier_call = nes_net_event
 };
 
-
-
-
 /**
  * nes_inetaddr_event
  */
@@ -321,6 +318,9 @@
 	}
 
 	if (atomic_dec_and_test(&nesqp->refcount)) {
+		if (nesqp->pau_mode)
+			nes_destroy_pau_qp(nesdev, nesqp);
+
 		/* Destroy the QP */
 		cqp_request = nes_get_cqp_request(nesdev);
 		if (cqp_request == NULL) {
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index 6fe7987..568b4f1 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -102,6 +102,7 @@
 #define NES_DRV_OPT_NO_INLINE_DATA       0x00000080
 #define NES_DRV_OPT_DISABLE_INT_MOD      0x00000100
 #define NES_DRV_OPT_DISABLE_VIRT_WQ      0x00000200
+#define NES_DRV_OPT_ENABLE_PAU           0x00000400
 
 #define NES_AEQ_EVENT_TIMEOUT         2500
 #define NES_DISCONNECT_EVENT_TIMEOUT  2000
@@ -128,6 +129,7 @@
 #define NES_DBG_IW_RX       0x00020000
 #define NES_DBG_IW_TX       0x00040000
 #define NES_DBG_SHUTDOWN    0x00080000
+#define NES_DBG_PAU         0x00100000
 #define NES_DBG_RSVD1       0x10000000
 #define NES_DBG_RSVD2       0x20000000
 #define NES_DBG_RSVD3       0x40000000
@@ -162,6 +164,7 @@
 #include "nes_context.h"
 #include "nes_user.h"
 #include "nes_cm.h"
+#include "nes_mgt.h"
 
 extern int max_mtu;
 #define max_frame_len (max_mtu+ETH_HLEN)
@@ -202,6 +205,8 @@
 extern atomic_t cm_nodes_destroyed;
 extern atomic_t cm_accel_dropped_pkts;
 extern atomic_t cm_resets_recvd;
+extern atomic_t pau_qps_created;
+extern atomic_t pau_qps_destroyed;
 
 extern u32 int_mod_timer_init;
 extern u32 int_mod_cq_depth_256;
@@ -273,6 +278,14 @@
 	u8                     link_recheck;
 };
 
+/* Receive skb private area - must fit in skb->cb area */
+struct nes_rskb_cb {
+	u64                    busaddr;
+	u32                    maplen;
+	u32                    seqnum;
+	u8                     *data_start;
+	struct nes_qp          *nesqp;
+};
 
 static inline __le32 get_crc_value(struct nes_v4_quad *nes_quad)
 {
@@ -305,8 +318,8 @@
 static inline void
 nes_fill_init_cqp_wqe(struct nes_hw_cqp_wqe *cqp_wqe, struct nes_device *nesdev)
 {
-	set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_COMP_CTX_LOW_IDX,
-			(u64)((unsigned long) &nesdev->cqp));
+	cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX]       = 0;
+	cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX]      = 0;
 	cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX]   = 0;
 	cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX]  = 0;
 	cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX] = 0;
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index c118663..dfce9ea 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -77,26 +77,19 @@
 atomic_t cm_accel_dropped_pkts;
 atomic_t cm_resets_recvd;
 
-static inline int mini_cm_accelerated(struct nes_cm_core *,
-	struct nes_cm_node *);
-static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *,
-	struct nes_vnic *, struct nes_cm_info *);
+static inline int mini_cm_accelerated(struct nes_cm_core *, struct nes_cm_node *);
+static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *, struct nes_vnic *, struct nes_cm_info *);
 static int mini_cm_del_listen(struct nes_cm_core *, struct nes_cm_listener *);
-static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *,
-	struct nes_vnic *, u16, void *, struct nes_cm_info *);
+static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *, struct nes_vnic *, u16, void *, struct nes_cm_info *);
 static int mini_cm_close(struct nes_cm_core *, struct nes_cm_node *);
-static int mini_cm_accept(struct nes_cm_core *, struct ietf_mpa_frame *,
-	struct nes_cm_node *);
-static int mini_cm_reject(struct nes_cm_core *, struct ietf_mpa_frame *,
-	struct nes_cm_node *);
-static int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *,
-	struct sk_buff *);
+static int mini_cm_accept(struct nes_cm_core *, struct nes_cm_node *);
+static int mini_cm_reject(struct nes_cm_core *, struct nes_cm_node *);
+static int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *, struct sk_buff *);
 static int mini_cm_dealloc_core(struct nes_cm_core *);
 static int mini_cm_get(struct nes_cm_core *);
 static int mini_cm_set(struct nes_cm_core *, u32, u32);
 
-static void form_cm_frame(struct sk_buff *, struct nes_cm_node *,
-	void *, u32, void *, u32, u8);
+static void form_cm_frame(struct sk_buff *, struct nes_cm_node *, void *, u32, void *, u32, u8);
 static int add_ref_cm_node(struct nes_cm_node *);
 static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *);
 
@@ -111,16 +104,14 @@
 static int send_reset(struct nes_cm_node *, struct sk_buff *);
 static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb);
 static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb);
-static void process_packet(struct nes_cm_node *, struct sk_buff *,
-	struct nes_cm_core *);
+static void process_packet(struct nes_cm_node *, struct sk_buff *, struct nes_cm_core *);
 
 static void active_open_err(struct nes_cm_node *, struct sk_buff *, int);
 static void passive_open_err(struct nes_cm_node *, struct sk_buff *, int);
 static void cleanup_retrans_entry(struct nes_cm_node *);
 static void handle_rcv_mpa(struct nes_cm_node *, struct sk_buff *);
 static void free_retrans_entry(struct nes_cm_node *cm_node);
-static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph,
-	struct sk_buff *skb, int optionsize, int passive);
+static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, struct sk_buff *skb, int optionsize, int passive);
 
 /* CM event handler functions */
 static void cm_event_connected(struct nes_cm_event *);
@@ -130,6 +121,12 @@
 static void cm_event_mpa_reject(struct nes_cm_event *);
 static void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node);
 
+/* MPA build functions */
+static int cm_build_mpa_frame(struct nes_cm_node *, u8 **, u16 *, u8 *, u8);
+static void build_mpa_v2(struct nes_cm_node *, void *, u8);
+static void build_mpa_v1(struct nes_cm_node *, void *, u8);
+static void build_rdma0_msg(struct nes_cm_node *, struct nes_qp **);
+
 static void print_core(struct nes_cm_core *core);
 
 /* External CM API Interface */
@@ -159,12 +156,21 @@
 atomic_t cm_connect_reqs;
 atomic_t cm_rejects;
 
+int nes_add_ref_cm_node(struct nes_cm_node *cm_node)
+{
+	return add_ref_cm_node(cm_node);
+}
+
+int nes_rem_ref_cm_node(struct nes_cm_node *cm_node)
+{
+	return rem_ref_cm_node(cm_node->cm_core, cm_node);
+}
 
 /**
  * create_event
  */
-static struct nes_cm_event *create_event(struct nes_cm_node *cm_node,
-		enum nes_cm_event_type type)
+static struct nes_cm_event *create_event(struct nes_cm_node *	cm_node,
+					 enum nes_cm_event_type type)
 {
 	struct nes_cm_event *event;
 
@@ -186,10 +192,10 @@
 	event->cm_info.cm_id = cm_node->cm_id;
 
 	nes_debug(NES_DBG_CM, "cm_node=%p Created event=%p, type=%u, "
-		"dst_addr=%08x[%x], src_addr=%08x[%x]\n",
-		cm_node, event, type, event->cm_info.loc_addr,
-		event->cm_info.loc_port, event->cm_info.rem_addr,
-		event->cm_info.rem_port);
+		  "dst_addr=%08x[%x], src_addr=%08x[%x]\n",
+		  cm_node, event, type, event->cm_info.loc_addr,
+		  event->cm_info.loc_port, event->cm_info.rem_addr,
+		  event->cm_info.rem_port);
 
 	nes_cm_post_event(event);
 	return event;
@@ -201,14 +207,19 @@
  */
 static int send_mpa_request(struct nes_cm_node *cm_node, struct sk_buff *skb)
 {
+	u8 start_addr = 0;
+	u8 *start_ptr = &start_addr;
+	u8 **start_buff = &start_ptr;
+	u16 buff_len = 0;
+
 	if (!skb) {
 		nes_debug(NES_DBG_CM, "skb set to NULL\n");
 		return -1;
 	}
 
 	/* send an MPA Request frame */
-	form_cm_frame(skb, cm_node, NULL, 0, &cm_node->mpa_frame,
-			cm_node->mpa_frame_size, SET_ACK);
+	cm_build_mpa_frame(cm_node, start_buff, &buff_len, NULL, MPA_KEY_REQUEST);
+	form_cm_frame(skb, cm_node, NULL, 0, *start_buff, buff_len, SET_ACK);
 
 	return schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0);
 }
@@ -217,7 +228,11 @@
 
 static int send_mpa_reject(struct nes_cm_node *cm_node)
 {
-	struct sk_buff  *skb = NULL;
+	struct sk_buff *skb = NULL;
+	u8 start_addr = 0;
+	u8 *start_ptr = &start_addr;
+	u8 **start_buff = &start_ptr;
+	u16 buff_len = 0;
 
 	skb = dev_alloc_skb(MAX_CM_BUFFER);
 	if (!skb) {
@@ -226,8 +241,8 @@
 	}
 
 	/* send an MPA reject frame */
-	form_cm_frame(skb, cm_node, NULL, 0, &cm_node->mpa_frame,
-			cm_node->mpa_frame_size, SET_ACK | SET_FIN);
+	cm_build_mpa_frame(cm_node, start_buff, &buff_len, NULL, MPA_KEY_REPLY);
+	form_cm_frame(skb, cm_node, NULL, 0, *start_buff, buff_len, SET_ACK | SET_FIN);
 
 	cm_node->state = NES_CM_STATE_FIN_WAIT1;
 	return schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0);
@@ -239,24 +254,31 @@
  * IETF MPA frame
  */
 static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type,
-		u32 len)
+		     u32 len)
 {
-	struct ietf_mpa_frame *mpa_frame;
+	struct ietf_mpa_v1 *mpa_frame;
+	struct ietf_mpa_v2 *mpa_v2_frame;
+	struct ietf_rtr_msg *rtr_msg;
+	int mpa_hdr_len;
+	int priv_data_len;
 
 	*type = NES_MPA_REQUEST_ACCEPT;
 
 	/* assume req frame is in tcp data payload */
-	if (len < sizeof(struct ietf_mpa_frame)) {
+	if (len < sizeof(struct ietf_mpa_v1)) {
 		nes_debug(NES_DBG_CM, "The received ietf buffer was too small (%x)\n", len);
 		return -EINVAL;
 	}
 
-	mpa_frame = (struct ietf_mpa_frame *)buffer;
-	cm_node->mpa_frame_size = ntohs(mpa_frame->priv_data_len);
+	/* points to the beginning of the frame, which could be MPA V1 or V2 */
+	mpa_frame = (struct ietf_mpa_v1 *)buffer;
+	mpa_hdr_len = sizeof(struct ietf_mpa_v1);
+	priv_data_len = ntohs(mpa_frame->priv_data_len);
+
 	/* make sure mpa private data len is less than 512 bytes */
-	if (cm_node->mpa_frame_size > IETF_MAX_PRIV_DATA_LEN) {
+	if (priv_data_len > IETF_MAX_PRIV_DATA_LEN) {
 		nes_debug(NES_DBG_CM, "The received Length of Private"
-			" Data field exceeds 512 octets\n");
+			  " Data field exceeds 512 octets\n");
 		return -EINVAL;
 	}
 	/*
@@ -264,11 +286,22 @@
 	 * received MPA version and MPA key information
 	 *
 	 */
-	if (mpa_frame->rev != mpa_version) {
+	if (mpa_frame->rev != IETF_MPA_V1 && mpa_frame->rev != IETF_MPA_V2) {
 		nes_debug(NES_DBG_CM, "The received mpa version"
-				" can not be interoperated\n");
+			  " is not supported\n");
 		return -EINVAL;
 	}
+	/*
+	* backwards compatibility only
+	*/
+	if (mpa_frame->rev > cm_node->mpa_frame_rev) {
+		nes_debug(NES_DBG_CM, "The received mpa version"
+			" can not be interoperated\n");
+		return -EINVAL;
+	} else {
+		cm_node->mpa_frame_rev = mpa_frame->rev;
+	}
+
 	if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) {
 		if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE)) {
 			nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n");
@@ -281,25 +314,75 @@
 		}
 	}
 
-	if (cm_node->mpa_frame_size + sizeof(struct ietf_mpa_frame) != len) {
+
+	if (priv_data_len + mpa_hdr_len != len) {
 		nes_debug(NES_DBG_CM, "The received ietf buffer was not right"
-				" complete (%x + %x != %x)\n",
-				cm_node->mpa_frame_size,
-				(u32)sizeof(struct ietf_mpa_frame), len);
+			" complete (%x + %x != %x)\n",
+			priv_data_len, mpa_hdr_len, len);
 		return -EINVAL;
 	}
 	/* make sure it does not exceed the max size */
 	if (len > MAX_CM_BUFFER) {
 		nes_debug(NES_DBG_CM, "The received ietf buffer was too large"
-				" (%x + %x != %x)\n",
-				cm_node->mpa_frame_size,
-				(u32)sizeof(struct ietf_mpa_frame), len);
+			" (%x + %x != %x)\n",
+			priv_data_len, mpa_hdr_len, len);
 		return -EINVAL;
 	}
 
+	cm_node->mpa_frame_size = priv_data_len;
+
+	switch (mpa_frame->rev) {
+	case IETF_MPA_V2: {
+		u16 ird_size;
+		u16 ord_size;
+		mpa_v2_frame = (struct ietf_mpa_v2 *)buffer;
+		mpa_hdr_len += IETF_RTR_MSG_SIZE;
+		cm_node->mpa_frame_size -= IETF_RTR_MSG_SIZE;
+		rtr_msg = &mpa_v2_frame->rtr_msg;
+
+		/* parse rtr message */
+		rtr_msg->ctrl_ird = ntohs(rtr_msg->ctrl_ird);
+		rtr_msg->ctrl_ord = ntohs(rtr_msg->ctrl_ord);
+		ird_size = rtr_msg->ctrl_ird & IETF_NO_IRD_ORD;
+		ord_size = rtr_msg->ctrl_ord & IETF_NO_IRD_ORD;
+
+		if (!(rtr_msg->ctrl_ird & IETF_PEER_TO_PEER)) {
+			/* send reset */
+			return -EINVAL;
+		}
+
+		if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) {
+			/* responder */
+			if (cm_node->ord_size > ird_size)
+				cm_node->ord_size = ird_size;
+		} else {
+			/* initiator */
+			if (cm_node->ord_size > ird_size)
+				cm_node->ord_size = ird_size;
+
+			if (cm_node->ird_size < ord_size) {
+				/* no resources available */
+				/* send terminate message */
+				return -EINVAL;
+			}
+		}
+
+		if (rtr_msg->ctrl_ord & IETF_RDMA0_READ) {
+			cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
+		} else if (rtr_msg->ctrl_ord & IETF_RDMA0_WRITE) {
+			cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO;
+		} else {        /* Not supported RDMA0 operation */
+			return -EINVAL;
+		}
+		break;
+	}
+	case IETF_MPA_V1:
+	default:
+		break;
+	}
+
 	/* copy entire MPA frame to our cm_node's frame */
-	memcpy(cm_node->mpa_frame_buf, buffer + sizeof(struct ietf_mpa_frame),
-			cm_node->mpa_frame_size);
+	memcpy(cm_node->mpa_frame_buf, buffer + mpa_hdr_len, cm_node->mpa_frame_size);
 
 	if (mpa_frame->flags & IETF_MPA_FLAGS_REJECT)
 		*type = NES_MPA_REQUEST_REJECT;
@@ -312,8 +395,8 @@
  * node info to build.
  */
 static void form_cm_frame(struct sk_buff *skb,
-	struct nes_cm_node *cm_node, void *options, u32 optionsize,
-	void *data, u32 datasize, u8 flags)
+			  struct nes_cm_node *cm_node, void *options, u32 optionsize,
+			  void *data, u32 datasize, u8 flags)
 {
 	struct tcphdr *tcph;
 	struct iphdr *iph;
@@ -322,14 +405,14 @@
 	u16 packetsize = sizeof(*iph);
 
 	packetsize += sizeof(*tcph);
-	packetsize +=  optionsize + datasize;
+	packetsize += optionsize + datasize;
 
+	skb_trim(skb, 0);
 	memset(skb->data, 0x00, ETH_HLEN + sizeof(*iph) + sizeof(*tcph));
 
-	skb->len = 0;
 	buf = skb_put(skb, packetsize + ETH_HLEN);
 
-	ethh = (struct ethhdr *) buf;
+	ethh = (struct ethhdr *)buf;
 	buf += ETH_HLEN;
 
 	iph = (struct iphdr *)buf;
@@ -337,7 +420,7 @@
 	tcph = (struct tcphdr *)buf;
 	skb_reset_mac_header(skb);
 	skb_set_network_header(skb, ETH_HLEN);
-	skb_set_transport_header(skb, ETH_HLEN+sizeof(*iph));
+	skb_set_transport_header(skb, ETH_HLEN + sizeof(*iph));
 	buf += sizeof(*tcph);
 
 	skb->ip_summed = CHECKSUM_PARTIAL;
@@ -350,14 +433,14 @@
 	ethh->h_proto = htons(0x0800);
 
 	iph->version = IPVERSION;
-	iph->ihl = 5;		/* 5 * 4Byte words, IP headr len */
+	iph->ihl = 5;           /* 5 * 4Byte words, IP headr len */
 	iph->tos = 0;
 	iph->tot_len = htons(packetsize);
 	iph->id = htons(++cm_node->tcp_cntxt.loc_id);
 
 	iph->frag_off = htons(0x4000);
 	iph->ttl = 0x40;
-	iph->protocol = 0x06;	/* IPPROTO_TCP */
+	iph->protocol = 0x06;   /* IPPROTO_TCP */
 
 	iph->saddr = htonl(cm_node->loc_addr);
 	iph->daddr = htonl(cm_node->rem_addr);
@@ -370,14 +453,16 @@
 		cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt;
 		tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num);
 		tcph->ack = 1;
-	} else
+	} else {
 		tcph->ack_seq = 0;
+	}
 
 	if (flags & SET_SYN) {
 		cm_node->tcp_cntxt.loc_seq_num++;
 		tcph->syn = 1;
-	} else
+	} else {
 		cm_node->tcp_cntxt.loc_seq_num += datasize;
+	}
 
 	if (flags & SET_FIN) {
 		cm_node->tcp_cntxt.loc_seq_num++;
@@ -398,10 +483,8 @@
 
 	skb_shinfo(skb)->nr_frags = 0;
 	cm_packets_created++;
-
 }
 
-
 /**
  * print_core - dump a cm core
  */
@@ -413,7 +496,7 @@
 		return;
 	nes_debug(NES_DBG_CM, "---------------------------------------------\n");
 
-	nes_debug(NES_DBG_CM, "State         : %u \n",  core->state);
+	nes_debug(NES_DBG_CM, "State         : %u \n", core->state);
 
 	nes_debug(NES_DBG_CM, "Listen Nodes  : %u \n", atomic_read(&core->listen_node_cnt));
 	nes_debug(NES_DBG_CM, "Active Nodes  : %u \n", atomic_read(&core->node_cnt));
@@ -423,6 +506,147 @@
 	nes_debug(NES_DBG_CM, "-------------- end core ---------------\n");
 }
 
+/**
+ * cm_build_mpa_frame - build a MPA V1 frame or MPA V2 frame
+ */
+static int cm_build_mpa_frame(struct nes_cm_node *cm_node, u8 **start_buff,
+			      u16 *buff_len, u8 *pci_mem, u8 mpa_key)
+{
+	int ret = 0;
+
+	*start_buff = (pci_mem) ? pci_mem : &cm_node->mpa_frame_buf[0];
+
+	switch (cm_node->mpa_frame_rev) {
+	case IETF_MPA_V1:
+		*start_buff = (u8 *)*start_buff + sizeof(struct ietf_rtr_msg);
+		*buff_len = sizeof(struct ietf_mpa_v1) + cm_node->mpa_frame_size;
+		build_mpa_v1(cm_node, *start_buff, mpa_key);
+		break;
+	case IETF_MPA_V2:
+		*buff_len = sizeof(struct ietf_mpa_v2) + cm_node->mpa_frame_size;
+		build_mpa_v2(cm_node, *start_buff, mpa_key);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	return ret;
+}
+
+/**
+ * build_mpa_v2 - build a MPA V2 frame
+ */
+static void build_mpa_v2(struct nes_cm_node *cm_node,
+			 void *start_addr, u8 mpa_key)
+{
+	struct ietf_mpa_v2 *mpa_frame = (struct ietf_mpa_v2 *)start_addr;
+	struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg;
+
+	/* initialize the upper 5 bytes of the frame */
+	build_mpa_v1(cm_node, start_addr, mpa_key);
+	mpa_frame->flags |= IETF_MPA_V2_FLAG; /* set a bit to indicate MPA V2 */
+	mpa_frame->priv_data_len += htons(IETF_RTR_MSG_SIZE);
+
+	/* initialize RTR msg */
+	rtr_msg->ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ?
+			    IETF_NO_IRD_ORD : cm_node->ird_size;
+	rtr_msg->ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ?
+			    IETF_NO_IRD_ORD : cm_node->ord_size;
+
+	rtr_msg->ctrl_ird |= IETF_PEER_TO_PEER;
+	rtr_msg->ctrl_ird |= IETF_FLPDU_ZERO_LEN;
+
+	switch (mpa_key) {
+	case MPA_KEY_REQUEST:
+		rtr_msg->ctrl_ord |= IETF_RDMA0_WRITE;
+		rtr_msg->ctrl_ord |= IETF_RDMA0_READ;
+		break;
+	case MPA_KEY_REPLY:
+		switch (cm_node->send_rdma0_op) {
+		case SEND_RDMA_WRITE_ZERO:
+			rtr_msg->ctrl_ord |= IETF_RDMA0_WRITE;
+			break;
+		case SEND_RDMA_READ_ZERO:
+			rtr_msg->ctrl_ord |= IETF_RDMA0_READ;
+			break;
+		}
+	}
+	rtr_msg->ctrl_ird = htons(rtr_msg->ctrl_ird);
+	rtr_msg->ctrl_ord = htons(rtr_msg->ctrl_ord);
+}
+
+/**
+ * build_mpa_v1 - build a MPA V1 frame
+ */
+static void build_mpa_v1(struct nes_cm_node *cm_node, void *start_addr, u8 mpa_key)
+{
+	struct ietf_mpa_v1 *mpa_frame = (struct ietf_mpa_v1 *)start_addr;
+
+	switch (mpa_key) {
+	case MPA_KEY_REQUEST:
+		memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE);
+		break;
+	case MPA_KEY_REPLY:
+		memcpy(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
+		break;
+	}
+	mpa_frame->flags = IETF_MPA_FLAGS_CRC;
+	mpa_frame->rev = cm_node->mpa_frame_rev;
+	mpa_frame->priv_data_len = htons(cm_node->mpa_frame_size);
+}
+
+static void build_rdma0_msg(struct nes_cm_node *cm_node, struct nes_qp **nesqp_addr)
+{
+	u64 u64temp;
+	struct nes_qp *nesqp = *nesqp_addr;
+	struct nes_hw_qp_wqe *wqe = &nesqp->hwqp.sq_vbase[0];
+
+	u64temp = (unsigned long)nesqp;
+	u64temp |= NES_SW_CONTEXT_ALIGN >> 1;
+	set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp);
+
+	wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0;
+	wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0;
+
+	switch (cm_node->send_rdma0_op) {
+	case SEND_RDMA_WRITE_ZERO:
+		nes_debug(NES_DBG_CM, "Sending first write.\n");
+		wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
+			cpu_to_le32(NES_IWARP_SQ_OP_RDMAW);
+		wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0;
+		wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0;
+		wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0;
+		break;
+
+	case SEND_RDMA_READ_ZERO:
+	default:
+		if (cm_node->send_rdma0_op != SEND_RDMA_READ_ZERO) {
+			printk(KERN_ERR "%s[%u]: Unsupported RDMA0 len operation=%u\n",
+				 __func__, __LINE__, cm_node->send_rdma0_op);
+			WARN_ON(1);
+		}
+		nes_debug(NES_DBG_CM, "Sending first rdma operation.\n");
+		wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
+			cpu_to_le32(NES_IWARP_SQ_OP_RDMAR);
+		wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX] = 1;
+		wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_TO_HIGH_IDX] = 0;
+		wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] = 0;
+		wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_STAG_IDX] = 1;
+		wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 1;
+		break;
+	}
+
+	if (nesqp->sq_kmapped) {
+		nesqp->sq_kmapped = 0;
+		kunmap(nesqp->page);
+	}
+
+	/*use the reserved spot on the WQ for the extra first WQE*/
+	nesqp->nesqp_context->ird_ord_sizes &= cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
+							     NES_QPCONTEXT_ORDIRD_WRPDU |
+							     NES_QPCONTEXT_ORDIRD_ALSMM));
+	nesqp->skip_lsmm = 1;
+	nesqp->hwqp.sq_tail = 0;
+}
 
 /**
  * schedule_nes_timer
@@ -430,10 +654,10 @@
  *			rem_ref_cm_node(cm_core, cm_node);add_ref_cm_node(cm_node);
  */
 int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
-		enum nes_timer_type type, int send_retrans,
-		int close_when_complete)
+		       enum nes_timer_type type, int send_retrans,
+		       int close_when_complete)
 {
-	unsigned long  flags;
+	unsigned long flags;
 	struct nes_cm_core *cm_core = cm_node->cm_core;
 	struct nes_timer_entry *new_send;
 	int ret = 0;
@@ -454,7 +678,7 @@
 	new_send->close_when_complete = close_when_complete;
 
 	if (type == NES_TIMER_TYPE_CLOSE) {
-		new_send->timetosend += (HZ/10);
+		new_send->timetosend += (HZ / 10);
 		if (cm_node->recv_entry) {
 			kfree(new_send);
 			WARN_ON(1);
@@ -475,7 +699,7 @@
 		ret = nes_nic_cm_xmit(new_send->skb, cm_node->netdev);
 		if (ret != NETDEV_TX_OK) {
 			nes_debug(NES_DBG_CM, "Error sending packet %p "
-				"(jiffies = %lu)\n", new_send, jiffies);
+				  "(jiffies = %lu)\n", new_send, jiffies);
 			new_send->timetosend = jiffies;
 			ret = NETDEV_TX_OK;
 		} else {
@@ -504,6 +728,7 @@
 	struct iw_cm_id *cm_id = cm_node->cm_id;
 	enum nes_cm_node_state state = cm_node->state;
 	cm_node->state = NES_CM_STATE_CLOSED;
+
 	switch (state) {
 	case NES_CM_STATE_SYN_RCVD:
 	case NES_CM_STATE_CLOSING:
@@ -536,10 +761,10 @@
 		spin_lock_irqsave(&nesqp->lock, qplockflags);
 		if (nesqp->cm_id) {
 			nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, "
-				"refcount = %d: HIT A "
-				"NES_TIMER_TYPE_CLOSE with something "
-				"to do!!!\n", nesqp->hwqp.qp_id, cm_id,
-				atomic_read(&nesqp->refcount));
+				  "refcount = %d: HIT A "
+				  "NES_TIMER_TYPE_CLOSE with something "
+				  "to do!!!\n", nesqp->hwqp.qp_id, cm_id,
+				  atomic_read(&nesqp->refcount));
 			nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
 			nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT;
 			nesqp->ibqp_state = IB_QPS_ERR;
@@ -548,10 +773,10 @@
 		} else {
 			spin_unlock_irqrestore(&nesqp->lock, qplockflags);
 			nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, "
-				"refcount = %d: HIT A "
-				"NES_TIMER_TYPE_CLOSE with nothing "
-				"to do!!!\n", nesqp->hwqp.qp_id, cm_id,
-				atomic_read(&nesqp->refcount));
+				  "refcount = %d: HIT A "
+				  "NES_TIMER_TYPE_CLOSE with nothing "
+				  "to do!!!\n", nesqp->hwqp.qp_id, cm_id,
+				  atomic_read(&nesqp->refcount));
 		}
 	} else if (rem_node) {
 		/* TIME_WAIT state */
@@ -580,11 +805,12 @@
 	int ret = NETDEV_TX_OK;
 
 	struct list_head timer_list;
+
 	INIT_LIST_HEAD(&timer_list);
 	spin_lock_irqsave(&cm_core->ht_lock, flags);
 
 	list_for_each_safe(list_node, list_core_temp,
-				&cm_core->connected_nodes) {
+			   &cm_core->connected_nodes) {
 		cm_node = container_of(list_node, struct nes_cm_node, list);
 		if ((cm_node->recv_entry) || (cm_node->send_entry)) {
 			add_ref_cm_node(cm_node);
@@ -595,18 +821,19 @@
 
 	list_for_each_safe(list_node, list_core_temp, &timer_list) {
 		cm_node = container_of(list_node, struct nes_cm_node,
-					timer_entry);
+				       timer_entry);
 		recv_entry = cm_node->recv_entry;
 
 		if (recv_entry) {
 			if (time_after(recv_entry->timetosend, jiffies)) {
 				if (nexttimeout > recv_entry->timetosend ||
-						!settimer) {
+				    !settimer) {
 					nexttimeout = recv_entry->timetosend;
 					settimer = 1;
 				}
-			} else
+			} else {
 				handle_recv_entry(cm_node, 1);
+			}
 		}
 
 		spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
@@ -617,8 +844,8 @@
 			if (time_after(send_entry->timetosend, jiffies)) {
 				if (cm_node->state != NES_CM_STATE_TSA) {
 					if ((nexttimeout >
-						send_entry->timetosend) ||
-						!settimer) {
+					     send_entry->timetosend) ||
+					    !settimer) {
 						nexttimeout =
 							send_entry->timetosend;
 						settimer = 1;
@@ -630,13 +857,13 @@
 			}
 
 			if ((cm_node->state == NES_CM_STATE_TSA) ||
-				(cm_node->state == NES_CM_STATE_CLOSED)) {
+			    (cm_node->state == NES_CM_STATE_CLOSED)) {
 				free_retrans_entry(cm_node);
 				break;
 			}
 
 			if (!send_entry->retranscount ||
-				!send_entry->retrycount) {
+			    !send_entry->retrycount) {
 				cm_packets_dropped++;
 				free_retrans_entry(cm_node);
 
@@ -645,28 +872,28 @@
 				nes_retrans_expired(cm_node);
 				cm_node->state = NES_CM_STATE_CLOSED;
 				spin_lock_irqsave(&cm_node->retrans_list_lock,
-					flags);
+						  flags);
 				break;
 			}
 			atomic_inc(&send_entry->skb->users);
 			cm_packets_retrans++;
 			nes_debug(NES_DBG_CM, "Retransmitting send_entry %p "
-				"for node %p, jiffies = %lu, time to send = "
-				"%lu, retranscount = %u, send_entry->seq_num = "
-				"0x%08X, cm_node->tcp_cntxt.rem_ack_num = "
-				"0x%08X\n", send_entry, cm_node, jiffies,
-				send_entry->timetosend,
-				send_entry->retranscount,
-				send_entry->seq_num,
-				cm_node->tcp_cntxt.rem_ack_num);
+				  "for node %p, jiffies = %lu, time to send = "
+				  "%lu, retranscount = %u, send_entry->seq_num = "
+				  "0x%08X, cm_node->tcp_cntxt.rem_ack_num = "
+				  "0x%08X\n", send_entry, cm_node, jiffies,
+				  send_entry->timetosend,
+				  send_entry->retranscount,
+				  send_entry->seq_num,
+				  cm_node->tcp_cntxt.rem_ack_num);
 
 			spin_unlock_irqrestore(&cm_node->retrans_list_lock,
-				flags);
+					       flags);
 			ret = nes_nic_cm_xmit(send_entry->skb, cm_node->netdev);
 			spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
 			if (ret != NETDEV_TX_OK) {
 				nes_debug(NES_DBG_CM, "rexmit failed for "
-					"node=%p\n", cm_node);
+					  "node=%p\n", cm_node);
 				cm_packets_bounced++;
 				send_entry->retrycount--;
 				nexttimeout = jiffies + NES_SHORT_TIME;
@@ -676,18 +903,18 @@
 				cm_packets_sent++;
 			}
 			nes_debug(NES_DBG_CM, "Packet Sent: retrans count = "
-				"%u, retry count = %u.\n",
-				send_entry->retranscount,
-				send_entry->retrycount);
+				  "%u, retry count = %u.\n",
+				  send_entry->retranscount,
+				  send_entry->retrycount);
 			if (send_entry->send_retrans) {
 				send_entry->retranscount--;
 				timetosend = (NES_RETRY_TIMEOUT <<
-					(NES_DEFAULT_RETRANS - send_entry->retranscount));
+					      (NES_DEFAULT_RETRANS - send_entry->retranscount));
 
 				send_entry->timetosend = jiffies +
-					min(timetosend, NES_MAX_TIMEOUT);
+							 min(timetosend, NES_MAX_TIMEOUT);
 				if (nexttimeout > send_entry->timetosend ||
-					!settimer) {
+				    !settimer) {
 					nexttimeout = send_entry->timetosend;
 					settimer = 1;
 				}
@@ -696,11 +923,11 @@
 				close_when_complete =
 					send_entry->close_when_complete;
 				nes_debug(NES_DBG_CM, "cm_node=%p state=%d\n",
-					cm_node, cm_node->state);
+					  cm_node, cm_node->state);
 				free_retrans_entry(cm_node);
 				if (close_when_complete)
 					rem_ref_cm_node(cm_node->cm_core,
-						cm_node);
+							cm_node);
 			}
 		} while (0);
 
@@ -710,7 +937,7 @@
 
 	if (settimer) {
 		if (!timer_pending(&cm_core->tcp_timer)) {
-			cm_core->tcp_timer.expires  = nexttimeout;
+			cm_core->tcp_timer.expires = nexttimeout;
 			add_timer(&cm_core->tcp_timer);
 		}
 	}
@@ -721,13 +948,13 @@
  * send_syn
  */
 static int send_syn(struct nes_cm_node *cm_node, u32 sendack,
-	struct sk_buff *skb)
+		    struct sk_buff *skb)
 {
 	int ret;
 	int flags = SET_SYN;
 	char optionsbuffer[sizeof(struct option_mss) +
-		sizeof(struct option_windowscale) + sizeof(struct option_base) +
-		TCP_OPTIONS_PADDING];
+			   sizeof(struct option_windowscale) + sizeof(struct option_base) +
+			   TCP_OPTIONS_PADDING];
 
 	int optionssize = 0;
 	/* Sending MSS option */
@@ -854,7 +1081,7 @@
  * find_node - find a cm node that matches the reference cm node
  */
 static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
-		u16 rem_port, nes_addr_t rem_addr, u16 loc_port, nes_addr_t loc_addr)
+				     u16 rem_port, nes_addr_t rem_addr, u16 loc_port, nes_addr_t loc_addr)
 {
 	unsigned long flags;
 	struct list_head *hte;
@@ -868,12 +1095,12 @@
 	list_for_each_entry(cm_node, hte, list) {
 		/* compare quad, return node handle if a match */
 		nes_debug(NES_DBG_CM, "finding node %x:%x =? %x:%x ^ %x:%x =? %x:%x\n",
-				cm_node->loc_addr, cm_node->loc_port,
-				loc_addr, loc_port,
-				cm_node->rem_addr, cm_node->rem_port,
-				rem_addr, rem_port);
+			  cm_node->loc_addr, cm_node->loc_port,
+			  loc_addr, loc_port,
+			  cm_node->rem_addr, cm_node->rem_port,
+			  rem_addr, rem_port);
 		if ((cm_node->loc_addr == loc_addr) && (cm_node->loc_port == loc_port) &&
-				(cm_node->rem_addr == rem_addr) && (cm_node->rem_port == rem_port)) {
+		    (cm_node->rem_addr == rem_addr) && (cm_node->rem_port == rem_port)) {
 			add_ref_cm_node(cm_node);
 			spin_unlock_irqrestore(&cm_core->ht_lock, flags);
 			return cm_node;
@@ -890,7 +1117,7 @@
  * find_listener - find a cm node listening on this addr-port pair
  */
 static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
-		nes_addr_t dst_addr, u16 dst_port, enum nes_cm_listener_state listener_state)
+					     nes_addr_t dst_addr, u16 dst_port, enum nes_cm_listener_state listener_state)
 {
 	unsigned long flags;
 	struct nes_cm_listener *listen_node;
@@ -900,9 +1127,9 @@
 	list_for_each_entry(listen_node, &cm_core->listen_list.list, list) {
 		/* compare node pair, return node handle if a match */
 		if (((listen_node->loc_addr == dst_addr) ||
-				listen_node->loc_addr == 0x00000000) &&
-				(listen_node->loc_port == dst_port) &&
-				(listener_state & listen_node->listener_state)) {
+		     listen_node->loc_addr == 0x00000000) &&
+		    (listen_node->loc_port == dst_port) &&
+		    (listener_state & listen_node->listener_state)) {
 			atomic_inc(&listen_node->ref_count);
 			spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
 			return listen_node;
@@ -927,7 +1154,7 @@
 		return -EINVAL;
 
 	nes_debug(NES_DBG_CM, "Adding Node %p to Active Connection HT\n",
-		cm_node);
+		  cm_node);
 
 	spin_lock_irqsave(&cm_core->ht_lock, flags);
 
@@ -946,7 +1173,7 @@
  * mini_cm_dec_refcnt_listen
  */
 static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
-	struct nes_cm_listener *listener, int free_hanging_nodes)
+				     struct nes_cm_listener *listener, int free_hanging_nodes)
 {
 	int ret = -EINVAL;
 	int err = 0;
@@ -957,8 +1184,8 @@
 	struct list_head reset_list;
 
 	nes_debug(NES_DBG_CM, "attempting listener= %p free_nodes= %d, "
-		"refcnt=%d\n", listener, free_hanging_nodes,
-		atomic_read(&listener->ref_count));
+		  "refcnt=%d\n", listener, free_hanging_nodes,
+		  atomic_read(&listener->ref_count));
 	/* free non-accelerated child nodes for this listener */
 	INIT_LIST_HEAD(&reset_list);
 	if (free_hanging_nodes) {
@@ -966,7 +1193,7 @@
 		list_for_each_safe(list_pos, list_temp,
 				   &g_cm_core->connected_nodes) {
 			cm_node = container_of(list_pos, struct nes_cm_node,
-				list);
+					       list);
 			if ((cm_node->listener == listener) &&
 			    (!cm_node->accelerated)) {
 				add_ref_cm_node(cm_node);
@@ -978,7 +1205,7 @@
 
 	list_for_each_safe(list_pos, list_temp, &reset_list) {
 		cm_node = container_of(list_pos, struct nes_cm_node,
-				reset_entry);
+				       reset_entry);
 		{
 			struct nes_cm_node *loopback = cm_node->loopbackpartner;
 			enum nes_cm_node_state old_state;
@@ -990,7 +1217,7 @@
 					err = send_reset(cm_node, NULL);
 					if (err) {
 						cm_node->state =
-							 NES_CM_STATE_CLOSED;
+							NES_CM_STATE_CLOSED;
 						WARN_ON(1);
 					} else {
 						old_state = cm_node->state;
@@ -1035,10 +1262,9 @@
 
 		spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
 
-		if (listener->nesvnic) {
+		if (listener->nesvnic)
 			nes_manage_apbvt(listener->nesvnic, listener->loc_port,
-					PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL);
-		}
+					 PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL);
 
 		nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener);
 
@@ -1052,8 +1278,8 @@
 	if (listener) {
 		if (atomic_read(&listener->pend_accepts_cnt) > 0)
 			nes_debug(NES_DBG_CM, "destroying listener (%p)"
-					" with non-zero pending accepts=%u\n",
-					listener, atomic_read(&listener->pend_accepts_cnt));
+				  " with non-zero pending accepts=%u\n",
+				  listener, atomic_read(&listener->pend_accepts_cnt));
 	}
 
 	return ret;
@@ -1064,7 +1290,7 @@
  * mini_cm_del_listen
  */
 static int mini_cm_del_listen(struct nes_cm_core *cm_core,
-		struct nes_cm_listener *listener)
+			      struct nes_cm_listener *listener)
 {
 	listener->listener_state = NES_CM_LISTENER_PASSIVE_STATE;
 	listener->cm_id = NULL; /* going to be destroyed pretty soon */
@@ -1076,9 +1302,10 @@
  * mini_cm_accelerated
  */
 static inline int mini_cm_accelerated(struct nes_cm_core *cm_core,
-		struct nes_cm_node *cm_node)
+				      struct nes_cm_node *cm_node)
 {
 	u32 was_timer_set;
+
 	cm_node->accelerated = 1;
 
 	if (cm_node->accept_pend) {
@@ -1112,7 +1339,7 @@
 	rt = ip_route_output(&init_net, htonl(dst_ip), 0, 0, 0);
 	if (IS_ERR(rt)) {
 		printk(KERN_ERR "%s: ip_route_output_key failed for 0x%08X\n",
-				__func__, dst_ip);
+		       __func__, dst_ip);
 		return rc;
 	}
 
@@ -1130,7 +1357,7 @@
 
 			if (arpindex >= 0) {
 				if (!memcmp(nesadapter->arp_table[arpindex].mac_addr,
-							neigh->ha, ETH_ALEN)){
+					    neigh->ha, ETH_ALEN)) {
 					/* Mac address same as in nes_arp_table */
 					neigh_release(neigh);
 					ip_rt_put(rt);
@@ -1138,8 +1365,8 @@
 				}
 
 				nes_manage_arp_cache(nesvnic->netdev,
-						nesadapter->arp_table[arpindex].mac_addr,
-						dst_ip, NES_ARP_DELETE);
+						     nesadapter->arp_table[arpindex].mac_addr,
+						     dst_ip, NES_ARP_DELETE);
 			}
 
 			nes_manage_arp_cache(nesvnic->netdev, neigh->ha,
@@ -1161,8 +1388,8 @@
  * make_cm_node - create a new instance of a cm node
  */
 static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
-		struct nes_vnic *nesvnic, struct nes_cm_info *cm_info,
-		struct nes_cm_listener *listener)
+					struct nes_vnic *nesvnic, struct nes_cm_info *cm_info,
+					struct nes_cm_listener *listener)
 {
 	struct nes_cm_node *cm_node;
 	struct timespec ts;
@@ -1181,7 +1408,12 @@
 	cm_node->rem_addr = cm_info->rem_addr;
 	cm_node->loc_port = cm_info->loc_port;
 	cm_node->rem_port = cm_info->rem_port;
-	cm_node->send_write0 = send_first;
+
+	cm_node->mpa_frame_rev = mpa_version;
+	cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
+	cm_node->ird_size = IETF_NO_IRD_ORD;
+	cm_node->ord_size = IETF_NO_IRD_ORD;
+
 	nes_debug(NES_DBG_CM, "Make node addresses : loc = %pI4:%x, rem = %pI4:%x\n",
 		  &cm_node->loc_addr, cm_node->loc_port,
 		  &cm_node->rem_addr, cm_node->rem_port);
@@ -1191,7 +1423,7 @@
 	memcpy(cm_node->loc_mac, nesvnic->netdev->dev_addr, ETH_ALEN);
 
 	nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n", cm_node->listener,
-			cm_node->cm_id);
+		  cm_node->cm_id);
 
 	spin_lock_init(&cm_node->retrans_list_lock);
 
@@ -1202,11 +1434,11 @@
 	cm_node->tcp_cntxt.loc_id = NES_CM_DEF_LOCAL_ID;
 	cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE;
 	cm_node->tcp_cntxt.rcv_wnd = NES_CM_DEFAULT_RCV_WND_SCALED >>
-			NES_CM_DEFAULT_RCV_WND_SCALE;
+				     NES_CM_DEFAULT_RCV_WND_SCALE;
 	ts = current_kernel_time();
 	cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec);
 	cm_node->tcp_cntxt.mss = nesvnic->max_frame_size - sizeof(struct iphdr) -
-			sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN;
+				 sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN;
 	cm_node->tcp_cntxt.rcv_nxt = 0;
 	/* get a unique session ID , add thread_id to an upcounter to handle race */
 	atomic_inc(&cm_core->node_cnt);
@@ -1222,12 +1454,11 @@
 	cm_node->loopbackpartner = NULL;
 
 	/* get the mac addr for the remote node */
-	if (ipv4_is_loopback(htonl(cm_node->rem_addr)))
+	if (ipv4_is_loopback(htonl(cm_node->rem_addr))) {
 		arpindex = nes_arp_table(nesdev, ntohl(nesvnic->local_ipaddr), NULL, NES_ARP_RESOLVE);
-	else {
+	} else {
 		oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
 		arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr, oldarpindex);
-
 	}
 	if (arpindex < 0) {
 		kfree(cm_node);
@@ -1260,7 +1491,7 @@
  * rem_ref_cm_node - destroy an instance of a cm node
  */
 static int rem_ref_cm_node(struct nes_cm_core *cm_core,
-	struct nes_cm_node *cm_node)
+			   struct nes_cm_node *cm_node)
 {
 	unsigned long flags;
 	struct nes_qp *nesqp;
@@ -1291,9 +1522,9 @@
 	} else {
 		if (cm_node->apbvt_set && cm_node->nesvnic) {
 			nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port,
-				PCI_FUNC(
-				cm_node->nesvnic->nesdev->pcidev->devfn),
-				NES_MANAGE_APBVT_DEL);
+					 PCI_FUNC(
+						 cm_node->nesvnic->nesdev->pcidev->devfn),
+					 NES_MANAGE_APBVT_DEL);
 		}
 	}
 
@@ -1314,7 +1545,7 @@
  * process_options
  */
 static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc,
-	u32 optionsize, u32 syn_packet)
+			   u32 optionsize, u32 syn_packet)
 {
 	u32 tmp;
 	u32 offset = 0;
@@ -1332,15 +1563,15 @@
 			continue;
 		case OPTION_NUMBER_MSS:
 			nes_debug(NES_DBG_CM, "%s: MSS Length: %d Offset: %d "
-				"Size: %d\n", __func__,
-				all_options->as_mss.length, offset, optionsize);
+				  "Size: %d\n", __func__,
+				  all_options->as_mss.length, offset, optionsize);
 			got_mss_option = 1;
 			if (all_options->as_mss.length != 4) {
 				return 1;
 			} else {
 				tmp = ntohs(all_options->as_mss.mss);
 				if (tmp > 0 && tmp <
-					cm_node->tcp_cntxt.mss)
+				    cm_node->tcp_cntxt.mss)
 					cm_node->tcp_cntxt.mss = tmp;
 			}
 			break;
@@ -1348,12 +1579,9 @@
 			cm_node->tcp_cntxt.snd_wscale =
 				all_options->as_windowscale.shiftcount;
 			break;
-		case OPTION_NUMBER_WRITE0:
-			cm_node->send_write0 = 1;
-			break;
 		default:
 			nes_debug(NES_DBG_CM, "TCP Option not understood: %x\n",
-				all_options->as_base.optionnum);
+				  all_options->as_base.optionnum);
 			break;
 		}
 		offset += all_options->as_base.length;
@@ -1372,8 +1600,8 @@
 static void handle_fin_pkt(struct nes_cm_node *cm_node)
 {
 	nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. "
-		"refcnt=%d\n", cm_node, cm_node->state,
-		atomic_read(&cm_node->ref_count));
+		  "refcnt=%d\n", cm_node, cm_node->state,
+		  atomic_read(&cm_node->ref_count));
 	switch (cm_node->state) {
 	case NES_CM_STATE_SYN_RCVD:
 	case NES_CM_STATE_SYN_SENT:
@@ -1439,7 +1667,20 @@
 		nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p "
 			"listener=%p state=%d\n", __func__, __LINE__, cm_node,
 			cm_node->listener, cm_node->state);
-		active_open_err(cm_node, skb, reset);
+		switch (cm_node->mpa_frame_rev) {
+		case IETF_MPA_V2:
+			cm_node->mpa_frame_rev = IETF_MPA_V1;
+			/* send a syn and goto syn sent state */
+			cm_node->state = NES_CM_STATE_SYN_SENT;
+			if (send_syn(cm_node, 0, NULL)) {
+				active_open_err(cm_node, skb, reset);
+			}
+			break;
+		case IETF_MPA_V1:
+		default:
+			active_open_err(cm_node, skb, reset);
+			break;
+		}
 		break;
 	case NES_CM_STATE_MPAREQ_RCVD:
 		atomic_inc(&cm_node->passive_state);
@@ -1475,21 +1716,21 @@
 
 static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb)
 {
-
-	int	ret = 0;
+	int ret = 0;
 	int datasize = skb->len;
 	u8 *dataloc = skb->data;
 
 	enum nes_cm_event_type type = NES_CM_EVENT_UNKNOWN;
-	u32     res_type;
+	u32 res_type;
+
 	ret = parse_mpa(cm_node, dataloc, &res_type, datasize);
 	if (ret) {
 		nes_debug(NES_DBG_CM, "didn't like MPA Request\n");
 		if (cm_node->state == NES_CM_STATE_MPAREQ_SENT) {
 			nes_debug(NES_DBG_CM, "%s[%u] create abort for "
-				"cm_node=%p listener=%p state=%d\n", __func__,
-				__LINE__, cm_node, cm_node->listener,
-				cm_node->state);
+				  "cm_node=%p listener=%p state=%d\n", __func__,
+				  __LINE__, cm_node, cm_node->listener,
+				  cm_node->state);
 			active_open_err(cm_node, skb, 1);
 		} else {
 			passive_open_err(cm_node, skb, 1);
@@ -1499,16 +1740,15 @@
 
 	switch (cm_node->state) {
 	case NES_CM_STATE_ESTABLISHED:
-		if (res_type == NES_MPA_REQUEST_REJECT) {
+		if (res_type == NES_MPA_REQUEST_REJECT)
 			/*BIG problem as we are receiving the MPA.. So should
-			* not be REJECT.. This is Passive Open.. We can
-			* only receive it Reject for Active Open...*/
+			 * not be REJECT.. This is Passive Open.. We can
+			 * only receive it Reject for Active Open...*/
 			WARN_ON(1);
-		}
 		cm_node->state = NES_CM_STATE_MPAREQ_RCVD;
 		type = NES_CM_EVENT_MPA_REQ;
 		atomic_set(&cm_node->passive_state,
-				NES_PASSIVE_STATE_INDICATED);
+			   NES_PASSIVE_STATE_INDICATED);
 		break;
 	case NES_CM_STATE_MPAREQ_SENT:
 		cleanup_retrans_entry(cm_node);
@@ -1535,8 +1775,8 @@
 	case NES_CM_STATE_SYN_SENT:
 	case NES_CM_STATE_MPAREQ_SENT:
 		nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p "
-			"listener=%p state=%d\n", __func__, __LINE__, cm_node,
-			cm_node->listener, cm_node->state);
+			  "listener=%p state=%d\n", __func__, __LINE__, cm_node,
+			  cm_node->listener, cm_node->state);
 		active_open_err(cm_node, skb, 1);
 		break;
 	case NES_CM_STATE_ESTABLISHED:
@@ -1550,11 +1790,11 @@
 }
 
 static int check_syn(struct nes_cm_node *cm_node, struct tcphdr *tcph,
-	struct sk_buff *skb)
+		     struct sk_buff *skb)
 {
 	int err;
 
-	err = ((ntohl(tcph->ack_seq) == cm_node->tcp_cntxt.loc_seq_num))? 0 : 1;
+	err = ((ntohl(tcph->ack_seq) == cm_node->tcp_cntxt.loc_seq_num)) ? 0 : 1;
 	if (err)
 		active_open_err(cm_node, skb, 1);
 
@@ -1562,7 +1802,7 @@
 }
 
 static int check_seq(struct nes_cm_node *cm_node, struct tcphdr *tcph,
-	struct sk_buff *skb)
+		     struct sk_buff *skb)
 {
 	int err = 0;
 	u32 seq;
@@ -1570,21 +1810,22 @@
 	u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num;
 	u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt;
 	u32 rcv_wnd;
+
 	seq = ntohl(tcph->seq);
 	ack_seq = ntohl(tcph->ack_seq);
 	rcv_wnd = cm_node->tcp_cntxt.rcv_wnd;
 	if (ack_seq != loc_seq_num)
 		err = 1;
-	else if (!between(seq, rcv_nxt, (rcv_nxt+rcv_wnd)))
+	else if (!between(seq, rcv_nxt, (rcv_nxt + rcv_wnd)))
 		err = 1;
 	if (err) {
 		nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p "
-			"listener=%p state=%d\n", __func__, __LINE__, cm_node,
-			cm_node->listener, cm_node->state);
+			  "listener=%p state=%d\n", __func__, __LINE__, cm_node,
+			  cm_node->listener, cm_node->state);
 		indicate_pkt_err(cm_node, skb);
 		nes_debug(NES_DBG_CM, "seq ERROR cm_node =%p seq=0x%08X "
-			"rcv_nxt=0x%08X rcv_wnd=0x%x\n", cm_node, seq, rcv_nxt,
-			rcv_wnd);
+			  "rcv_nxt=0x%08X rcv_wnd=0x%x\n", cm_node, seq, rcv_nxt,
+			  rcv_wnd);
 	}
 	return err;
 }
@@ -1594,9 +1835,8 @@
  * is created with a listener or it may comein as rexmitted packet which in
  * that case will be just dropped.
  */
-
 static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
-	struct tcphdr *tcph)
+			   struct tcphdr *tcph)
 {
 	int ret;
 	u32 inc_sequence;
@@ -1615,15 +1855,15 @@
 	case NES_CM_STATE_LISTENING:
 		/* Passive OPEN */
 		if (atomic_read(&cm_node->listener->pend_accepts_cnt) >
-				cm_node->listener->backlog) {
+		    cm_node->listener->backlog) {
 			nes_debug(NES_DBG_CM, "drop syn due to backlog "
-				"pressure \n");
+				  "pressure \n");
 			cm_backlog_drops++;
 			passive_open_err(cm_node, skb, 0);
 			break;
 		}
 		ret = handle_tcp_options(cm_node, tcph, skb, optionsize,
-			1);
+					 1);
 		if (ret) {
 			passive_open_err(cm_node, skb, 0);
 			/* drop pkt */
@@ -1657,9 +1897,8 @@
 }
 
 static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
-	struct tcphdr *tcph)
+			      struct tcphdr *tcph)
 {
-
 	int ret;
 	u32 inc_sequence;
 	int optionsize;
@@ -1678,7 +1917,7 @@
 		ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 0);
 		if (ret) {
 			nes_debug(NES_DBG_CM, "cm_node=%p tcp_options failed\n",
-				cm_node);
+				  cm_node);
 			break;
 		}
 		cleanup_retrans_entry(cm_node);
@@ -1717,12 +1956,13 @@
 }
 
 static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
-	struct tcphdr *tcph)
+			  struct tcphdr *tcph)
 {
 	int datasize = 0;
 	u32 inc_sequence;
 	int ret = 0;
 	int optionsize;
+
 	optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
 
 	if (check_seq(cm_node, tcph, skb))
@@ -1743,8 +1983,9 @@
 		if (datasize) {
 			cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
 			handle_rcv_mpa(cm_node, skb);
-		} else  /* rcvd ACK only */
+		} else { /* rcvd ACK only */
 			dev_kfree_skb_any(skb);
+		}
 		break;
 	case NES_CM_STATE_ESTABLISHED:
 		/* Passive OPEN */
@@ -1752,16 +1993,18 @@
 		if (datasize) {
 			cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
 			handle_rcv_mpa(cm_node, skb);
-		} else
+		} else {
 			drop_packet(skb);
+		}
 		break;
 	case NES_CM_STATE_MPAREQ_SENT:
 		cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
 		if (datasize) {
 			cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
 			handle_rcv_mpa(cm_node, skb);
-		} else  /* Could be just an ack pkt.. */
+		} else { /* Could be just an ack pkt.. */
 			dev_kfree_skb_any(skb);
+		}
 		break;
 	case NES_CM_STATE_LISTENING:
 		cleanup_retrans_entry(cm_node);
@@ -1802,14 +2045,15 @@
 
 
 static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph,
-	struct sk_buff *skb, int optionsize, int passive)
+			      struct sk_buff *skb, int optionsize, int passive)
 {
 	u8 *optionsloc = (u8 *)&tcph[1];
+
 	if (optionsize) {
 		if (process_options(cm_node, optionsloc, optionsize,
-			(u32)tcph->syn)) {
+				    (u32)tcph->syn)) {
 			nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n",
-				__func__, cm_node);
+				  __func__, cm_node);
 			if (passive)
 				passive_open_err(cm_node, skb, 1);
 			else
@@ -1819,7 +2063,7 @@
 	}
 
 	cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) <<
-			cm_node->tcp_cntxt.snd_wscale;
+				     cm_node->tcp_cntxt.snd_wscale;
 
 	if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd)
 		cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd;
@@ -1830,18 +2074,18 @@
  * active_open_err() will send reset() if flag set..
  * It will also send ABORT event.
  */
-
 static void active_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb,
-	int reset)
+			    int reset)
 {
 	cleanup_retrans_entry(cm_node);
 	if (reset) {
 		nes_debug(NES_DBG_CM, "ERROR active err called for cm_node=%p, "
-				"state=%d\n", cm_node, cm_node->state);
+			  "state=%d\n", cm_node, cm_node->state);
 		add_ref_cm_node(cm_node);
 		send_reset(cm_node, skb);
-	} else
+	} else {
 		dev_kfree_skb_any(skb);
+	}
 
 	cm_node->state = NES_CM_STATE_CLOSED;
 	create_event(cm_node, NES_CM_EVENT_ABORTED);
@@ -1851,15 +2095,14 @@
  * passive_open_err() will either do a reset() or will free up the skb and
  * remove the cm_node.
  */
-
 static void passive_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb,
-	int reset)
+			     int reset)
 {
 	cleanup_retrans_entry(cm_node);
 	cm_node->state = NES_CM_STATE_CLOSED;
 	if (reset) {
 		nes_debug(NES_DBG_CM, "passive_open_err sending RST for "
-			"cm_node=%p state =%d\n", cm_node, cm_node->state);
+			  "cm_node=%p state =%d\n", cm_node, cm_node->state);
 		send_reset(cm_node, skb);
 	} else {
 		dev_kfree_skb_any(skb);
@@ -1874,6 +2117,7 @@
 static void free_retrans_entry(struct nes_cm_node *cm_node)
 {
 	struct nes_timer_entry *send_entry;
+
 	send_entry = cm_node->send_entry;
 	if (send_entry) {
 		cm_node->send_entry = NULL;
@@ -1897,26 +2141,28 @@
  * Returns skb if to be freed, else it will return NULL if already used..
  */
 static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
-	struct nes_cm_core *cm_core)
+			   struct nes_cm_core *cm_core)
 {
-	enum nes_tcpip_pkt_type	pkt_type = NES_PKT_TYPE_UNKNOWN;
+	enum nes_tcpip_pkt_type pkt_type = NES_PKT_TYPE_UNKNOWN;
 	struct tcphdr *tcph = tcp_hdr(skb);
-	u32     fin_set = 0;
+	u32 fin_set = 0;
 	int ret = 0;
+
 	skb_pull(skb, ip_hdr(skb)->ihl << 2);
 
 	nes_debug(NES_DBG_CM, "process_packet: cm_node=%p state =%d syn=%d "
-		"ack=%d rst=%d fin=%d\n", cm_node, cm_node->state, tcph->syn,
-		tcph->ack, tcph->rst, tcph->fin);
+		  "ack=%d rst=%d fin=%d\n", cm_node, cm_node->state, tcph->syn,
+		  tcph->ack, tcph->rst, tcph->fin);
 
-	if (tcph->rst)
+	if (tcph->rst) {
 		pkt_type = NES_PKT_TYPE_RST;
-	else if (tcph->syn) {
+	} else if (tcph->syn) {
 		pkt_type = NES_PKT_TYPE_SYN;
 		if (tcph->ack)
 			pkt_type = NES_PKT_TYPE_SYNACK;
-	} else if (tcph->ack)
+	} else if (tcph->ack) {
 		pkt_type = NES_PKT_TYPE_ACK;
+	}
 	if (tcph->fin)
 		fin_set = 1;
 
@@ -1947,17 +2193,17 @@
  * mini_cm_listen - create a listen node with params
  */
 static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
-	struct nes_vnic *nesvnic, struct nes_cm_info *cm_info)
+					      struct nes_vnic *nesvnic, struct nes_cm_info *cm_info)
 {
 	struct nes_cm_listener *listener;
 	unsigned long flags;
 
 	nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n",
-		cm_info->loc_addr, cm_info->loc_port);
+		  cm_info->loc_addr, cm_info->loc_port);
 
 	/* cannot have multiple matching listeners */
 	listener = find_listener(cm_core, htonl(cm_info->loc_addr),
-			htons(cm_info->loc_port), NES_CM_LISTENER_EITHER_STATE);
+				 htons(cm_info->loc_port), NES_CM_LISTENER_EITHER_STATE);
 	if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) {
 		/* find automatically incs ref count ??? */
 		atomic_dec(&listener->ref_count);
@@ -2003,9 +2249,9 @@
 	}
 
 	nes_debug(NES_DBG_CM, "Api - listen(): addr=0x%08X, port=0x%04x,"
-			" listener = %p, backlog = %d, cm_id = %p.\n",
-			cm_info->loc_addr, cm_info->loc_port,
-			listener, listener->backlog, listener->cm_id);
+		  " listener = %p, backlog = %d, cm_id = %p.\n",
+		  cm_info->loc_addr, cm_info->loc_port,
+		  listener, listener->backlog, listener->cm_id);
 
 	return listener;
 }
@@ -2015,26 +2261,20 @@
  * mini_cm_connect - make a connection node with params
  */
 static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
-	struct nes_vnic *nesvnic, u16 private_data_len,
-	void *private_data, struct nes_cm_info *cm_info)
+					   struct nes_vnic *nesvnic, u16 private_data_len,
+					   void *private_data, struct nes_cm_info *cm_info)
 {
 	int ret = 0;
 	struct nes_cm_node *cm_node;
 	struct nes_cm_listener *loopbackremotelistener;
 	struct nes_cm_node *loopbackremotenode;
 	struct nes_cm_info loopback_cm_info;
-	u16 mpa_frame_size = sizeof(struct ietf_mpa_frame) + private_data_len;
-	struct ietf_mpa_frame *mpa_frame = NULL;
+	u8 *start_buff;
 
 	/* create a CM connection node */
 	cm_node = make_cm_node(cm_core, nesvnic, cm_info, NULL);
 	if (!cm_node)
 		return NULL;
-	mpa_frame = &cm_node->mpa_frame;
-	memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE);
-	mpa_frame->flags = IETF_MPA_FLAGS_CRC;
-	mpa_frame->rev =  IETF_MPA_VERSION;
-	mpa_frame->priv_data_len = htons(private_data_len);
 
 	/* set our node side to client (active) side */
 	cm_node->tcp_cntxt.client = 1;
@@ -2042,8 +2282,8 @@
 
 	if (cm_info->loc_addr == cm_info->rem_addr) {
 		loopbackremotelistener = find_listener(cm_core,
-				ntohl(nesvnic->local_ipaddr), cm_node->rem_port,
-				NES_CM_LISTENER_ACTIVE_STATE);
+						       ntohl(nesvnic->local_ipaddr), cm_node->rem_port,
+						       NES_CM_LISTENER_ACTIVE_STATE);
 		if (loopbackremotelistener == NULL) {
 			create_event(cm_node, NES_CM_EVENT_ABORTED);
 		} else {
@@ -2052,7 +2292,7 @@
 			loopback_cm_info.rem_port = cm_info->loc_port;
 			loopback_cm_info.cm_id = loopbackremotelistener->cm_id;
 			loopbackremotenode = make_cm_node(cm_core, nesvnic,
-				&loopback_cm_info, loopbackremotelistener);
+							  &loopback_cm_info, loopbackremotelistener);
 			if (!loopbackremotenode) {
 				rem_ref_cm_node(cm_node->cm_core, cm_node);
 				return NULL;
@@ -2063,7 +2303,7 @@
 				NES_CM_DEFAULT_RCV_WND_SCALE;
 			cm_node->loopbackpartner = loopbackremotenode;
 			memcpy(loopbackremotenode->mpa_frame_buf, private_data,
-				private_data_len);
+			       private_data_len);
 			loopbackremotenode->mpa_frame_size = private_data_len;
 
 			/* we are done handling this state. */
@@ -2091,12 +2331,10 @@
 		return cm_node;
 	}
 
-	/* set our node side to client (active) side */
-	cm_node->tcp_cntxt.client = 1;
-	/* init our MPA frame ptr */
-	memcpy(mpa_frame->priv_data, private_data, private_data_len);
+	start_buff = &cm_node->mpa_frame_buf[0] + sizeof(struct ietf_mpa_v2);
+	cm_node->mpa_frame_size = private_data_len;
 
-	cm_node->mpa_frame_size = mpa_frame_size;
+	memcpy(start_buff, private_data, private_data_len);
 
 	/* send a syn and goto syn sent state */
 	cm_node->state = NES_CM_STATE_SYN_SENT;
@@ -2105,18 +2343,19 @@
 	if (ret) {
 		/* error in sending the syn free up the cm_node struct */
 		nes_debug(NES_DBG_CM, "Api - connect() FAILED: dest "
-			"addr=0x%08X, port=0x%04x, cm_node=%p, cm_id = %p.\n",
-			cm_node->rem_addr, cm_node->rem_port, cm_node,
-			cm_node->cm_id);
+			  "addr=0x%08X, port=0x%04x, cm_node=%p, cm_id = %p.\n",
+			  cm_node->rem_addr, cm_node->rem_port, cm_node,
+			  cm_node->cm_id);
 		rem_ref_cm_node(cm_node->cm_core, cm_node);
 		cm_node = NULL;
 	}
 
-	if (cm_node)
+	if (cm_node) {
 		nes_debug(NES_DBG_CM, "Api - connect(): dest addr=0x%08X,"
-			"port=0x%04x, cm_node=%p, cm_id = %p.\n",
-			cm_node->rem_addr, cm_node->rem_port, cm_node,
-			cm_node->cm_id);
+			  "port=0x%04x, cm_node=%p, cm_id = %p.\n",
+			  cm_node->rem_addr, cm_node->rem_port, cm_node,
+			  cm_node->cm_id);
+	}
 
 	return cm_node;
 }
@@ -2126,8 +2365,7 @@
  * mini_cm_accept - accept a connection
  * This function is never called
  */
-static int mini_cm_accept(struct nes_cm_core *cm_core,
-	struct ietf_mpa_frame *mpa_frame, struct nes_cm_node *cm_node)
+static int mini_cm_accept(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node)
 {
 	return 0;
 }
@@ -2136,8 +2374,7 @@
 /**
  * mini_cm_reject - reject and teardown a connection
  */
-static int mini_cm_reject(struct nes_cm_core *cm_core,
-	struct ietf_mpa_frame *mpa_frame, struct nes_cm_node *cm_node)
+static int mini_cm_reject(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node)
 {
 	int ret = 0;
 	int err = 0;
@@ -2147,7 +2384,7 @@
 	struct nes_cm_node *loopback = cm_node->loopbackpartner;
 
 	nes_debug(NES_DBG_CM, "%s cm_node=%p type=%d state=%d\n",
-		__func__, cm_node, cm_node->tcp_cntxt.client, cm_node->state);
+		  __func__, cm_node, cm_node->tcp_cntxt.client, cm_node->state);
 
 	if (cm_node->tcp_cntxt.client)
 		return ret;
@@ -2168,8 +2405,9 @@
 					err = send_reset(cm_node, NULL);
 					if (err)
 						WARN_ON(1);
-				} else
+				} else {
 					cm_id->add_ref(cm_id);
+				}
 			}
 		}
 	} else {
@@ -2244,7 +2482,7 @@
 	case NES_CM_STATE_TSA:
 		if (cm_node->send_entry)
 			printk(KERN_ERR "ERROR Close got called from STATE_TSA "
-				"send_entry=%p\n", cm_node->send_entry);
+			       "send_entry=%p\n", cm_node->send_entry);
 		ret = rem_ref_cm_node(cm_core, cm_node);
 		break;
 	}
@@ -2257,7 +2495,7 @@
  * node state machine
  */
 static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
-	struct nes_vnic *nesvnic, struct sk_buff *skb)
+			    struct nes_vnic *nesvnic, struct sk_buff *skb)
 {
 	struct nes_cm_node *cm_node = NULL;
 	struct nes_cm_listener *listener = NULL;
@@ -2269,9 +2507,8 @@
 
 	if (!skb)
 		return 0;
-	if (skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) {
+	if (skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr))
 		return 0;
-	}
 
 	iph = (struct iphdr *)skb->data;
 	tcph = (struct tcphdr *)(skb->data + sizeof(struct iphdr));
@@ -2289,8 +2526,8 @@
 
 	do {
 		cm_node = find_node(cm_core,
-			nfo.rem_port, nfo.rem_addr,
-			nfo.loc_port, nfo.loc_addr);
+				    nfo.rem_port, nfo.rem_addr,
+				    nfo.loc_port, nfo.loc_addr);
 
 		if (!cm_node) {
 			/* Only type of packet accepted are for */
@@ -2300,8 +2537,8 @@
 				break;
 			}
 			listener = find_listener(cm_core, nfo.loc_addr,
-				nfo.loc_port,
-				NES_CM_LISTENER_ACTIVE_STATE);
+						 nfo.loc_port,
+						 NES_CM_LISTENER_ACTIVE_STATE);
 			if (!listener) {
 				nfo.cm_id = NULL;
 				nfo.conn_type = 0;
@@ -2312,10 +2549,10 @@
 			nfo.cm_id = listener->cm_id;
 			nfo.conn_type = listener->conn_type;
 			cm_node = make_cm_node(cm_core, nesvnic, &nfo,
-				listener);
+					       listener);
 			if (!cm_node) {
 				nes_debug(NES_DBG_CM, "Unable to allocate "
-					"node\n");
+					  "node\n");
 				cm_packets_dropped++;
 				atomic_dec(&listener->ref_count);
 				dev_kfree_skb_any(skb);
@@ -2331,9 +2568,13 @@
 			}
 			add_ref_cm_node(cm_node);
 		} else if (cm_node->state == NES_CM_STATE_TSA) {
-			rem_ref_cm_node(cm_core, cm_node);
-			atomic_inc(&cm_accel_dropped_pkts);
-			dev_kfree_skb_any(skb);
+			if (cm_node->nesqp->pau_mode)
+				nes_queue_mgt_skbs(skb, nesvnic, cm_node->nesqp);
+			else {
+				rem_ref_cm_node(cm_core, cm_node);
+				atomic_inc(&cm_accel_dropped_pkts);
+				dev_kfree_skb_any(skb);
+			}
 			break;
 		}
 		skb_reset_network_header(skb);
@@ -2363,7 +2604,7 @@
 	init_timer(&cm_core->tcp_timer);
 	cm_core->tcp_timer.function = nes_cm_timer_tick;
 
-	cm_core->mtu   = NES_CM_DEFAULT_MTU;
+	cm_core->mtu = NES_CM_DEFAULT_MTU;
 	cm_core->state = NES_CM_STATE_INITED;
 	cm_core->free_tx_pkt_max = NES_CM_DEFAULT_FREE_PKTS;
 
@@ -2401,9 +2642,8 @@
 
 	barrier();
 
-	if (timer_pending(&cm_core->tcp_timer)) {
+	if (timer_pending(&cm_core->tcp_timer))
 		del_timer(&cm_core->tcp_timer);
-	}
 
 	destroy_workqueue(cm_core->event_wq);
 	destroy_workqueue(cm_core->disconn_wq);
@@ -2458,8 +2698,8 @@
 		return -EINVAL;
 
 	nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_IPV4 |
-			NES_QPCONTEXT_MISC_NO_NAGLE | NES_QPCONTEXT_MISC_DO_NOT_FRAG |
-			NES_QPCONTEXT_MISC_DROS);
+						  NES_QPCONTEXT_MISC_NO_NAGLE | NES_QPCONTEXT_MISC_DO_NOT_FRAG |
+						  NES_QPCONTEXT_MISC_DROS);
 
 	if (cm_node->tcp_cntxt.snd_wscale || cm_node->tcp_cntxt.rcv_wscale)
 		nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_WSCALE);
@@ -2469,15 +2709,15 @@
 	nesqp->nesqp_context->mss |= cpu_to_le32(((u32)cm_node->tcp_cntxt.mss) << 16);
 
 	nesqp->nesqp_context->tcp_state_flow_label |= cpu_to_le32(
-			(u32)NES_QPCONTEXT_TCPSTATE_EST << NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT);
+		(u32)NES_QPCONTEXT_TCPSTATE_EST << NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT);
 
 	nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32(
-			(cm_node->tcp_cntxt.snd_wscale << NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT) &
-			NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK);
+		(cm_node->tcp_cntxt.snd_wscale << NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT) &
+		NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK);
 
 	nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32(
-			(cm_node->tcp_cntxt.rcv_wscale << NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT) &
-			NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK);
+		(cm_node->tcp_cntxt.rcv_wscale << NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT) &
+		NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK);
 
 	nesqp->nesqp_context->keepalive = cpu_to_le32(0x80);
 	nesqp->nesqp_context->ts_recent = 0;
@@ -2486,24 +2726,24 @@
 	nesqp->nesqp_context->snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.snd_wnd);
 	nesqp->nesqp_context->rcv_nxt = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
 	nesqp->nesqp_context->rcv_wnd = cpu_to_le32(cm_node->tcp_cntxt.rcv_wnd <<
-			cm_node->tcp_cntxt.rcv_wscale);
+						    cm_node->tcp_cntxt.rcv_wscale);
 	nesqp->nesqp_context->snd_max = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
 	nesqp->nesqp_context->snd_una = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
 	nesqp->nesqp_context->srtt = 0;
 	nesqp->nesqp_context->rttvar = cpu_to_le32(0x6);
 	nesqp->nesqp_context->ssthresh = cpu_to_le32(0x3FFFC000);
-	nesqp->nesqp_context->cwnd = cpu_to_le32(2*cm_node->tcp_cntxt.mss);
+	nesqp->nesqp_context->cwnd = cpu_to_le32(2 * cm_node->tcp_cntxt.mss);
 	nesqp->nesqp_context->snd_wl1 = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
 	nesqp->nesqp_context->snd_wl2 = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
 	nesqp->nesqp_context->max_snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.max_snd_wnd);
 
 	nes_debug(NES_DBG_CM, "QP%u: rcv_nxt = 0x%08X, snd_nxt = 0x%08X,"
-			" Setting MSS to %u, PDWscale = 0x%08X, rcv_wnd = %u, context misc = 0x%08X.\n",
-			nesqp->hwqp.qp_id, le32_to_cpu(nesqp->nesqp_context->rcv_nxt),
-			le32_to_cpu(nesqp->nesqp_context->snd_nxt),
-			cm_node->tcp_cntxt.mss, le32_to_cpu(nesqp->nesqp_context->pd_index_wscale),
-			le32_to_cpu(nesqp->nesqp_context->rcv_wnd),
-			le32_to_cpu(nesqp->nesqp_context->misc));
+		  " Setting MSS to %u, PDWscale = 0x%08X, rcv_wnd = %u, context misc = 0x%08X.\n",
+		  nesqp->hwqp.qp_id, le32_to_cpu(nesqp->nesqp_context->rcv_nxt),
+		  le32_to_cpu(nesqp->nesqp_context->snd_nxt),
+		  cm_node->tcp_cntxt.mss, le32_to_cpu(nesqp->nesqp_context->pd_index_wscale),
+		  le32_to_cpu(nesqp->nesqp_context->rcv_wnd),
+		  le32_to_cpu(nesqp->nesqp_context->misc));
 	nes_debug(NES_DBG_CM, "  snd_wnd  = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->snd_wnd));
 	nes_debug(NES_DBG_CM, "  snd_cwnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->cwnd));
 	nes_debug(NES_DBG_CM, "  max_swnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->max_snd_wnd));
@@ -2524,7 +2764,7 @@
 
 	work = kzalloc(sizeof *work, GFP_ATOMIC);
 	if (!work)
-		return -ENOMEM; /* Timer will clean up */
+		return -ENOMEM;  /* Timer will clean up */
 
 	nes_add_ref(&nesqp->ibqp);
 	work->nesqp = nesqp;
@@ -2544,7 +2784,7 @@
 
 	kfree(dwork);
 	nes_debug(NES_DBG_CM, "processing AEQE id 0x%04X for QP%u.\n",
-			nesqp->last_aeq, nesqp->hwqp.qp_id);
+		  nesqp->last_aeq, nesqp->hwqp.qp_id);
 	nes_cm_disconn_true(nesqp);
 	nes_rem_ref(&nesqp->ibqp);
 }
@@ -2580,7 +2820,7 @@
 	/* make sure we havent already closed this connection */
 	if (!cm_id) {
 		nes_debug(NES_DBG_CM, "QP%u disconnect_worker cmid is NULL\n",
-				nesqp->hwqp.qp_id);
+			  nesqp->hwqp.qp_id);
 		spin_unlock_irqrestore(&nesqp->lock, flags);
 		return -1;
 	}
@@ -2589,7 +2829,7 @@
 	nes_debug(NES_DBG_CM, "Disconnecting QP%u\n", nesqp->hwqp.qp_id);
 
 	original_hw_tcp_state = nesqp->hw_tcp_state;
-	original_ibqp_state   = nesqp->ibqp_state;
+	original_ibqp_state = nesqp->ibqp_state;
 	last_ae = nesqp->last_aeq;
 
 	if (nesqp->term_flags) {
@@ -2647,16 +2887,16 @@
 			cm_event.private_data_len = 0;
 
 			nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event"
-				" for  QP%u, SQ Head = %u, SQ Tail = %u. "
-				"cm_id = %p, refcount = %u.\n",
-				nesqp->hwqp.qp_id, nesqp->hwqp.sq_head,
-				nesqp->hwqp.sq_tail, cm_id,
-				atomic_read(&nesqp->refcount));
+				  " for  QP%u, SQ Head = %u, SQ Tail = %u. "
+				  "cm_id = %p, refcount = %u.\n",
+				  nesqp->hwqp.qp_id, nesqp->hwqp.sq_head,
+				  nesqp->hwqp.sq_tail, cm_id,
+				  atomic_read(&nesqp->refcount));
 
 			ret = cm_id->event_handler(cm_id, &cm_event);
 			if (ret)
 				nes_debug(NES_DBG_CM, "OFA CM event_handler "
-					"returned, ret=%d\n", ret);
+					  "returned, ret=%d\n", ret);
 		}
 
 		if (issue_close) {
@@ -2674,9 +2914,8 @@
 			cm_event.private_data_len = 0;
 
 			ret = cm_id->event_handler(cm_id, &cm_event);
-			if (ret) {
+			if (ret)
 				nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
-			}
 
 			cm_id->rem_ref(cm_id);
 		}
@@ -2716,8 +2955,8 @@
 			if (nesqp->lsmm_mr)
 				nesibdev->ibdev.dereg_mr(nesqp->lsmm_mr);
 			pci_free_consistent(nesdev->pcidev,
-					nesqp->private_data_len+sizeof(struct ietf_mpa_frame),
-					nesqp->ietf_frame, nesqp->ietf_frame_pbase);
+					    nesqp->private_data_len + nesqp->ietf_frame_size,
+					    nesqp->ietf_frame, nesqp->ietf_frame_pbase);
 		}
 	}
 
@@ -2756,6 +2995,12 @@
 	struct ib_phys_buf ibphysbuf;
 	struct nes_pd *nespd;
 	u64 tagged_offset;
+	u8 mpa_frame_offset = 0;
+	struct ietf_mpa_v2 *mpa_v2_frame;
+	u8 start_addr = 0;
+	u8 *start_ptr = &start_addr;
+	u8 **start_buff = &start_ptr;
+	u16 buff_len = 0;
 
 	ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
 	if (!ibqp)
@@ -2796,53 +3041,49 @@
 	nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n",
 			netdev_refcnt_read(nesvnic->netdev));
 
+	nesqp->ietf_frame_size = sizeof(struct ietf_mpa_v2);
 	/* allocate the ietf frame and space for private data */
 	nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev,
-		sizeof(struct ietf_mpa_frame) + conn_param->private_data_len,
-		&nesqp->ietf_frame_pbase);
+						 nesqp->ietf_frame_size + conn_param->private_data_len,
+						 &nesqp->ietf_frame_pbase);
 
 	if (!nesqp->ietf_frame) {
-		nes_debug(NES_DBG_CM, "Unable to allocate memory for private "
-			"data\n");
+		nes_debug(NES_DBG_CM, "Unable to allocate memory for private data\n");
 		return -ENOMEM;
 	}
+	mpa_v2_frame = (struct ietf_mpa_v2 *)nesqp->ietf_frame;
 
+	if (cm_node->mpa_frame_rev == IETF_MPA_V1)
+		mpa_frame_offset = 4;
 
-	/* setup the MPA frame */
+	memcpy(mpa_v2_frame->priv_data, conn_param->private_data,
+	       conn_param->private_data_len);
+
+	cm_build_mpa_frame(cm_node, start_buff, &buff_len, nesqp->ietf_frame, MPA_KEY_REPLY);
 	nesqp->private_data_len = conn_param->private_data_len;
-	memcpy(nesqp->ietf_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
-
-	memcpy(nesqp->ietf_frame->priv_data, conn_param->private_data,
-			conn_param->private_data_len);
-
-	nesqp->ietf_frame->priv_data_len =
-		cpu_to_be16(conn_param->private_data_len);
-	nesqp->ietf_frame->rev = mpa_version;
-	nesqp->ietf_frame->flags = IETF_MPA_FLAGS_CRC;
 
 	/* setup our first outgoing iWarp send WQE (the IETF frame response) */
 	wqe = &nesqp->hwqp.sq_vbase[0];
 
 	if (cm_id->remote_addr.sin_addr.s_addr !=
-			cm_id->local_addr.sin_addr.s_addr) {
+	    cm_id->local_addr.sin_addr.s_addr) {
 		u64temp = (unsigned long)nesqp;
 		nesibdev = nesvnic->nesibdev;
 		nespd = nesqp->nespd;
-		ibphysbuf.addr = nesqp->ietf_frame_pbase;
-		ibphysbuf.size = conn_param->private_data_len +
-					sizeof(struct ietf_mpa_frame);
-		tagged_offset = (u64)(unsigned long)nesqp->ietf_frame;
+		ibphysbuf.addr = nesqp->ietf_frame_pbase + mpa_frame_offset;
+		ibphysbuf.size = buff_len;
+		tagged_offset = (u64)(unsigned long)*start_buff;
 		ibmr = nesibdev->ibdev.reg_phys_mr((struct ib_pd *)nespd,
-						&ibphysbuf, 1,
-						IB_ACCESS_LOCAL_WRITE,
-						&tagged_offset);
+						   &ibphysbuf, 1,
+						   IB_ACCESS_LOCAL_WRITE,
+						   &tagged_offset);
 		if (!ibmr) {
 			nes_debug(NES_DBG_CM, "Unable to register memory region"
-					"for lSMM for cm_node = %p \n",
-					cm_node);
+				  "for lSMM for cm_node = %p \n",
+				  cm_node);
 			pci_free_consistent(nesdev->pcidev,
-				nesqp->private_data_len+sizeof(struct ietf_mpa_frame),
-				nesqp->ietf_frame, nesqp->ietf_frame_pbase);
+					    nesqp->private_data_len + nesqp->ietf_frame_size,
+					    nesqp->ietf_frame, nesqp->ietf_frame_pbase);
 			return -ENOMEM;
 		}
 
@@ -2850,22 +3091,20 @@
 		ibmr->device = nespd->ibpd.device;
 		nesqp->lsmm_mr = ibmr;
 
-		u64temp |= NES_SW_CONTEXT_ALIGN>>1;
+		u64temp |= NES_SW_CONTEXT_ALIGN >> 1;
 		set_wqe_64bit_value(wqe->wqe_words,
-			NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX,
-			u64temp);
+				    NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX,
+				    u64temp);
 		wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
 			cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING |
-			NES_IWARP_SQ_WQE_WRPDU);
+				    NES_IWARP_SQ_WQE_WRPDU);
 		wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] =
-			cpu_to_le32(conn_param->private_data_len +
-			sizeof(struct ietf_mpa_frame));
+			cpu_to_le32(buff_len);
 		set_wqe_64bit_value(wqe->wqe_words,
-					NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
-					(u64)(unsigned long)nesqp->ietf_frame);
+				    NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
+				    (u64)(unsigned long)(*start_buff));
 		wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] =
-			cpu_to_le32(conn_param->private_data_len +
-			sizeof(struct ietf_mpa_frame));
+			cpu_to_le32(buff_len);
 		wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = ibmr->lkey;
 		if (nesqp->sq_kmapped) {
 			nesqp->sq_kmapped = 0;
@@ -2874,7 +3113,7 @@
 
 		nesqp->nesqp_context->ird_ord_sizes |=
 			cpu_to_le32(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
-			NES_QPCONTEXT_ORDIRD_WRPDU);
+				    NES_QPCONTEXT_ORDIRD_WRPDU);
 	} else {
 		nesqp->nesqp_context->ird_ord_sizes |=
 			cpu_to_le32(NES_QPCONTEXT_ORDIRD_WRPDU);
@@ -2888,11 +3127,11 @@
 
 	/*  nesqp->cm_node = (void *)cm_id->provider_data; */
 	cm_id->provider_data = nesqp;
-	nesqp->active_conn   = 0;
+	nesqp->active_conn = 0;
 
 	if (cm_node->state == NES_CM_STATE_TSA)
 		nes_debug(NES_DBG_CM, "Already state = TSA for cm_node=%p\n",
-			cm_node);
+			  cm_node);
 
 	nes_cm_init_tsa_conn(nesqp, cm_node);
 
@@ -2909,13 +3148,13 @@
 			cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr));
 
 	nesqp->nesqp_context->misc2 |= cpu_to_le32(
-			(u32)PCI_FUNC(nesdev->pcidev->devfn) <<
-			NES_QPCONTEXT_MISC2_SRC_IP_SHIFT);
+		(u32)PCI_FUNC(nesdev->pcidev->devfn) <<
+		NES_QPCONTEXT_MISC2_SRC_IP_SHIFT);
 
 	nesqp->nesqp_context->arp_index_vlan |=
 		cpu_to_le32(nes_arp_table(nesdev,
-			le32_to_cpu(nesqp->nesqp_context->ip0), NULL,
-			NES_ARP_RESOLVE) << 16);
+					  le32_to_cpu(nesqp->nesqp_context->ip0), NULL,
+					  NES_ARP_RESOLVE) << 16);
 
 	nesqp->nesqp_context->ts_val_delta = cpu_to_le32(
 		jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW));
@@ -2941,7 +3180,7 @@
 	crc_value = get_crc_value(&nes_quad);
 	nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff);
 	nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, CRC = 0x%08X\n",
-		nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask);
+		  nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask);
 
 	nesqp->hte_index &= adapter->hte_index_mask;
 	nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index);
@@ -2949,17 +3188,15 @@
 	cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node);
 
 	nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = "
-			"0x%08X:0x%04X, rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + "
-			"private data length=%zu.\n", nesqp->hwqp.qp_id,
-			ntohl(cm_id->remote_addr.sin_addr.s_addr),
-			ntohs(cm_id->remote_addr.sin_port),
-			ntohl(cm_id->local_addr.sin_addr.s_addr),
-			ntohs(cm_id->local_addr.sin_port),
-			le32_to_cpu(nesqp->nesqp_context->rcv_nxt),
-			le32_to_cpu(nesqp->nesqp_context->snd_nxt),
-			conn_param->private_data_len +
-			sizeof(struct ietf_mpa_frame));
-
+		  "0x%08X:0x%04X, rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + "
+		  "private data length=%u.\n", nesqp->hwqp.qp_id,
+		  ntohl(cm_id->remote_addr.sin_addr.s_addr),
+		  ntohs(cm_id->remote_addr.sin_port),
+		  ntohl(cm_id->local_addr.sin_addr.s_addr),
+		  ntohs(cm_id->local_addr.sin_port),
+		  le32_to_cpu(nesqp->nesqp_context->rcv_nxt),
+		  le32_to_cpu(nesqp->nesqp_context->snd_nxt),
+		  buff_len);
 
 	/* notify OF layer that accept event was successful */
 	cm_id->add_ref(cm_id);
@@ -2980,12 +3217,12 @@
 			nesqp->private_data_len;
 		/* copy entire MPA frame to our cm_node's frame */
 		memcpy(cm_node->loopbackpartner->mpa_frame_buf,
-			nesqp->ietf_frame->priv_data, nesqp->private_data_len);
+		       conn_param->private_data, conn_param->private_data_len);
 		create_event(cm_node->loopbackpartner, NES_CM_EVENT_CONNECTED);
 	}
 	if (ret)
 		printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
-			"ret=%d\n", __func__, __LINE__, ret);
+		       "ret=%d\n", __func__, __LINE__, ret);
 
 	return 0;
 }
@@ -2998,34 +3235,28 @@
 {
 	struct nes_cm_node *cm_node;
 	struct nes_cm_node *loopback;
-
 	struct nes_cm_core *cm_core;
+	u8 *start_buff;
 
 	atomic_inc(&cm_rejects);
-	cm_node = (struct nes_cm_node *) cm_id->provider_data;
+	cm_node = (struct nes_cm_node *)cm_id->provider_data;
 	loopback = cm_node->loopbackpartner;
 	cm_core = cm_node->cm_core;
 	cm_node->cm_id = cm_id;
-	cm_node->mpa_frame_size = sizeof(struct ietf_mpa_frame) + pdata_len;
 
-	if (cm_node->mpa_frame_size > MAX_CM_BUFFER)
+	if (pdata_len + sizeof(struct ietf_mpa_v2) > MAX_CM_BUFFER)
 		return -EINVAL;
 
-	memcpy(&cm_node->mpa_frame.key[0], IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
 	if (loopback) {
 		memcpy(&loopback->mpa_frame.priv_data, pdata, pdata_len);
 		loopback->mpa_frame.priv_data_len = pdata_len;
-		loopback->mpa_frame_size = sizeof(struct ietf_mpa_frame) +
-				pdata_len;
+		loopback->mpa_frame_size = pdata_len;
 	} else {
-		memcpy(&cm_node->mpa_frame.priv_data, pdata, pdata_len);
-		cm_node->mpa_frame.priv_data_len = cpu_to_be16(pdata_len);
+		start_buff = &cm_node->mpa_frame_buf[0] + sizeof(struct ietf_mpa_v2);
+		cm_node->mpa_frame_size = pdata_len;
+		memcpy(start_buff, pdata, pdata_len);
 	}
-
-	cm_node->mpa_frame.rev = mpa_version;
-	cm_node->mpa_frame.flags = IETF_MPA_FLAGS_CRC | IETF_MPA_FLAGS_REJECT;
-
-	return cm_core->api->reject(cm_core, &cm_node->mpa_frame, cm_node);
+	return cm_core->api->reject(cm_core, cm_node);
 }
 
 
@@ -3052,7 +3283,7 @@
 	nesvnic = to_nesvnic(nesqp->ibqp.device);
 	if (!nesvnic)
 		return -EINVAL;
-	nesdev  = nesvnic->nesdev;
+	nesdev = nesvnic->nesdev;
 	if (!nesdev)
 		return -EINVAL;
 
@@ -3060,12 +3291,12 @@
 		return -EINVAL;
 
 	nes_debug(NES_DBG_CM, "QP%u, current IP = 0x%08X, Destination IP = "
-		"0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id,
-		ntohl(nesvnic->local_ipaddr),
-		ntohl(cm_id->remote_addr.sin_addr.s_addr),
-		ntohs(cm_id->remote_addr.sin_port),
-		ntohl(cm_id->local_addr.sin_addr.s_addr),
-		ntohs(cm_id->local_addr.sin_port));
+		  "0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id,
+		  ntohl(nesvnic->local_ipaddr),
+		  ntohl(cm_id->remote_addr.sin_addr.s_addr),
+		  ntohs(cm_id->remote_addr.sin_port),
+		  ntohl(cm_id->local_addr.sin_addr.s_addr),
+		  ntohs(cm_id->local_addr.sin_port));
 
 	atomic_inc(&cm_connects);
 	nesqp->active_conn = 1;
@@ -3079,12 +3310,12 @@
 	nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord);
 	nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord);
 	nes_debug(NES_DBG_CM, "mpa private data len =%u\n",
-		conn_param->private_data_len);
+		  conn_param->private_data_len);
 
 	if (cm_id->local_addr.sin_addr.s_addr !=
-		cm_id->remote_addr.sin_addr.s_addr) {
+	    cm_id->remote_addr.sin_addr.s_addr) {
 		nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port),
-			PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD);
+				 PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD);
 		apbvt_set = 1;
 	}
 
@@ -3100,13 +3331,13 @@
 
 	/* create a connect CM node connection */
 	cm_node = g_cm_core->api->connect(g_cm_core, nesvnic,
-		conn_param->private_data_len, (void *)conn_param->private_data,
-		&cm_info);
+					  conn_param->private_data_len, (void *)conn_param->private_data,
+					  &cm_info);
 	if (!cm_node) {
 		if (apbvt_set)
 			nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port),
-				PCI_FUNC(nesdev->pcidev->devfn),
-				NES_MANAGE_APBVT_DEL);
+					 PCI_FUNC(nesdev->pcidev->devfn),
+					 NES_MANAGE_APBVT_DEL);
 
 		cm_id->rem_ref(cm_id);
 		return -ENOMEM;
@@ -3156,7 +3387,7 @@
 	cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info);
 	if (!cm_node) {
 		printk(KERN_ERR "%s[%u] Error returned from listen API call\n",
-				__func__, __LINE__);
+		       __func__, __LINE__);
 		return -ENOMEM;
 	}
 
@@ -3164,12 +3395,12 @@
 
 	if (!cm_node->reused_node) {
 		err = nes_manage_apbvt(nesvnic,
-			ntohs(cm_id->local_addr.sin_port),
-			PCI_FUNC(nesvnic->nesdev->pcidev->devfn),
-			NES_MANAGE_APBVT_ADD);
+				       ntohs(cm_id->local_addr.sin_port),
+				       PCI_FUNC(nesvnic->nesdev->pcidev->devfn),
+				       NES_MANAGE_APBVT_ADD);
 		if (err) {
 			printk(KERN_ERR "nes_manage_apbvt call returned %d.\n",
-				err);
+			       err);
 			g_cm_core->api->stop_listener(g_cm_core, (void *)cm_node);
 			return err;
 		}
@@ -3206,13 +3437,13 @@
 int nes_cm_recv(struct sk_buff *skb, struct net_device *netdevice)
 {
 	int rc = 0;
+
 	cm_packets_received++;
-	if ((g_cm_core) && (g_cm_core->api)) {
+	if ((g_cm_core) && (g_cm_core->api))
 		rc = g_cm_core->api->recv_pkt(g_cm_core, netdev_priv(netdevice), skb);
-	} else {
+	else
 		nes_debug(NES_DBG_CM, "Unable to process packet for CM,"
-				" cm is not setup properly.\n");
-	}
+			  " cm is not setup properly.\n");
 
 	return rc;
 }
@@ -3227,11 +3458,10 @@
 	nes_debug(NES_DBG_CM, "\n");
 	/* create the primary CM core, pass this handle to subsequent core inits */
 	g_cm_core = nes_cm_alloc_core();
-	if (g_cm_core) {
+	if (g_cm_core)
 		return 0;
-	} else {
+	else
 		return -ENOMEM;
-	}
 }
 
 
@@ -3252,7 +3482,6 @@
  */
 static void cm_event_connected(struct nes_cm_event *event)
 {
-	u64 u64temp;
 	struct nes_qp *nesqp;
 	struct nes_vnic *nesvnic;
 	struct nes_device *nesdev;
@@ -3261,7 +3490,6 @@
 	struct ib_qp_attr attr;
 	struct iw_cm_id *cm_id;
 	struct iw_cm_event cm_event;
-	struct nes_hw_qp_wqe *wqe;
 	struct nes_v4_quad nes_quad;
 	u32 crc_value;
 	int ret;
@@ -3275,17 +3503,16 @@
 	nesdev = nesvnic->nesdev;
 	nesadapter = nesdev->nesadapter;
 
-	if (nesqp->destroyed) {
+	if (nesqp->destroyed)
 		return;
-	}
 	atomic_inc(&cm_connecteds);
 	nes_debug(NES_DBG_CM, "QP%u attempting to connect to  0x%08X:0x%04X on"
-			" local port 0x%04X. jiffies = %lu.\n",
-			nesqp->hwqp.qp_id,
-			ntohl(cm_id->remote_addr.sin_addr.s_addr),
-			ntohs(cm_id->remote_addr.sin_port),
-			ntohs(cm_id->local_addr.sin_port),
-			jiffies);
+		  " local port 0x%04X. jiffies = %lu.\n",
+		  nesqp->hwqp.qp_id,
+		  ntohl(cm_id->remote_addr.sin_addr.s_addr),
+		  ntohs(cm_id->remote_addr.sin_port),
+		  ntohs(cm_id->local_addr.sin_port),
+		  jiffies);
 
 	nes_cm_init_tsa_conn(nesqp, cm_node);
 
@@ -3316,40 +3543,12 @@
 			NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT);
 
 	/* Adjust tail for not having a LSMM */
-	nesqp->hwqp.sq_tail = 1;
+	/*nesqp->hwqp.sq_tail = 1;*/
 
-#if defined(NES_SEND_FIRST_WRITE)
-	if (cm_node->send_write0) {
-		nes_debug(NES_DBG_CM, "Sending first write.\n");
-		wqe = &nesqp->hwqp.sq_vbase[0];
-		u64temp = (unsigned long)nesqp;
-		u64temp |= NES_SW_CONTEXT_ALIGN>>1;
-		set_wqe_64bit_value(wqe->wqe_words,
-				NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp);
-		wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
-			cpu_to_le32(NES_IWARP_SQ_OP_RDMAW);
-		wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0;
-		wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0;
-		wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0;
-		wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0;
-		wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0;
+	build_rdma0_msg(cm_node, &nesqp);
 
-		if (nesqp->sq_kmapped) {
-			nesqp->sq_kmapped = 0;
-			kunmap(nesqp->page);
-		}
-
-		/* use the reserved spot on the WQ for the extra first WQE */
-		nesqp->nesqp_context->ird_ord_sizes &=
-			cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
-						NES_QPCONTEXT_ORDIRD_WRPDU |
-						NES_QPCONTEXT_ORDIRD_ALSMM));
-		nesqp->skip_lsmm = 1;
-		nesqp->hwqp.sq_tail = 0;
-		nes_write32(nesdev->regs + NES_WQE_ALLOC,
-				(1 << 24) | 0x00800000 | nesqp->hwqp.qp_id);
-	}
-#endif
+	nes_write32(nesdev->regs + NES_WQE_ALLOC,
+		    (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id);
 
 	memset(&nes_quad, 0, sizeof(nes_quad));
 
@@ -3366,13 +3565,13 @@
 	crc_value = get_crc_value(&nes_quad);
 	nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff);
 	nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, After CRC = 0x%08X\n",
-			nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask);
+		  nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask);
 
 	nesqp->hte_index &= nesadapter->hte_index_mask;
 	nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index);
 
 	nesqp->ietf_frame = &cm_node->mpa_frame;
-	nesqp->private_data_len = (u8) cm_node->mpa_frame_size;
+	nesqp->private_data_len = (u8)cm_node->mpa_frame_size;
 	cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node);
 
 	/* notify OF layer we successfully created the requested connection */
@@ -3384,7 +3583,9 @@
 	cm_event.remote_addr = cm_id->remote_addr;
 
 	cm_event.private_data = (void *)event->cm_node->mpa_frame_buf;
-	cm_event.private_data_len = (u8) event->cm_node->mpa_frame_size;
+	cm_event.private_data_len = (u8)event->cm_node->mpa_frame_size;
+	cm_event.ird = cm_node->ird_size;
+	cm_event.ord = cm_node->ord_size;
 
 	cm_event.local_addr.sin_addr.s_addr = event->cm_info.rem_addr;
 	ret = cm_id->event_handler(cm_id, &cm_event);
@@ -3392,12 +3593,12 @@
 
 	if (ret)
 		printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
-			"ret=%d\n", __func__, __LINE__, ret);
+		       "ret=%d\n", __func__, __LINE__, ret);
 	attr.qp_state = IB_QPS_RTS;
 	nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
 
 	nes_debug(NES_DBG_CM, "Exiting connect thread for QP%u. jiffies = "
-		"%lu\n", nesqp->hwqp.qp_id, jiffies);
+		  "%lu\n", nesqp->hwqp.qp_id, jiffies);
 
 	return;
 }
@@ -3418,16 +3619,14 @@
 		return;
 
 	cm_id = event->cm_node->cm_id;
-	if (!cm_id) {
+	if (!cm_id)
 		return;
-	}
 
 	nes_debug(NES_DBG_CM, "cm_node=%p, cm_id=%p\n", event->cm_node, cm_id);
 	nesqp = cm_id->provider_data;
 
-	if (!nesqp) {
+	if (!nesqp)
 		return;
-	}
 
 	/* notify OF layer about this connection error event */
 	/* cm_id->rem_ref(cm_id); */
@@ -3442,14 +3641,14 @@
 	cm_event.private_data_len = 0;
 
 	nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, "
-		"remove_addr=%08x\n", cm_event.local_addr.sin_addr.s_addr,
-		cm_event.remote_addr.sin_addr.s_addr);
+		  "remove_addr=%08x\n", cm_event.local_addr.sin_addr.s_addr,
+		  cm_event.remote_addr.sin_addr.s_addr);
 
 	ret = cm_id->event_handler(cm_id, &cm_event);
 	nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
 	if (ret)
 		printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
-			"ret=%d\n", __func__, __LINE__, ret);
+		       "ret=%d\n", __func__, __LINE__, ret);
 	cm_id->rem_ref(cm_id);
 
 	rem_ref_cm_node(event->cm_node->cm_core, event->cm_node);
@@ -3519,7 +3718,7 @@
  */
 static void cm_event_mpa_req(struct nes_cm_event *event)
 {
-	struct iw_cm_id   *cm_id;
+	struct iw_cm_id *cm_id;
 	struct iw_cm_event cm_event;
 	int ret;
 	struct nes_cm_node *cm_node;
@@ -3531,7 +3730,7 @@
 
 	atomic_inc(&cm_connect_reqs);
 	nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n",
-			cm_node, cm_id, jiffies);
+		  cm_node, cm_id, jiffies);
 
 	cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
 	cm_event.status = 0;
@@ -3545,19 +3744,21 @@
 	cm_event.remote_addr.sin_port = htons(event->cm_info.rem_port);
 	cm_event.remote_addr.sin_addr.s_addr = htonl(event->cm_info.rem_addr);
 	cm_event.private_data = cm_node->mpa_frame_buf;
-	cm_event.private_data_len  = (u8) cm_node->mpa_frame_size;
+	cm_event.private_data_len = (u8)cm_node->mpa_frame_size;
+	cm_event.ird = cm_node->ird_size;
+	cm_event.ord = cm_node->ord_size;
 
 	ret = cm_id->event_handler(cm_id, &cm_event);
 	if (ret)
 		printk(KERN_ERR "%s[%u] OFA CM event_handler returned, ret=%d\n",
-				__func__, __LINE__, ret);
+		       __func__, __LINE__, ret);
 	return;
 }
 
 
 static void cm_event_mpa_reject(struct nes_cm_event *event)
 {
-	struct iw_cm_id   *cm_id;
+	struct iw_cm_id *cm_id;
 	struct iw_cm_event cm_event;
 	struct nes_cm_node *cm_node;
 	int ret;
@@ -3569,7 +3770,7 @@
 
 	atomic_inc(&cm_connect_reqs);
 	nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n",
-			cm_node, cm_id, jiffies);
+		  cm_node, cm_id, jiffies);
 
 	cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
 	cm_event.status = -ECONNREFUSED;
@@ -3584,17 +3785,17 @@
 	cm_event.remote_addr.sin_addr.s_addr = htonl(event->cm_info.rem_addr);
 
 	cm_event.private_data = cm_node->mpa_frame_buf;
-	cm_event.private_data_len = (u8) cm_node->mpa_frame_size;
+	cm_event.private_data_len = (u8)cm_node->mpa_frame_size;
 
 	nes_debug(NES_DBG_CM, "call CM_EVENT_MPA_REJECTED, local_addr=%08x, "
-			"remove_addr=%08x\n",
-			cm_event.local_addr.sin_addr.s_addr,
-			cm_event.remote_addr.sin_addr.s_addr);
+		  "remove_addr=%08x\n",
+		  cm_event.local_addr.sin_addr.s_addr,
+		  cm_event.remote_addr.sin_addr.s_addr);
 
 	ret = cm_id->event_handler(cm_id, &cm_event);
 	if (ret)
 		printk(KERN_ERR "%s[%u] OFA CM event_handler returned, ret=%d\n",
-				__func__, __LINE__, ret);
+		       __func__, __LINE__, ret);
 
 	return;
 }
@@ -3613,7 +3814,7 @@
 	event->cm_info.cm_id->add_ref(event->cm_info.cm_id);
 	INIT_WORK(&event->event_work, nes_cm_event_handler);
 	nes_debug(NES_DBG_CM, "cm_node=%p queue_work, event=%p\n",
-		event->cm_node, event);
+		  event->cm_node, event);
 
 	queue_work(event->cm_node->cm_core->event_wq, &event->event_work);
 
@@ -3630,7 +3831,7 @@
 static void nes_cm_event_handler(struct work_struct *work)
 {
 	struct nes_cm_event *event = container_of(work, struct nes_cm_event,
-			event_work);
+						  event_work);
 	struct nes_cm_core *cm_core;
 
 	if ((!event) || (!event->cm_node) || (!event->cm_node->cm_core))
@@ -3638,29 +3839,29 @@
 
 	cm_core = event->cm_node->cm_core;
 	nes_debug(NES_DBG_CM, "event=%p, event->type=%u, events posted=%u\n",
-		event, event->type, atomic_read(&cm_core->events_posted));
+		  event, event->type, atomic_read(&cm_core->events_posted));
 
 	switch (event->type) {
 	case NES_CM_EVENT_MPA_REQ:
 		cm_event_mpa_req(event);
 		nes_debug(NES_DBG_CM, "cm_node=%p CM Event: MPA REQUEST\n",
-			event->cm_node);
+			  event->cm_node);
 		break;
 	case NES_CM_EVENT_RESET:
 		nes_debug(NES_DBG_CM, "cm_node = %p CM Event: RESET\n",
-			event->cm_node);
+			  event->cm_node);
 		cm_event_reset(event);
 		break;
 	case NES_CM_EVENT_CONNECTED:
 		if ((!event->cm_node->cm_id) ||
-			(event->cm_node->state != NES_CM_STATE_TSA))
+		    (event->cm_node->state != NES_CM_STATE_TSA))
 			break;
 		cm_event_connected(event);
 		nes_debug(NES_DBG_CM, "CM Event: CONNECTED\n");
 		break;
 	case NES_CM_EVENT_MPA_REJECT:
 		if ((!event->cm_node->cm_id) ||
-				(event->cm_node->state == NES_CM_STATE_TSA))
+		    (event->cm_node->state == NES_CM_STATE_TSA))
 			break;
 		cm_event_mpa_reject(event);
 		nes_debug(NES_DBG_CM, "CM Event: REJECT\n");
@@ -3668,7 +3869,7 @@
 
 	case NES_CM_EVENT_ABORTED:
 		if ((!event->cm_node->cm_id) ||
-			(event->cm_node->state == NES_CM_STATE_TSA))
+		    (event->cm_node->state == NES_CM_STATE_TSA))
 			break;
 		cm_event_connect_error(event);
 		nes_debug(NES_DBG_CM, "CM Event: ABORTED\n");
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
index d9825fd..bdfa1fb 100644
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -48,7 +48,16 @@
 #define IETF_MPA_KEY_SIZE 16
 #define IETF_MPA_VERSION  1
 #define IETF_MAX_PRIV_DATA_LEN 512
-#define IETF_MPA_FRAME_SIZE     20
+#define IETF_MPA_FRAME_SIZE    20
+#define IETF_RTR_MSG_SIZE      4
+#define IETF_MPA_V2_FLAG       0x10
+
+/* IETF RTR MSG Fields               */
+#define IETF_PEER_TO_PEER       0x8000
+#define IETF_FLPDU_ZERO_LEN     0x4000
+#define IETF_RDMA0_WRITE        0x8000
+#define IETF_RDMA0_READ         0x4000
+#define IETF_NO_IRD_ORD         0x3FFF
 
 enum ietf_mpa_flags {
 	IETF_MPA_FLAGS_MARKERS = 0x80,	/* receive Markers */
@@ -56,7 +65,7 @@
 	IETF_MPA_FLAGS_REJECT  = 0x20,	/* Reject */
 };
 
-struct ietf_mpa_frame {
+struct ietf_mpa_v1 {
 	u8 key[IETF_MPA_KEY_SIZE];
 	u8 flags;
 	u8 rev;
@@ -66,6 +75,20 @@
 
 #define ietf_mpa_req_resp_frame ietf_mpa_frame
 
+struct ietf_rtr_msg {
+	__be16 ctrl_ird;
+	__be16 ctrl_ord;
+};
+
+struct ietf_mpa_v2 {
+	u8 key[IETF_MPA_KEY_SIZE];
+	u8 flags;
+	u8 rev;
+	 __be16 priv_data_len;
+	struct ietf_rtr_msg rtr_msg;
+	u8 priv_data[0];
+};
+
 struct nes_v4_quad {
 	u32 rsvd0;
 	__le32 DstIpAdrIndex;	/* Only most significant 5 bits are valid */
@@ -171,8 +194,7 @@
 
 #define NES_CM_DEF_SEQ2      0x18ed5740
 #define NES_CM_DEF_LOCAL_ID2 0xb807
-#define	MAX_CM_BUFFER	(IETF_MPA_FRAME_SIZE + IETF_MAX_PRIV_DATA_LEN)
-
+#define	MAX_CM_BUFFER	(IETF_MPA_FRAME_SIZE + IETF_RTR_MSG_SIZE + IETF_MAX_PRIV_DATA_LEN)
 
 typedef u32 nes_addr_t;
 
@@ -204,6 +226,21 @@
 	NES_CM_STATE_CLOSED
 };
 
+enum mpa_frame_version {
+	IETF_MPA_V1 = 1,
+	IETF_MPA_V2 = 2
+};
+
+enum mpa_frame_key {
+	MPA_KEY_REQUEST,
+	MPA_KEY_REPLY
+};
+
+enum send_rdma0 {
+	SEND_RDMA_READ_ZERO = 1,
+	SEND_RDMA_WRITE_ZERO = 2
+};
+
 enum nes_tcpip_pkt_type {
 	NES_PKT_TYPE_UNKNOWN,
 	NES_PKT_TYPE_SYN,
@@ -245,9 +282,9 @@
 
 
 enum nes_cm_listener_state {
-	NES_CM_LISTENER_PASSIVE_STATE=1,
-	NES_CM_LISTENER_ACTIVE_STATE=2,
-	NES_CM_LISTENER_EITHER_STATE=3
+	NES_CM_LISTENER_PASSIVE_STATE = 1,
+	NES_CM_LISTENER_ACTIVE_STATE = 2,
+	NES_CM_LISTENER_EITHER_STATE = 3
 };
 
 struct nes_cm_listener {
@@ -283,16 +320,20 @@
 
 	struct nes_cm_node        *loopbackpartner;
 
-	struct nes_timer_entry	*send_entry;
-
+	struct nes_timer_entry	  *send_entry;
+	struct nes_timer_entry    *recv_entry;
 	spinlock_t                retrans_list_lock;
-	struct nes_timer_entry  *recv_entry;
+	enum send_rdma0           send_rdma0_op;
 
-	int                       send_write0;
 	union {
-		struct ietf_mpa_frame mpa_frame;
-		u8                    mpa_frame_buf[MAX_CM_BUFFER];
+		struct ietf_mpa_v1 mpa_frame;
+		struct ietf_mpa_v2 mpa_v2_frame;
+		u8                 mpa_frame_buf[MAX_CM_BUFFER];
 	};
+	enum mpa_frame_version    mpa_frame_rev;
+	u16			  ird_size;
+	u16                       ord_size;
+
 	u16                       mpa_frame_size;
 	struct iw_cm_id           *cm_id;
 	struct list_head          list;
@@ -399,10 +440,8 @@
 			struct nes_vnic *, u16, void *,
 			struct nes_cm_info *);
 	int (*close)(struct nes_cm_core *, struct nes_cm_node *);
-	int (*accept)(struct nes_cm_core *, struct ietf_mpa_frame *,
-			struct nes_cm_node *);
-	int (*reject)(struct nes_cm_core *, struct ietf_mpa_frame *,
-			struct nes_cm_node *);
+	int (*accept)(struct nes_cm_core *, struct nes_cm_node *);
+	int (*reject)(struct nes_cm_core *, struct nes_cm_node *);
 	int (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *,
 			struct sk_buff *);
 	int (*destroy_cm_core)(struct nes_cm_core *);
@@ -422,5 +461,7 @@
 int nes_cm_recv(struct sk_buff *, struct net_device *);
 int nes_cm_start(void);
 int nes_cm_stop(void);
+int nes_add_ref_cm_node(struct nes_cm_node *cm_node);
+int nes_rem_ref_cm_node(struct nes_cm_node *cm_node);
 
 #endif			/* NES_CM_H */
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index be36cbe..7c0ff19 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -110,6 +110,14 @@
 };
 #endif
 
+static inline void print_ip(struct nes_cm_node *cm_node)
+{
+	unsigned char *rem_addr;
+	if (cm_node) {
+		rem_addr = (unsigned char *)&cm_node->rem_addr;
+		printk(KERN_ERR PFX "Remote IP addr: %pI4\n", rem_addr);
+	}
+}
 
 /**
  * nes_nic_init_timer_defaults
@@ -1555,6 +1563,7 @@
 	struct nes_hw_nic_rq_wqe *nic_rqe;
 	struct nes_hw_nic *nesnic;
 	struct nes_device *nesdev;
+	struct nes_rskb_cb *cb;
 	u32 rx_wqes_posted = 0;
 
 	nesnic = &nesvnic->nic;
@@ -1580,6 +1589,9 @@
 
 			bus_address = pci_map_single(nesdev->pcidev,
 					skb->data, nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
+			cb = (struct nes_rskb_cb *)&skb->cb[0];
+			cb->busaddr = bus_address;
+			cb->maplen = nesvnic->max_frame_size;
 
 			nic_rqe = &nesnic->rq_vbase[nesvnic->nic.rq_head];
 			nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] =
@@ -1669,6 +1681,7 @@
 	u32 cqp_head;
 	u32 counter;
 	u32 wqe_count;
+	struct nes_rskb_cb *cb;
 	u8 jumbomode=0;
 
 	/* Allocate fragment, SQ, RQ, and CQ; Reuse CEQ based on the PCI function */
@@ -1845,6 +1858,9 @@
 
 		pmem = pci_map_single(nesdev->pcidev, skb->data,
 				nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
+		cb = (struct nes_rskb_cb *)&skb->cb[0];
+		cb->busaddr = pmem;
+		cb->maplen = nesvnic->max_frame_size;
 
 		nic_rqe = &nesvnic->nic.rq_vbase[counter];
 		nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32(nesvnic->max_frame_size);
@@ -1873,6 +1889,13 @@
 			jumbomode = 1;
 		nes_nic_init_timer_defaults(nesdev, jumbomode);
 	}
+	if ((nesdev->nesadapter->allow_unaligned_fpdus) &&
+		(nes_init_mgt_qp(nesdev, netdev, nesvnic))) {
+			nes_debug(NES_DBG_INIT, "%s: Out of memory for pau nic\n", netdev->name);
+			nes_destroy_nic_qp(nesvnic);
+		return -ENOMEM;
+	}
+
 	nesvnic->lro_mgr.max_aggr       = nes_lro_max_aggr;
 	nesvnic->lro_mgr.max_desc       = NES_MAX_LRO_DESCRIPTORS;
 	nesvnic->lro_mgr.lro_arr        = nesvnic->lro_desc;
@@ -1895,28 +1918,29 @@
 	struct nes_device *nesdev = nesvnic->nesdev;
 	struct nes_hw_cqp_wqe *cqp_wqe;
 	struct nes_hw_nic_sq_wqe *nic_sqe;
-	struct nes_hw_nic_rq_wqe *nic_rqe;
 	__le16 *wqe_fragment_length;
 	u16  wqe_fragment_index;
-	u64 wqe_frag;
 	u32 cqp_head;
 	u32 wqm_cfg0;
 	unsigned long flags;
+	struct sk_buff *rx_skb;
+	struct nes_rskb_cb *cb;
 	int ret;
 
+	if (nesdev->nesadapter->allow_unaligned_fpdus)
+		nes_destroy_mgt(nesvnic);
+
 	/* clear wqe stall before destroying NIC QP */
 	wqm_cfg0 = nes_read_indexed(nesdev, NES_IDX_WQM_CONFIG0);
 	nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG0, wqm_cfg0 & 0xFFFF7FFF);
 
 	/* Free remaining NIC receive buffers */
 	while (nesvnic->nic.rq_head != nesvnic->nic.rq_tail) {
-		nic_rqe   = &nesvnic->nic.rq_vbase[nesvnic->nic.rq_tail];
-		wqe_frag  = (u64)le32_to_cpu(
-			nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]);
-		wqe_frag |= ((u64)le32_to_cpu(
-			nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX]))<<32;
-		pci_unmap_single(nesdev->pcidev, (dma_addr_t)wqe_frag,
-				nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
+		rx_skb = nesvnic->nic.rx_skb[nesvnic->nic.rq_tail];
+		cb = (struct nes_rskb_cb *)&rx_skb->cb[0];
+		pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen,
+			PCI_DMA_FROMDEVICE);
+
 		dev_kfree_skb(nesvnic->nic.rx_skb[nesvnic->nic.rq_tail++]);
 		nesvnic->nic.rq_tail &= (nesvnic->nic.rq_size - 1);
 	}
@@ -2775,6 +2799,7 @@
 	struct nes_hw_nic_sq_wqe *nic_sqe;
 	struct sk_buff *skb;
 	struct sk_buff *rx_skb;
+	struct nes_rskb_cb *cb;
 	__le16 *wqe_fragment_length;
 	u32 head;
 	u32 cq_size;
@@ -2859,6 +2884,8 @@
 				bus_address += ((u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX])) << 32;
 				pci_unmap_single(nesdev->pcidev, bus_address,
 						nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
+				cb = (struct nes_rskb_cb *)&rx_skb->cb[0];
+				cb->busaddr = 0;
 				/* rx_skb->tail = rx_skb->data + rx_pkt_size; */
 				/* rx_skb->len = rx_pkt_size; */
 				rx_skb->len = 0;  /* TODO: see if this is necessary */
@@ -2983,6 +3010,7 @@
 }
 
 
+
 /**
  * nes_cqp_ce_handler
  */
@@ -2997,6 +3025,8 @@
 	u32 cq_size;
 	u32 cqe_count=0;
 	u32 error_code;
+	u32 opcode;
+	u32 ctx_index;
 	/* u32 counter; */
 
 	head = cq->cq_head;
@@ -3007,12 +3037,9 @@
 		/* nes_debug(NES_DBG_CQP, "head=%u cqe_words=%08X\n", head,
 			  le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])); */
 
-		if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) {
-			u64temp = (((u64)(le32_to_cpu(cq->cq_vbase[head].
-					cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]))) << 32) |
-					((u64)(le32_to_cpu(cq->cq_vbase[head].
-					cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX])));
-			cqp = *((struct nes_hw_cqp **)&u64temp);
+		opcode = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]);
+		if (opcode & NES_CQE_VALID) {
+			cqp = &nesdev->cqp;
 
 			error_code = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_ERROR_CODE_IDX]);
 			if (error_code) {
@@ -3021,15 +3048,14 @@
 						le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])&0x3f,
 						(u16)(error_code >> 16),
 						(u16)error_code);
-				nes_debug(NES_DBG_CQP, "cqp: qp_id=%u, sq_head=%u, sq_tail=%u\n",
-						cqp->qp_id, cqp->sq_head, cqp->sq_tail);
 			}
 
-			u64temp = (((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail].
-					wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) |
-					((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail].
-					wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX])));
-			cqp_request = *((struct nes_cqp_request **)&u64temp);
+			u64temp = (((u64)(le32_to_cpu(cq->cq_vbase[head].
+					cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]))) << 32) |
+					((u64)(le32_to_cpu(cq->cq_vbase[head].
+					cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX])));
+
+			cqp_request = (struct nes_cqp_request *)(unsigned long)u64temp;
 			if (cqp_request) {
 				if (cqp_request->waiting) {
 					/* nes_debug(NES_DBG_CQP, "%s: Waking up requestor\n"); */
@@ -3075,9 +3101,15 @@
 		cqp_wqe = &nesdev->cqp.sq_vbase[head];
 		memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe));
 		barrier();
-		cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] =
+
+		opcode = cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX];
+		if ((opcode & NES_CQP_OPCODE_MASK) == NES_CQP_DOWNLOAD_SEGMENT)
+			ctx_index = NES_CQP_WQE_DL_COMP_CTX_LOW_IDX;
+		else
+			ctx_index = NES_CQP_WQE_COMP_CTX_LOW_IDX;
+		cqp_wqe->wqe_words[ctx_index] =
 			cpu_to_le32((u32)((unsigned long)cqp_request));
-		cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] =
+		cqp_wqe->wqe_words[ctx_index + 1] =
 			cpu_to_le32((u32)(upper_32_bits((unsigned long)cqp_request)));
 		nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) put on CQPs SQ wqe%u.\n",
 				cqp_request, le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f, head);
@@ -3093,7 +3125,6 @@
 	nes_read32(nesdev->regs+NES_CQE_ALLOC);
 }
 
-
 static u8 *locate_mpa(u8 *pkt, u32 aeq_info)
 {
 	if (aeq_info & NES_AEQE_Q2_DATA_ETHERNET) {
@@ -3553,9 +3584,9 @@
 
 	aeqe_cq_id = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]);
 	if (aeq_info & NES_AEQE_QP) {
-		if ((!nes_is_resource_allocated(nesadapter, nesadapter->allocated_qps,
-				aeqe_cq_id)) ||
-				(atomic_read(&nesqp->close_timer_started)))
+		if (!nes_is_resource_allocated(nesadapter,
+				nesadapter->allocated_qps,
+				aeqe_cq_id))
 			return;
 	}
 
@@ -3566,8 +3597,7 @@
 
 			if (atomic_inc_return(&nesqp->close_timer_started) == 1) {
 				if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) &&
-					(nesqp->ibqp_state == IB_QPS_RTS) &&
-					((nesadapter->eeprom_version >> 16) != NES_A0)) {
+					(nesqp->ibqp_state == IB_QPS_RTS)) {
 					spin_lock_irqsave(&nesqp->lock, flags);
 					nesqp->hw_iwarp_state = iwarp_state;
 					nesqp->hw_tcp_state = tcp_state;
@@ -3594,9 +3624,10 @@
 				return;
 			}
 			spin_lock_irqsave(&nesqp->lock, flags);
-			nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
+			nesqp->hw_iwarp_state = iwarp_state;
+			nesqp->hw_tcp_state = tcp_state;
+			nesqp->last_aeq = async_event_id;
 			spin_unlock_irqrestore(&nesqp->lock, flags);
-			nes_hw_modify_qp(nesdev, nesqp, NES_CQP_QP_IWARP_STATE_CLOSING, 0, 0);
 			nes_cm_disconn(nesqp);
 			break;
 
@@ -3694,7 +3725,9 @@
 		case NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP:
 			printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_FATAL\n",
 					nesqp->hwqp.qp_id, async_event_id);
-			nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
+			print_ip(nesqp->cm_node);
+			if (!atomic_read(&nesqp->close_timer_started))
+				nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
 			break;
 
 		case NES_AEQE_AEID_CQ_OPERATION_ERROR:
diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h
index c324147..0b590e1 100644
--- a/drivers/infiniband/hw/nes/nes_hw.h
+++ b/drivers/infiniband/hw/nes/nes_hw.h
@@ -47,6 +47,11 @@
 #define NES_MULTICAST_PF_MAX 8
 #define NES_A0 3
 
+#define NES_ENABLE_PAU 0x07000001
+#define NES_DISABLE_PAU 0x07000000
+#define NES_PAU_COUNTER 10
+#define NES_CQP_OPCODE_MASK 0x3f
+
 enum pci_regs {
 	NES_INT_STAT = 0x0000,
 	NES_INT_MASK = 0x0004,
@@ -73,8 +78,10 @@
 	NES_IDX_QP_CONTROL = 0x0040,
 	NES_IDX_FLM_CONTROL = 0x0080,
 	NES_IDX_INT_CPU_STATUS = 0x00a0,
+	NES_IDX_GPR_TRIGGER = 0x00bc,
 	NES_IDX_GPIO_CONTROL = 0x00f0,
 	NES_IDX_GPIO_DATA = 0x00f4,
+	NES_IDX_GPR2 = 0x010c,
 	NES_IDX_TCP_CONFIG0 = 0x01e4,
 	NES_IDX_TCP_TIMER_CONFIG = 0x01ec,
 	NES_IDX_TCP_NOW = 0x01f0,
@@ -202,6 +209,7 @@
 	NES_CQP_REGISTER_SHARED_STAG = 0x0c,
 	NES_CQP_DEALLOCATE_STAG = 0x0d,
 	NES_CQP_MANAGE_ARP_CACHE = 0x0f,
+	NES_CQP_DOWNLOAD_SEGMENT = 0x10,
 	NES_CQP_SUSPEND_QPS = 0x11,
 	NES_CQP_UPLOAD_CONTEXT = 0x13,
 	NES_CQP_CREATE_CEQ = 0x16,
@@ -210,7 +218,8 @@
 	NES_CQP_DESTROY_AEQ = 0x1b,
 	NES_CQP_LMI_ACCESS = 0x20,
 	NES_CQP_FLUSH_WQES = 0x22,
-	NES_CQP_MANAGE_APBVT = 0x23
+	NES_CQP_MANAGE_APBVT = 0x23,
+	NES_CQP_MANAGE_QUAD_HASH = 0x25
 };
 
 enum nes_cqp_wqe_word_idx {
@@ -222,6 +231,14 @@
 	NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX = 5,
 };
 
+enum nes_cqp_wqe_word_download_idx { /* format differs from other cqp ops */
+	NES_CQP_WQE_DL_OPCODE_IDX = 0,
+	NES_CQP_WQE_DL_COMP_CTX_LOW_IDX = 1,
+	NES_CQP_WQE_DL_COMP_CTX_HIGH_IDX = 2,
+	NES_CQP_WQE_DL_LENGTH_0_TOTAL_IDX = 3
+	/* For index values 4-15 use NES_NIC_SQ_WQE_ values */
+};
+
 enum nes_cqp_cq_wqeword_idx {
 	NES_CQP_CQ_WQE_PBL_LOW_IDX = 6,
 	NES_CQP_CQ_WQE_PBL_HIGH_IDX = 7,
@@ -242,6 +259,7 @@
 	NES_CQP_STAG_WQE_PBL_LEN_IDX = 14
 };
 
+#define NES_CQP_OP_LOGICAL_PORT_SHIFT 26
 #define NES_CQP_OP_IWARP_STATE_SHIFT 28
 #define NES_CQP_OP_TERMLEN_SHIFT     28
 
@@ -599,6 +617,7 @@
 
 enum nes_nic_cqe_word_idx {
 	NES_NIC_CQE_ACCQP_ID_IDX = 0,
+	NES_NIC_CQE_HASH_RCVNXT = 1,
 	NES_NIC_CQE_TAG_PKT_TYPE_IDX = 2,
 	NES_NIC_CQE_MISC_IDX = 3,
 };
@@ -1005,6 +1024,11 @@
 #define NES_NIC_CQ_DOWNWARD_TREND   16
 #define NES_PFT_SIZE		    48
 
+#define NES_MGT_WQ_COUNT 32
+#define NES_MGT_CTX_SIZE ((NES_NIC_CTX_RQ_SIZE_32) | (NES_NIC_CTX_SQ_SIZE_32))
+#define NES_MGT_QP_OFFSET 36
+#define NES_MGT_QP_COUNT 4
+
 struct nes_hw_tune_timer {
     /* u16 cq_count; */
     u16 threshold_low;
@@ -1118,6 +1142,7 @@
 	u32 et_rate_sample_interval;
 	u32 timer_int_limit;
 	u32 wqm_quanta;
+	u8 allow_unaligned_fpdus;
 
 	/* Adapter base MAC address */
 	u32 mac_addr_low;
@@ -1251,6 +1276,14 @@
 	enum ib_event_type delayed_event;
 	enum ib_event_type last_dispatched_event;
 	spinlock_t port_ibevent_lock;
+	u32 mgt_mem_size;
+	void *mgt_vbase;
+	dma_addr_t mgt_pbase;
+	struct nes_vnic_mgt *mgtvnic[NES_MGT_QP_COUNT];
+	struct task_struct *mgt_thread;
+	wait_queue_head_t mgt_wait_queue;
+	struct sk_buff_head mgt_skb_list;
+
 };
 
 struct nes_ib_device {
diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c
new file mode 100644
index 0000000..b3b2a24
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes_mgt.c
@@ -0,0 +1,1162 @@
+/*
+ * Copyright (c) 2006 - 2009 Intel-NE, Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+#include <linux/kthread.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <net/tcp.h>
+#include "nes.h"
+#include "nes_mgt.h"
+
+atomic_t pau_qps_created;
+atomic_t pau_qps_destroyed;
+
+static void nes_replenish_mgt_rq(struct nes_vnic_mgt *mgtvnic)
+{
+	unsigned long flags;
+	dma_addr_t bus_address;
+	struct sk_buff *skb;
+	struct nes_hw_nic_rq_wqe *nic_rqe;
+	struct nes_hw_mgt *nesmgt;
+	struct nes_device *nesdev;
+	struct nes_rskb_cb *cb;
+	u32 rx_wqes_posted = 0;
+
+	nesmgt = &mgtvnic->mgt;
+	nesdev = mgtvnic->nesvnic->nesdev;
+	spin_lock_irqsave(&nesmgt->rq_lock, flags);
+	if (nesmgt->replenishing_rq != 0) {
+		if (((nesmgt->rq_size - 1) == atomic_read(&mgtvnic->rx_skbs_needed)) &&
+		    (atomic_read(&mgtvnic->rx_skb_timer_running) == 0)) {
+			atomic_set(&mgtvnic->rx_skb_timer_running, 1);
+			spin_unlock_irqrestore(&nesmgt->rq_lock, flags);
+			mgtvnic->rq_wqes_timer.expires = jiffies + (HZ / 2);      /* 1/2 second */
+			add_timer(&mgtvnic->rq_wqes_timer);
+		} else {
+			spin_unlock_irqrestore(&nesmgt->rq_lock, flags);
+		}
+		return;
+	}
+	nesmgt->replenishing_rq = 1;
+	spin_unlock_irqrestore(&nesmgt->rq_lock, flags);
+	do {
+		skb = dev_alloc_skb(mgtvnic->nesvnic->max_frame_size);
+		if (skb) {
+			skb->dev = mgtvnic->nesvnic->netdev;
+
+			bus_address = pci_map_single(nesdev->pcidev,
+						     skb->data, mgtvnic->nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
+			cb = (struct nes_rskb_cb *)&skb->cb[0];
+			cb->busaddr = bus_address;
+			cb->maplen = mgtvnic->nesvnic->max_frame_size;
+
+			nic_rqe = &nesmgt->rq_vbase[mgtvnic->mgt.rq_head];
+			nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] =
+				cpu_to_le32(mgtvnic->nesvnic->max_frame_size);
+			nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0;
+			nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] =
+				cpu_to_le32((u32)bus_address);
+			nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] =
+				cpu_to_le32((u32)((u64)bus_address >> 32));
+			nesmgt->rx_skb[nesmgt->rq_head] = skb;
+			nesmgt->rq_head++;
+			nesmgt->rq_head &= nesmgt->rq_size - 1;
+			atomic_dec(&mgtvnic->rx_skbs_needed);
+			barrier();
+			if (++rx_wqes_posted == 255) {
+				nes_write32(nesdev->regs + NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesmgt->qp_id);
+				rx_wqes_posted = 0;
+			}
+		} else {
+			spin_lock_irqsave(&nesmgt->rq_lock, flags);
+			if (((nesmgt->rq_size - 1) == atomic_read(&mgtvnic->rx_skbs_needed)) &&
+			    (atomic_read(&mgtvnic->rx_skb_timer_running) == 0)) {
+				atomic_set(&mgtvnic->rx_skb_timer_running, 1);
+				spin_unlock_irqrestore(&nesmgt->rq_lock, flags);
+				mgtvnic->rq_wqes_timer.expires = jiffies + (HZ / 2);      /* 1/2 second */
+				add_timer(&mgtvnic->rq_wqes_timer);
+			} else {
+				spin_unlock_irqrestore(&nesmgt->rq_lock, flags);
+			}
+			break;
+		}
+	} while (atomic_read(&mgtvnic->rx_skbs_needed));
+	barrier();
+	if (rx_wqes_posted)
+		nes_write32(nesdev->regs + NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesmgt->qp_id);
+	nesmgt->replenishing_rq = 0;
+}
+
+/**
+ * nes_mgt_rq_wqes_timeout
+ */
+static void nes_mgt_rq_wqes_timeout(unsigned long parm)
+{
+	struct nes_vnic_mgt *mgtvnic = (struct nes_vnic_mgt *)parm;
+
+	atomic_set(&mgtvnic->rx_skb_timer_running, 0);
+	if (atomic_read(&mgtvnic->rx_skbs_needed))
+		nes_replenish_mgt_rq(mgtvnic);
+}
+
+/**
+ * nes_mgt_free_skb - unmap and free skb
+ */
+static void nes_mgt_free_skb(struct nes_device *nesdev, struct sk_buff *skb, u32 dir)
+{
+	struct nes_rskb_cb *cb;
+
+	cb = (struct nes_rskb_cb *)&skb->cb[0];
+	pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen, dir);
+	cb->busaddr = 0;
+	dev_kfree_skb_any(skb);
+}
+
+/**
+ * nes_download_callback - handle download completions
+ */
+static void nes_download_callback(struct nes_device *nesdev, struct nes_cqp_request *cqp_request)
+{
+	struct pau_fpdu_info *fpdu_info = cqp_request->cqp_callback_pointer;
+	struct nes_qp *nesqp = fpdu_info->nesqp;
+	struct sk_buff *skb;
+	int i;
+
+	for (i = 0; i < fpdu_info->frag_cnt; i++) {
+		skb = fpdu_info->frags[i].skb;
+		if (fpdu_info->frags[i].cmplt) {
+			nes_mgt_free_skb(nesdev, skb, PCI_DMA_TODEVICE);
+			nes_rem_ref_cm_node(nesqp->cm_node);
+		}
+	}
+
+	if (fpdu_info->hdr_vbase)
+		pci_free_consistent(nesdev->pcidev, fpdu_info->hdr_len,
+				    fpdu_info->hdr_vbase, fpdu_info->hdr_pbase);
+	kfree(fpdu_info);
+}
+
+/**
+ * nes_get_seq - Get the seq, ack_seq and window from the packet
+ */
+static u32 nes_get_seq(struct sk_buff *skb, u32 *ack, u16 *wnd, u32 *fin_rcvd, u32 *rst_rcvd)
+{
+	struct nes_rskb_cb *cb = (struct nes_rskb_cb *)&skb->cb[0];
+	struct iphdr *iph = (struct iphdr *)(cb->data_start + ETH_HLEN);
+	struct tcphdr *tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl));
+
+	*ack = be32_to_cpu(tcph->ack_seq);
+	*wnd = be16_to_cpu(tcph->window);
+	*fin_rcvd = tcph->fin;
+	*rst_rcvd = tcph->rst;
+	return be32_to_cpu(tcph->seq);
+}
+
+/**
+ * nes_get_next_skb - Get the next skb based on where current skb is in the queue
+ */
+static struct sk_buff *nes_get_next_skb(struct nes_device *nesdev, struct nes_qp *nesqp,
+					struct sk_buff *skb, u32 nextseq, u32 *ack,
+					u16 *wnd, u32 *fin_rcvd, u32 *rst_rcvd)
+{
+	u32 seq;
+	bool processacks;
+	struct sk_buff *old_skb;
+
+	if (skb) {
+		/* Continue processing fpdu */
+		if (skb->next == (struct sk_buff *)&nesqp->pau_list)
+			goto out;
+		skb = skb->next;
+		processacks = false;
+	} else {
+		/* Starting a new one */
+		if (skb_queue_empty(&nesqp->pau_list))
+			goto out;
+		skb = skb_peek(&nesqp->pau_list);
+		processacks = true;
+	}
+
+	while (1) {
+		seq = nes_get_seq(skb, ack, wnd, fin_rcvd, rst_rcvd);
+		if (seq == nextseq) {
+			if (skb->len || processacks)
+				break;
+		} else if (after(seq, nextseq)) {
+			goto out;
+		}
+
+		if (skb->next == (struct sk_buff *)&nesqp->pau_list)
+			goto out;
+
+		old_skb = skb;
+		skb = skb->next;
+		skb_unlink(old_skb, &nesqp->pau_list);
+		nes_mgt_free_skb(nesdev, old_skb, PCI_DMA_TODEVICE);
+		nes_rem_ref_cm_node(nesqp->cm_node);
+	}
+	return skb;
+
+out:
+	return NULL;
+}
+
+/**
+ * get_fpdu_info - Find the next complete fpdu and return its fragments.
+ */
+static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp,
+			 struct pau_fpdu_info **pau_fpdu_info)
+{
+	struct sk_buff *skb;
+	struct iphdr *iph;
+	struct tcphdr *tcph;
+	struct nes_rskb_cb *cb;
+	struct pau_fpdu_info *fpdu_info = NULL;
+	struct pau_fpdu_frag frags[MAX_FPDU_FRAGS];
+	unsigned long flags;
+	u32 fpdu_len = 0;
+	u32 tmp_len;
+	int frag_cnt = 0;
+	u32 tot_len;
+	u32 frag_tot;
+	u32 ack;
+	u32 fin_rcvd;
+	u32 rst_rcvd;
+	u16 wnd;
+	int i;
+	int rc = 0;
+
+	*pau_fpdu_info = NULL;
+
+	spin_lock_irqsave(&nesqp->pau_lock, flags);
+	skb = nes_get_next_skb(nesdev, nesqp, NULL, nesqp->pau_rcv_nxt, &ack, &wnd, &fin_rcvd, &rst_rcvd);
+	if (!skb) {
+		spin_unlock_irqrestore(&nesqp->pau_lock, flags);
+		goto out;
+	}
+	cb = (struct nes_rskb_cb *)&skb->cb[0];
+	if (skb->len) {
+		fpdu_len = be16_to_cpu(*(__be16 *) skb->data) + MPA_FRAMING;
+		fpdu_len = (fpdu_len + 3) & 0xfffffffc;
+		tmp_len = fpdu_len;
+
+		/* See if we have all of the fpdu */
+		frag_tot = 0;
+		memset(&frags, 0, sizeof frags);
+		for (i = 0; i < MAX_FPDU_FRAGS; i++) {
+			frags[i].physaddr = cb->busaddr;
+			frags[i].physaddr += skb->data - cb->data_start;
+			frags[i].frag_len = min(tmp_len, skb->len);
+			frags[i].skb = skb;
+			frags[i].cmplt = (skb->len == frags[i].frag_len);
+			frag_tot += frags[i].frag_len;
+			frag_cnt++;
+
+			tmp_len -= frags[i].frag_len;
+			if (tmp_len == 0)
+				break;
+
+			skb = nes_get_next_skb(nesdev, nesqp, skb,
+					       nesqp->pau_rcv_nxt + frag_tot, &ack, &wnd, &fin_rcvd, &rst_rcvd);
+			if (!skb) {
+				spin_unlock_irqrestore(&nesqp->pau_lock, flags);
+				goto out;
+			} else if (rst_rcvd) {
+				/* rst received in the middle of fpdu */
+				for (; i >= 0; i--) {
+					skb_unlink(frags[i].skb, &nesqp->pau_list);
+					nes_mgt_free_skb(nesdev, frags[i].skb, PCI_DMA_TODEVICE);
+				}
+				cb = (struct nes_rskb_cb *)&skb->cb[0];
+				frags[0].physaddr = cb->busaddr;
+				frags[0].physaddr += skb->data - cb->data_start;
+				frags[0].frag_len = skb->len;
+				frags[0].skb = skb;
+				frags[0].cmplt = true;
+				frag_cnt = 1;
+				break;
+			}
+
+			cb = (struct nes_rskb_cb *)&skb->cb[0];
+		}
+	} else {
+		/* no data */
+		frags[0].physaddr = cb->busaddr;
+		frags[0].frag_len = 0;
+		frags[0].skb = skb;
+		frags[0].cmplt = true;
+		frag_cnt = 1;
+	}
+
+	spin_unlock_irqrestore(&nesqp->pau_lock, flags);
+
+	/* Found one */
+	fpdu_info = kzalloc(sizeof(*fpdu_info), GFP_ATOMIC);
+	if (fpdu_info == NULL) {
+		nes_debug(NES_DBG_PAU, "Failed to alloc a fpdu_info.\n");
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	fpdu_info->cqp_request = nes_get_cqp_request(nesdev);
+	if (fpdu_info->cqp_request == NULL) {
+		nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n");
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	cb = (struct nes_rskb_cb *)&frags[0].skb->cb[0];
+	iph = (struct iphdr *)(cb->data_start + ETH_HLEN);
+	tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl));
+	fpdu_info->hdr_len = (((unsigned char *)tcph) + 4 * (tcph->doff)) - cb->data_start;
+	fpdu_info->data_len = fpdu_len;
+	tot_len = fpdu_info->hdr_len + fpdu_len - ETH_HLEN;
+
+	if (frags[0].cmplt) {
+		fpdu_info->hdr_pbase = cb->busaddr;
+		fpdu_info->hdr_vbase = NULL;
+	} else {
+		fpdu_info->hdr_vbase = pci_alloc_consistent(nesdev->pcidev,
+							    fpdu_info->hdr_len, &fpdu_info->hdr_pbase);
+		if (!fpdu_info->hdr_vbase) {
+			nes_debug(NES_DBG_PAU, "Unable to allocate memory for pau first frag\n");
+			rc = -ENOMEM;
+			goto out;
+		}
+
+		/* Copy hdrs, adjusting len and seqnum */
+		memcpy(fpdu_info->hdr_vbase, cb->data_start, fpdu_info->hdr_len);
+		iph = (struct iphdr *)(fpdu_info->hdr_vbase + ETH_HLEN);
+		tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl));
+	}
+
+	iph->tot_len = cpu_to_be16(tot_len);
+	iph->saddr = cpu_to_be32(0x7f000001);
+
+	tcph->seq = cpu_to_be32(nesqp->pau_rcv_nxt);
+	tcph->ack_seq = cpu_to_be32(ack);
+	tcph->window = cpu_to_be16(wnd);
+
+	nesqp->pau_rcv_nxt += fpdu_len + fin_rcvd;
+
+	memcpy(fpdu_info->frags, frags, sizeof(fpdu_info->frags));
+	fpdu_info->frag_cnt = frag_cnt;
+	fpdu_info->nesqp = nesqp;
+	*pau_fpdu_info = fpdu_info;
+
+	/* Update skb's for next pass */
+	for (i = 0; i < frag_cnt; i++) {
+		cb = (struct nes_rskb_cb *)&frags[i].skb->cb[0];
+		skb_pull(frags[i].skb, frags[i].frag_len);
+
+		if (frags[i].skb->len == 0) {
+			/* Pull skb off the list - it will be freed in the callback */
+			spin_lock_irqsave(&nesqp->pau_lock, flags);
+			skb_unlink(frags[i].skb, &nesqp->pau_list);
+			spin_unlock_irqrestore(&nesqp->pau_lock, flags);
+		} else {
+			/* Last skb still has data so update the seq */
+			iph = (struct iphdr *)(cb->data_start + ETH_HLEN);
+			tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl));
+			tcph->seq = cpu_to_be32(nesqp->pau_rcv_nxt);
+		}
+	}
+
+out:
+	if (rc) {
+		if (fpdu_info) {
+			if (fpdu_info->cqp_request)
+				nes_put_cqp_request(nesdev, fpdu_info->cqp_request);
+			kfree(fpdu_info);
+		}
+	}
+	return rc;
+}
+
+/**
+ * forward_fpdu - send complete fpdus, one at a time
+ */
+static int forward_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp)
+{
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct pau_fpdu_info *fpdu_info;
+	struct nes_hw_cqp_wqe *cqp_wqe;
+	struct nes_cqp_request *cqp_request;
+	u64 u64tmp;
+	u32 u32tmp;
+	int rc;
+
+	while (1) {
+		rc = get_fpdu_info(nesdev, nesqp, &fpdu_info);
+		if (fpdu_info == NULL)
+			return rc;
+
+		cqp_request = fpdu_info->cqp_request;
+		cqp_wqe = &cqp_request->cqp_wqe;
+		nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_DL_OPCODE_IDX,
+				    NES_CQP_DOWNLOAD_SEGMENT |
+				    (((u32)nesvnic->logical_port) << NES_CQP_OP_LOGICAL_PORT_SHIFT));
+
+		u32tmp = fpdu_info->hdr_len << 16;
+		u32tmp |= fpdu_info->hdr_len + (u32)fpdu_info->data_len;
+		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_DL_LENGTH_0_TOTAL_IDX,
+				    u32tmp);
+
+		u32tmp = (fpdu_info->frags[1].frag_len << 16) | fpdu_info->frags[0].frag_len;
+		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_LENGTH_2_1_IDX,
+				    u32tmp);
+
+		u32tmp = (fpdu_info->frags[3].frag_len << 16) | fpdu_info->frags[2].frag_len;
+		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_LENGTH_4_3_IDX,
+				    u32tmp);
+
+		u64tmp = (u64)fpdu_info->hdr_pbase;
+		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX,
+				    lower_32_bits(u64tmp));
+		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_HIGH_IDX,
+				    upper_32_bits(u64tmp >> 32));
+
+		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX,
+				    lower_32_bits(fpdu_info->frags[0].physaddr));
+		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_HIGH_IDX,
+				    upper_32_bits(fpdu_info->frags[0].physaddr));
+
+		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG2_LOW_IDX,
+				    lower_32_bits(fpdu_info->frags[1].physaddr));
+		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG2_HIGH_IDX,
+				    upper_32_bits(fpdu_info->frags[1].physaddr));
+
+		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG3_LOW_IDX,
+				    lower_32_bits(fpdu_info->frags[2].physaddr));
+		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG3_HIGH_IDX,
+				    upper_32_bits(fpdu_info->frags[2].physaddr));
+
+		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG4_LOW_IDX,
+				    lower_32_bits(fpdu_info->frags[3].physaddr));
+		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG4_HIGH_IDX,
+				    upper_32_bits(fpdu_info->frags[3].physaddr));
+
+		cqp_request->cqp_callback_pointer = fpdu_info;
+		cqp_request->callback = 1;
+		cqp_request->cqp_callback = nes_download_callback;
+
+		atomic_set(&cqp_request->refcount, 1);
+		nes_post_cqp_request(nesdev, cqp_request);
+	}
+
+	return 0;
+}
+
+static void process_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp)
+{
+	int again = 1;
+	unsigned long flags;
+
+	do {
+		/* Ignore rc - if it failed, tcp retries will cause it to try again */
+		forward_fpdus(nesvnic, nesqp);
+
+		spin_lock_irqsave(&nesqp->pau_lock, flags);
+		if (nesqp->pau_pending) {
+			nesqp->pau_pending = 0;
+		} else {
+			nesqp->pau_busy = 0;
+			again = 0;
+		}
+
+		spin_unlock_irqrestore(&nesqp->pau_lock, flags);
+	} while (again);
+}
+
+/**
+ * queue_fpdus - Handle fpdu's that hw passed up to sw
+ */
+static void queue_fpdus(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp)
+{
+	struct sk_buff *tmpskb;
+	struct nes_rskb_cb *cb;
+	struct iphdr *iph;
+	struct tcphdr *tcph;
+	unsigned char *tcph_end;
+	u32 rcv_nxt;
+	u32 rcv_wnd;
+	u32 seqnum;
+	u32 len;
+	bool process_it = false;
+	unsigned long flags;
+
+	/* Move data ptr to after tcp header */
+	iph = (struct iphdr *)skb->data;
+	tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl));
+	seqnum = be32_to_cpu(tcph->seq);
+	tcph_end = (((char *)tcph) + (4 * tcph->doff));
+
+	len = be16_to_cpu(iph->tot_len);
+	if (skb->len > len)
+		skb_trim(skb, len);
+	skb_pull(skb, tcph_end - skb->data);
+
+	/* Initialize tracking values */
+	cb = (struct nes_rskb_cb *)&skb->cb[0];
+	cb->seqnum = seqnum;
+
+	/* Make sure data is in the receive window */
+	rcv_nxt = nesqp->pau_rcv_nxt;
+	rcv_wnd = le32_to_cpu(nesqp->nesqp_context->rcv_wnd);
+	if (!between(seqnum, rcv_nxt, (rcv_nxt + rcv_wnd))) {
+		nes_mgt_free_skb(nesvnic->nesdev, skb, PCI_DMA_TODEVICE);
+		nes_rem_ref_cm_node(nesqp->cm_node);
+		return;
+	}
+
+	spin_lock_irqsave(&nesqp->pau_lock, flags);
+
+	if (nesqp->pau_busy)
+		nesqp->pau_pending = 1;
+	else
+		nesqp->pau_busy = 1;
+
+	/* Queue skb by sequence number */
+	if (skb_queue_len(&nesqp->pau_list) == 0) {
+		skb_queue_head(&nesqp->pau_list, skb);
+	} else {
+		tmpskb = nesqp->pau_list.next;
+		while (tmpskb != (struct sk_buff *)&nesqp->pau_list) {
+			cb = (struct nes_rskb_cb *)&tmpskb->cb[0];
+			if (before(seqnum, cb->seqnum))
+				break;
+			tmpskb = tmpskb->next;
+		}
+		skb_insert(tmpskb, skb, &nesqp->pau_list);
+	}
+	if (nesqp->pau_state == PAU_READY)
+		process_it = true;
+	spin_unlock_irqrestore(&nesqp->pau_lock, flags);
+
+	if (process_it)
+		process_fpdus(nesvnic, nesqp);
+
+	return;
+}
+
+/**
+ * mgt_thread - Handle mgt skbs in a safe context
+ */
+static int mgt_thread(void *context)
+{
+	struct nes_vnic *nesvnic = context;
+	struct sk_buff *skb;
+	struct nes_rskb_cb *cb;
+
+	while (!kthread_should_stop()) {
+		wait_event_interruptible(nesvnic->mgt_wait_queue,
+					 skb_queue_len(&nesvnic->mgt_skb_list) || kthread_should_stop());
+		while ((skb_queue_len(&nesvnic->mgt_skb_list)) && !kthread_should_stop()) {
+			skb = skb_dequeue(&nesvnic->mgt_skb_list);
+			cb = (struct nes_rskb_cb *)&skb->cb[0];
+			cb->data_start = skb->data - ETH_HLEN;
+			cb->busaddr = pci_map_single(nesvnic->nesdev->pcidev, cb->data_start,
+						     nesvnic->max_frame_size, PCI_DMA_TODEVICE);
+			queue_fpdus(skb, nesvnic, cb->nesqp);
+		}
+	}
+
+	/* Closing down so delete any entries on the queue */
+	while (skb_queue_len(&nesvnic->mgt_skb_list)) {
+		skb = skb_dequeue(&nesvnic->mgt_skb_list);
+		cb = (struct nes_rskb_cb *)&skb->cb[0];
+		nes_rem_ref_cm_node(cb->nesqp->cm_node);
+		dev_kfree_skb_any(skb);
+	}
+	return 0;
+}
+
+/**
+ * nes_queue_skbs - Queue skb so it can be handled in a thread context
+ */
+void nes_queue_mgt_skbs(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp)
+{
+	struct nes_rskb_cb *cb;
+
+	cb = (struct nes_rskb_cb *)&skb->cb[0];
+	cb->nesqp = nesqp;
+	skb_queue_tail(&nesvnic->mgt_skb_list, skb);
+	wake_up_interruptible(&nesvnic->mgt_wait_queue);
+}
+
+void nes_destroy_pau_qp(struct nes_device *nesdev, struct nes_qp *nesqp)
+{
+	struct sk_buff *skb;
+	unsigned long flags;
+	atomic_inc(&pau_qps_destroyed);
+
+	/* Free packets that have not yet been forwarded */
+	/* Lock is acquired by skb_dequeue when removing the skb */
+	spin_lock_irqsave(&nesqp->pau_lock, flags);
+	while (skb_queue_len(&nesqp->pau_list)) {
+		skb = skb_dequeue(&nesqp->pau_list);
+		nes_mgt_free_skb(nesdev, skb, PCI_DMA_TODEVICE);
+		nes_rem_ref_cm_node(nesqp->cm_node);
+	}
+	spin_unlock_irqrestore(&nesqp->pau_lock, flags);
+}
+
+static void nes_chg_qh_handler(struct nes_device *nesdev, struct nes_cqp_request *cqp_request)
+{
+	struct pau_qh_chg *qh_chg = cqp_request->cqp_callback_pointer;
+	struct nes_cqp_request *new_request;
+	struct nes_hw_cqp_wqe *cqp_wqe;
+	struct nes_adapter *nesadapter;
+	struct nes_qp *nesqp;
+	struct nes_v4_quad nes_quad;
+	u32 crc_value;
+	u64 u64temp;
+
+	nesadapter = nesdev->nesadapter;
+	nesqp = qh_chg->nesqp;
+
+	/* Should we handle the bad completion */
+	if (cqp_request->major_code) {
+		printk(KERN_ERR PFX "Invalid cqp_request major_code=0x%x\n",
+		       cqp_request->major_code);
+		WARN_ON(1);
+	}
+
+	switch (nesqp->pau_state) {
+	case PAU_DEL_QH:
+		/* Old hash code deleted, now set the new one */
+		nesqp->pau_state = PAU_ADD_LB_QH;
+		new_request = nes_get_cqp_request(nesdev);
+		if (new_request == NULL) {
+			nes_debug(NES_DBG_PAU, "Failed to get a new_request.\n");
+			WARN_ON(1);
+			return;
+		}
+
+		memset(&nes_quad, 0, sizeof(nes_quad));
+		nes_quad.DstIpAdrIndex =
+			cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
+		nes_quad.SrcIpadr = cpu_to_be32(0x7f000001);
+		nes_quad.TcpPorts[0] = swab16(nesqp->nesqp_context->tcpPorts[1]);
+		nes_quad.TcpPorts[1] = swab16(nesqp->nesqp_context->tcpPorts[0]);
+
+		/* Produce hash key */
+		crc_value = get_crc_value(&nes_quad);
+		nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff);
+		nes_debug(NES_DBG_PAU, "new HTE Index = 0x%08X, CRC = 0x%08X\n",
+			  nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask);
+
+		nesqp->hte_index &= nesadapter->hte_index_mask;
+		nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index);
+		nesqp->nesqp_context->ip0 = cpu_to_le32(0x7f000001);
+		nesqp->nesqp_context->rcv_nxt = cpu_to_le32(nesqp->pau_rcv_nxt);
+
+		cqp_wqe = &new_request->cqp_wqe;
+		nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+		set_wqe_32bit_value(cqp_wqe->wqe_words,
+				    NES_CQP_WQE_OPCODE_IDX, NES_CQP_MANAGE_QUAD_HASH |
+				    NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_CONTEXT_VALID | NES_CQP_QP_IWARP_STATE_RTS);
+		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
+		u64temp = (u64)nesqp->nesqp_context_pbase;
+		set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
+
+		nes_debug(NES_DBG_PAU, "Waiting for CQP completion for adding the quad hash.\n");
+
+		new_request->cqp_callback_pointer = qh_chg;
+		new_request->callback = 1;
+		new_request->cqp_callback = nes_chg_qh_handler;
+		atomic_set(&new_request->refcount, 1);
+		nes_post_cqp_request(nesdev, new_request);
+		break;
+
+	case PAU_ADD_LB_QH:
+		/* Start processing the queued fpdu's */
+		nesqp->pau_state = PAU_READY;
+		process_fpdus(qh_chg->nesvnic, qh_chg->nesqp);
+		kfree(qh_chg);
+		break;
+	}
+}
+
+/**
+ * nes_change_quad_hash
+ */
+static int nes_change_quad_hash(struct nes_device *nesdev,
+				struct nes_vnic *nesvnic, struct nes_qp *nesqp)
+{
+	struct nes_cqp_request *cqp_request = NULL;
+	struct pau_qh_chg *qh_chg = NULL;
+	u64 u64temp;
+	struct nes_hw_cqp_wqe *cqp_wqe;
+	int ret = 0;
+
+	cqp_request = nes_get_cqp_request(nesdev);
+	if (cqp_request == NULL) {
+		nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n");
+		ret = -ENOMEM;
+		goto chg_qh_err;
+	}
+
+	qh_chg = kmalloc(sizeof *qh_chg, GFP_ATOMIC);
+	if (qh_chg == NULL) {
+		nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n");
+		ret = -ENOMEM;
+		goto chg_qh_err;
+	}
+	qh_chg->nesdev = nesdev;
+	qh_chg->nesvnic = nesvnic;
+	qh_chg->nesqp = nesqp;
+	nesqp->pau_state = PAU_DEL_QH;
+
+	cqp_wqe = &cqp_request->cqp_wqe;
+	nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+	set_wqe_32bit_value(cqp_wqe->wqe_words,
+			    NES_CQP_WQE_OPCODE_IDX, NES_CQP_MANAGE_QUAD_HASH | NES_CQP_QP_DEL_HTE |
+			    NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_CONTEXT_VALID | NES_CQP_QP_IWARP_STATE_RTS);
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
+	u64temp = (u64)nesqp->nesqp_context_pbase;
+	set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
+
+	nes_debug(NES_DBG_PAU, "Waiting for CQP completion for deleting the quad hash.\n");
+
+	cqp_request->cqp_callback_pointer = qh_chg;
+	cqp_request->callback = 1;
+	cqp_request->cqp_callback = nes_chg_qh_handler;
+	atomic_set(&cqp_request->refcount, 1);
+	nes_post_cqp_request(nesdev, cqp_request);
+
+	return ret;
+
+chg_qh_err:
+	kfree(qh_chg);
+	if (cqp_request)
+		nes_put_cqp_request(nesdev, cqp_request);
+	return ret;
+}
+
+/**
+ * nes_mgt_ce_handler
+ * This management code deals with any packed and unaligned (pau) fpdu's
+ * that the hardware cannot handle.
+ */
+static void nes_mgt_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
+{
+	struct nes_vnic_mgt *mgtvnic = container_of(cq, struct nes_vnic_mgt, mgt_cq);
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	u32 head;
+	u32 cq_size;
+	u32 cqe_count = 0;
+	u32 cqe_misc;
+	u32 qp_id = 0;
+	u32 skbs_needed;
+	unsigned long context;
+	struct nes_qp *nesqp;
+	struct sk_buff *rx_skb;
+	struct nes_rskb_cb *cb;
+
+	head = cq->cq_head;
+	cq_size = cq->cq_size;
+
+	while (1) {
+		cqe_misc = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]);
+		if (!(cqe_misc & NES_NIC_CQE_VALID))
+			break;
+
+		nesqp = NULL;
+		if (cqe_misc & NES_NIC_CQE_ACCQP_VALID) {
+			qp_id = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_ACCQP_ID_IDX]);
+			qp_id &= 0x001fffff;
+			if (qp_id < nesadapter->max_qp) {
+				context = (unsigned long)nesadapter->qp_table[qp_id - NES_FIRST_QPN];
+				nesqp = (struct nes_qp *)context;
+			}
+		}
+
+		if (nesqp) {
+			if (nesqp->pau_mode == false) {
+				nesqp->pau_mode = true; /* First time for this qp */
+				nesqp->pau_rcv_nxt = le32_to_cpu(
+					cq->cq_vbase[head].cqe_words[NES_NIC_CQE_HASH_RCVNXT]);
+				skb_queue_head_init(&nesqp->pau_list);
+				spin_lock_init(&nesqp->pau_lock);
+				atomic_inc(&pau_qps_created);
+				nes_change_quad_hash(nesdev, mgtvnic->nesvnic, nesqp);
+			}
+
+			rx_skb = mgtvnic->mgt.rx_skb[mgtvnic->mgt.rq_tail];
+			rx_skb->len = 0;
+			skb_put(rx_skb, cqe_misc & 0x0000ffff);
+			rx_skb->protocol = eth_type_trans(rx_skb, mgtvnic->nesvnic->netdev);
+			cb = (struct nes_rskb_cb *)&rx_skb->cb[0];
+			pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen, PCI_DMA_FROMDEVICE);
+			cb->busaddr = 0;
+			mgtvnic->mgt.rq_tail++;
+			mgtvnic->mgt.rq_tail &= mgtvnic->mgt.rq_size - 1;
+
+			nes_add_ref_cm_node(nesqp->cm_node);
+			nes_queue_mgt_skbs(rx_skb, mgtvnic->nesvnic, nesqp);
+		} else {
+			printk(KERN_ERR PFX "Invalid QP %d for packed/unaligned handling\n", qp_id);
+		}
+
+		cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX] = 0;
+		cqe_count++;
+		if (++head >= cq_size)
+			head = 0;
+
+		if (cqe_count == 255) {
+			/* Replenish mgt CQ */
+			nes_write32(nesdev->regs + NES_CQE_ALLOC, cq->cq_number | (cqe_count << 16));
+			nesdev->currcq_count += cqe_count;
+			cqe_count = 0;
+		}
+
+		skbs_needed = atomic_inc_return(&mgtvnic->rx_skbs_needed);
+		if (skbs_needed > (mgtvnic->mgt.rq_size >> 1))
+			nes_replenish_mgt_rq(mgtvnic);
+	}
+
+	cq->cq_head = head;
+	nes_write32(nesdev->regs + NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
+		    cq->cq_number | (cqe_count << 16));
+	nes_read32(nesdev->regs + NES_CQE_ALLOC);
+	nesdev->currcq_count += cqe_count;
+}
+
+/**
+ * nes_init_mgt_qp
+ */
+int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct nes_vnic *nesvnic)
+{
+	struct nes_vnic_mgt *mgtvnic;
+	u32 counter;
+	void *vmem;
+	dma_addr_t pmem;
+	struct nes_hw_cqp_wqe *cqp_wqe;
+	u32 cqp_head;
+	unsigned long flags;
+	struct nes_hw_nic_qp_context *mgt_context;
+	u64 u64temp;
+	struct nes_hw_nic_rq_wqe *mgt_rqe;
+	struct sk_buff *skb;
+	u32 wqe_count;
+	struct nes_rskb_cb *cb;
+	u32 mgt_mem_size;
+	void *mgt_vbase;
+	dma_addr_t mgt_pbase;
+	int i;
+	int ret;
+
+	/* Allocate space the all mgt QPs once */
+	mgtvnic = kzalloc(NES_MGT_QP_COUNT * sizeof(struct nes_vnic_mgt), GFP_KERNEL);
+	if (mgtvnic == NULL) {
+		nes_debug(NES_DBG_INIT, "Unable to allocate memory for mgt structure\n");
+		return -ENOMEM;
+	}
+
+	/* Allocate fragment, RQ, and CQ; Reuse CEQ based on the PCI function */
+	/* We are not sending from this NIC so sq is not allocated */
+	mgt_mem_size = 256 +
+		       (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe)) +
+		       (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_cqe)) +
+		       sizeof(struct nes_hw_nic_qp_context);
+	mgt_mem_size = (mgt_mem_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
+	mgt_vbase = pci_alloc_consistent(nesdev->pcidev, NES_MGT_QP_COUNT * mgt_mem_size, &mgt_pbase);
+	if (!mgt_vbase) {
+		kfree(mgtvnic);
+		nes_debug(NES_DBG_INIT, "Unable to allocate memory for mgt host descriptor rings\n");
+		return -ENOMEM;
+	}
+
+	nesvnic->mgt_mem_size = NES_MGT_QP_COUNT * mgt_mem_size;
+	nesvnic->mgt_vbase = mgt_vbase;
+	nesvnic->mgt_pbase = mgt_pbase;
+
+	skb_queue_head_init(&nesvnic->mgt_skb_list);
+	init_waitqueue_head(&nesvnic->mgt_wait_queue);
+	nesvnic->mgt_thread = kthread_run(mgt_thread, nesvnic, "nes_mgt_thread");
+
+	for (i = 0; i < NES_MGT_QP_COUNT; i++) {
+		mgtvnic->nesvnic = nesvnic;
+		mgtvnic->mgt.qp_id = nesdev->mac_index + NES_MGT_QP_OFFSET + i;
+		memset(mgt_vbase, 0, mgt_mem_size);
+		nes_debug(NES_DBG_INIT, "Allocated mgt QP structures at %p (phys = %016lX), size = %u.\n",
+			  mgt_vbase, (unsigned long)mgt_pbase, mgt_mem_size);
+
+		vmem = (void *)(((unsigned long)mgt_vbase + (256 - 1)) &
+				~(unsigned long)(256 - 1));
+		pmem = (dma_addr_t)(((unsigned long long)mgt_pbase + (256 - 1)) &
+				    ~(unsigned long long)(256 - 1));
+
+		spin_lock_init(&mgtvnic->mgt.rq_lock);
+
+		/* setup the RQ */
+		mgtvnic->mgt.rq_vbase = vmem;
+		mgtvnic->mgt.rq_pbase = pmem;
+		mgtvnic->mgt.rq_head = 0;
+		mgtvnic->mgt.rq_tail = 0;
+		mgtvnic->mgt.rq_size = NES_MGT_WQ_COUNT;
+
+		/* setup the CQ */
+		vmem += (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe));
+		pmem += (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe));
+
+		mgtvnic->mgt_cq.cq_number = mgtvnic->mgt.qp_id;
+		mgtvnic->mgt_cq.cq_vbase = vmem;
+		mgtvnic->mgt_cq.cq_pbase = pmem;
+		mgtvnic->mgt_cq.cq_head = 0;
+		mgtvnic->mgt_cq.cq_size = NES_MGT_WQ_COUNT;
+
+		mgtvnic->mgt_cq.ce_handler = nes_mgt_ce_handler;
+
+		/* Send CreateCQ request to CQP */
+		spin_lock_irqsave(&nesdev->cqp.lock, flags);
+		cqp_head = nesdev->cqp.sq_head;
+
+		cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+		nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+
+		cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(
+			NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID |
+			((u32)mgtvnic->mgt_cq.cq_size << 16));
+		cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(
+			mgtvnic->mgt_cq.cq_number | ((u32)nesdev->ceq_index << 16));
+		u64temp = (u64)mgtvnic->mgt_cq.cq_pbase;
+		set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
+		cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0;
+		u64temp = (unsigned long)&mgtvnic->mgt_cq;
+		cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = cpu_to_le32((u32)(u64temp >> 1));
+		cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] =
+			cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF);
+		cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0;
+
+		if (++cqp_head >= nesdev->cqp.sq_size)
+			cqp_head = 0;
+		cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+		nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+
+		/* Send CreateQP request to CQP */
+		mgt_context = (void *)(&mgtvnic->mgt_cq.cq_vbase[mgtvnic->mgt_cq.cq_size]);
+		mgt_context->context_words[NES_NIC_CTX_MISC_IDX] =
+			cpu_to_le32((u32)NES_MGT_CTX_SIZE |
+				    ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 12));
+		nes_debug(NES_DBG_INIT, "RX_WINDOW_BUFFER_PAGE_TABLE_SIZE = 0x%08X, RX_WINDOW_BUFFER_SIZE = 0x%08X\n",
+			  nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_PAGE_TABLE_SIZE),
+			  nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE));
+		if (nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE) != 0)
+			mgt_context->context_words[NES_NIC_CTX_MISC_IDX] |= cpu_to_le32(NES_NIC_BACK_STORE);
+
+		u64temp = (u64)mgtvnic->mgt.rq_pbase;
+		mgt_context->context_words[NES_NIC_CTX_SQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
+		mgt_context->context_words[NES_NIC_CTX_SQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
+		u64temp = (u64)mgtvnic->mgt.rq_pbase;
+		mgt_context->context_words[NES_NIC_CTX_RQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
+		mgt_context->context_words[NES_NIC_CTX_RQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
+
+		cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_QP |
+									 NES_CQP_QP_TYPE_NIC);
+		cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(mgtvnic->mgt.qp_id);
+		u64temp = (u64)mgtvnic->mgt_cq.cq_pbase +
+			  (mgtvnic->mgt_cq.cq_size * sizeof(struct nes_hw_nic_cqe));
+		set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
+
+		if (++cqp_head >= nesdev->cqp.sq_size)
+			cqp_head = 0;
+		nesdev->cqp.sq_head = cqp_head;
+
+		barrier();
+
+		/* Ring doorbell (2 WQEs) */
+		nes_write32(nesdev->regs + NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id);
+
+		spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+		nes_debug(NES_DBG_INIT, "Waiting for create MGT QP%u to complete.\n",
+			  mgtvnic->mgt.qp_id);
+
+		ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head),
+					 NES_EVENT_TIMEOUT);
+		nes_debug(NES_DBG_INIT, "Create MGT QP%u completed, wait_event_timeout ret = %u.\n",
+			  mgtvnic->mgt.qp_id, ret);
+		if (!ret) {
+			nes_debug(NES_DBG_INIT, "MGT QP%u create timeout expired\n", mgtvnic->mgt.qp_id);
+			if (i == 0) {
+				pci_free_consistent(nesdev->pcidev, nesvnic->mgt_mem_size, nesvnic->mgt_vbase,
+						    nesvnic->mgt_pbase);
+				kfree(mgtvnic);
+			} else {
+				nes_destroy_mgt(nesvnic);
+			}
+			return -EIO;
+		}
+
+		/* Populate the RQ */
+		for (counter = 0; counter < (NES_MGT_WQ_COUNT - 1); counter++) {
+			skb = dev_alloc_skb(nesvnic->max_frame_size);
+			if (!skb) {
+				nes_debug(NES_DBG_INIT, "%s: out of memory for receive skb\n", netdev->name);
+				return -ENOMEM;
+			}
+
+			skb->dev = netdev;
+
+			pmem = pci_map_single(nesdev->pcidev, skb->data,
+					      nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
+			cb = (struct nes_rskb_cb *)&skb->cb[0];
+			cb->busaddr = pmem;
+			cb->maplen = nesvnic->max_frame_size;
+
+			mgt_rqe = &mgtvnic->mgt.rq_vbase[counter];
+			mgt_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32((u32)nesvnic->max_frame_size);
+			mgt_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0;
+			mgt_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = cpu_to_le32((u32)pmem);
+			mgt_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = cpu_to_le32((u32)((u64)pmem >> 32));
+			mgtvnic->mgt.rx_skb[counter] = skb;
+		}
+
+		init_timer(&mgtvnic->rq_wqes_timer);
+		mgtvnic->rq_wqes_timer.function = nes_mgt_rq_wqes_timeout;
+		mgtvnic->rq_wqes_timer.data = (unsigned long)mgtvnic;
+
+		wqe_count = NES_MGT_WQ_COUNT - 1;
+		mgtvnic->mgt.rq_head = wqe_count;
+		barrier();
+		do {
+			counter = min(wqe_count, ((u32)255));
+			wqe_count -= counter;
+			nes_write32(nesdev->regs + NES_WQE_ALLOC, (counter << 24) | mgtvnic->mgt.qp_id);
+		} while (wqe_count);
+
+		nes_write32(nesdev->regs + NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
+			    mgtvnic->mgt_cq.cq_number);
+		nes_read32(nesdev->regs + NES_CQE_ALLOC);
+
+		mgt_vbase += mgt_mem_size;
+		mgt_pbase += mgt_mem_size;
+		nesvnic->mgtvnic[i] = mgtvnic++;
+	}
+	return 0;
+}
+
+
+void nes_destroy_mgt(struct nes_vnic *nesvnic)
+{
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_vnic_mgt *mgtvnic;
+	struct nes_vnic_mgt *first_mgtvnic;
+	unsigned long flags;
+	struct nes_hw_cqp_wqe *cqp_wqe;
+	u32 cqp_head;
+	struct sk_buff *rx_skb;
+	int i;
+	int ret;
+
+	kthread_stop(nesvnic->mgt_thread);
+
+	/* Free remaining NIC receive buffers */
+	first_mgtvnic = nesvnic->mgtvnic[0];
+	for (i = 0; i < NES_MGT_QP_COUNT; i++) {
+		mgtvnic = nesvnic->mgtvnic[i];
+		if (mgtvnic == NULL)
+			continue;
+
+		while (mgtvnic->mgt.rq_head != mgtvnic->mgt.rq_tail) {
+			rx_skb = mgtvnic->mgt.rx_skb[mgtvnic->mgt.rq_tail];
+			nes_mgt_free_skb(nesdev, rx_skb, PCI_DMA_FROMDEVICE);
+			mgtvnic->mgt.rq_tail++;
+			mgtvnic->mgt.rq_tail &= (mgtvnic->mgt.rq_size - 1);
+		}
+
+		spin_lock_irqsave(&nesdev->cqp.lock, flags);
+
+		/* Destroy NIC QP */
+		cqp_head = nesdev->cqp.sq_head;
+		cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+		nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+
+		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
+				    (NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_NIC));
+		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
+				    mgtvnic->mgt.qp_id);
+
+		if (++cqp_head >= nesdev->cqp.sq_size)
+			cqp_head = 0;
+
+		cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
+
+		/* Destroy NIC CQ */
+		nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
+		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
+				    (NES_CQP_DESTROY_CQ | ((u32)mgtvnic->mgt_cq.cq_size << 16)));
+		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
+				    (mgtvnic->mgt_cq.cq_number | ((u32)nesdev->ceq_index << 16)));
+
+		if (++cqp_head >= nesdev->cqp.sq_size)
+			cqp_head = 0;
+
+		nesdev->cqp.sq_head = cqp_head;
+		barrier();
+
+		/* Ring doorbell (2 WQEs) */
+		nes_write32(nesdev->regs + NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id);
+
+		spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+		nes_debug(NES_DBG_SHUTDOWN, "Waiting for CQP, cqp_head=%u, cqp.sq_head=%u,"
+			  " cqp.sq_tail=%u, cqp.sq_size=%u\n",
+			  cqp_head, nesdev->cqp.sq_head,
+			  nesdev->cqp.sq_tail, nesdev->cqp.sq_size);
+
+		ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head),
+					 NES_EVENT_TIMEOUT);
+
+		nes_debug(NES_DBG_SHUTDOWN, "Destroy MGT QP returned, wait_event_timeout ret = %u, cqp_head=%u,"
+			  " cqp.sq_head=%u, cqp.sq_tail=%u\n",
+			  ret, cqp_head, nesdev->cqp.sq_head, nesdev->cqp.sq_tail);
+		if (!ret)
+			nes_debug(NES_DBG_SHUTDOWN, "MGT QP%u destroy timeout expired\n",
+				  mgtvnic->mgt.qp_id);
+
+		nesvnic->mgtvnic[i] = NULL;
+	}
+
+	if (nesvnic->mgt_vbase) {
+		pci_free_consistent(nesdev->pcidev, nesvnic->mgt_mem_size, nesvnic->mgt_vbase,
+				    nesvnic->mgt_pbase);
+		nesvnic->mgt_vbase = NULL;
+		nesvnic->mgt_pbase = 0;
+	}
+
+	kfree(first_mgtvnic);
+}
diff --git a/drivers/infiniband/hw/nes/nes_mgt.h b/drivers/infiniband/hw/nes/nes_mgt.h
new file mode 100644
index 0000000..8c8af25
--- /dev/null
+++ b/drivers/infiniband/hw/nes/nes_mgt.h
@@ -0,0 +1,97 @@
+/*
+* Copyright (c) 2010 Intel-NE, Inc.  All rights reserved.
+*
+* This software is available to you under a choice of one of two
+* licenses.  You may choose to be licensed under the terms of the GNU
+* General Public License (GPL) Version 2, available from the file
+* COPYING in the main directory of this source tree, or the
+* OpenIB.org BSD license below:
+*
+*     Redistribution and use in source and binary forms, with or
+*     without modification, are permitted provided that the following
+*     conditions are met:
+*
+*      - Redistributions of source code must retain the above
+*        copyright notice, this list of conditions and the following
+*        disclaimer.
+*
+*      - Redistributions in binary form must reproduce the above
+*        copyright notice, this list of conditions and the following
+*        disclaimer in the documentation and/or other materials
+*        provided with the distribution.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*/
+
+#ifndef __NES_MGT_H
+#define __NES_MGT_H
+
+#define MPA_FRAMING 6	/* length is 2 bytes, crc is 4 bytes */
+
+int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct nes_vnic *nesvnic);
+void nes_queue_mgt_skbs(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp);
+void nes_destroy_mgt(struct nes_vnic *nesvnic);
+void nes_destroy_pau_qp(struct nes_device *nesdev, struct nes_qp *nesqp);
+
+struct nes_hw_mgt {
+	struct nes_hw_nic_rq_wqe *rq_vbase;	/* virtual address of rq */
+	dma_addr_t rq_pbase;			/* PCI memory for host rings */
+	struct sk_buff *rx_skb[NES_NIC_WQ_SIZE];
+	u16 qp_id;
+	u16 sq_head;
+	u16 rq_head;
+	u16 rq_tail;
+	u16 rq_size;
+	u8 replenishing_rq;
+	u8 reserved;
+	spinlock_t rq_lock;
+};
+
+struct nes_vnic_mgt {
+	struct nes_vnic        *nesvnic;
+	struct nes_hw_mgt      mgt;
+	struct nes_hw_nic_cq   mgt_cq;
+	atomic_t               rx_skbs_needed;
+	struct timer_list      rq_wqes_timer;
+	atomic_t               rx_skb_timer_running;
+};
+
+#define MAX_FPDU_FRAGS 4
+struct pau_fpdu_frag {
+	struct sk_buff         *skb;
+	u64                    physaddr;
+	u32                    frag_len;
+	bool                   cmplt;
+};
+
+struct pau_fpdu_info {
+	struct nes_qp          *nesqp;
+	struct nes_cqp_request *cqp_request;
+	void                   *hdr_vbase;
+	dma_addr_t             hdr_pbase;
+	int                    hdr_len;
+	u16                    data_len;
+	u16                    frag_cnt;
+	struct pau_fpdu_frag   frags[MAX_FPDU_FRAGS];
+};
+
+enum pau_qh_state {
+	PAU_DEL_QH,
+	PAU_ADD_LB_QH,
+	PAU_READY
+};
+
+struct pau_qh_chg {
+	struct nes_device *nesdev;
+	struct nes_vnic *nesvnic;
+	struct nes_qp *nesqp;
+};
+
+#endif          /* __NES_MGT_H */
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index 9d7ffeb..64f91d8 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -1090,6 +1090,8 @@
 	"LRO aggregated",
 	"LRO flushed",
 	"LRO no_desc",
+	"PAU CreateQPs",
+	"PAU DestroyQPs",
 };
 #define NES_ETHTOOL_STAT_COUNT  ARRAY_SIZE(nes_ethtool_stringset)
 
@@ -1305,6 +1307,8 @@
 	target_stat_values[++index] = nesvnic->lro_mgr.stats.aggregated;
 	target_stat_values[++index] = nesvnic->lro_mgr.stats.flushed;
 	target_stat_values[++index] = nesvnic->lro_mgr.stats.no_desc;
+	target_stat_values[++index] = atomic_read(&pau_qps_created);
+	target_stat_values[++index] = atomic_read(&pau_qps_destroyed);
 }
 
 /**
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
index f9c417c..cd10968 100644
--- a/drivers/infiniband/hw/nes/nes_utils.c
+++ b/drivers/infiniband/hw/nes/nes_utils.c
@@ -51,13 +51,34 @@
 
 #include "nes.h"
 
-
-
 static u16 nes_read16_eeprom(void __iomem *addr, u16 offset);
 
 u32 mh_detected;
 u32 mh_pauses_sent;
 
+u32 nes_set_pau(struct nes_device *nesdev)
+{
+	u32 ret = 0;
+	u32 counter;
+
+	nes_write_indexed(nesdev, NES_IDX_GPR2, NES_ENABLE_PAU);
+	nes_write_indexed(nesdev, NES_IDX_GPR_TRIGGER, 1);
+
+	for (counter = 0; counter < NES_PAU_COUNTER; counter++) {
+		udelay(30);
+		if (!nes_read_indexed(nesdev, NES_IDX_GPR2)) {
+			printk(KERN_INFO PFX "PAU is supported.\n");
+			break;
+		}
+		nes_write_indexed(nesdev, NES_IDX_GPR_TRIGGER, 1);
+	}
+	if (counter == NES_PAU_COUNTER) {
+		printk(KERN_INFO PFX "PAU is not supported.\n");
+		return -EPERM;
+	}
+	return ret;
+}
+
 /**
  * nes_read_eeprom_values -
  */
@@ -187,6 +208,11 @@
 		if (((major_ver == 3) && (minor_ver >= 16)) || (major_ver > 3))
 			nesadapter->send_term_ok = 1;
 
+		if (nes_drv_opt & NES_DRV_OPT_ENABLE_PAU) {
+			if (!nes_set_pau(nesdev))
+				nesadapter->allow_unaligned_fpdus = 1;
+		}
+
 		nesadapter->firmware_version = (((u32)(u8)(eeprom_data>>8))  <<  16) +
 				(u32)((u8)eeprom_data);
 
@@ -594,6 +620,7 @@
 		nes_free_cqp_request(nesdev, cqp_request);
 }
 
+
 /**
  * nes_post_cqp_request
  */
@@ -604,6 +631,8 @@
 	unsigned long flags;
 	u32 cqp_head;
 	u64 u64temp;
+	u32 opcode;
+	int ctx_index = NES_CQP_WQE_COMP_CTX_LOW_IDX;
 
 	spin_lock_irqsave(&nesdev->cqp.lock, flags);
 
@@ -614,17 +643,20 @@
 		nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
 		cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
 		memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe));
+		opcode = le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX]);
+		if ((opcode & NES_CQP_OPCODE_MASK) == NES_CQP_DOWNLOAD_SEGMENT)
+			ctx_index = NES_CQP_WQE_DL_COMP_CTX_LOW_IDX;
 		barrier();
 		u64temp = (unsigned long)cqp_request;
-		set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_COMP_SCRATCH_LOW_IDX,
-				    u64temp);
+		set_wqe_64bit_value(cqp_wqe->wqe_words, ctx_index, u64temp);
 		nes_debug(NES_DBG_CQP, "CQP request (opcode 0x%02X), line 1 = 0x%08X put on CQPs SQ,"
-				" request = %p, cqp_head = %u, cqp_tail = %u, cqp_size = %u,"
-				" waiting = %d, refcount = %d.\n",
-				le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f,
-				le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX]), cqp_request,
-				nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size,
-				cqp_request->waiting, atomic_read(&cqp_request->refcount));
+			" request = %p, cqp_head = %u, cqp_tail = %u, cqp_size = %u,"
+			" waiting = %d, refcount = %d.\n",
+			opcode & NES_CQP_OPCODE_MASK,
+			le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX]), cqp_request,
+			nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size,
+			cqp_request->waiting, atomic_read(&cqp_request->refcount));
+
 		barrier();
 
 		/* Ring doorbell (1 WQEs) */
@@ -645,7 +677,6 @@
 	return;
 }
 
-
 /**
  * nes_arp_table
  */
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 9f2f7d4b..5095bc4 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -1458,7 +1458,7 @@
 	struct ib_qp_attr attr;
 	struct iw_cm_id *cm_id;
 	struct iw_cm_event cm_event;
-	int ret;
+	int ret = 0;
 
 	atomic_inc(&sw_qps_destroyed);
 	nesqp->destroyed = 1;
@@ -1511,7 +1511,6 @@
 		if ((nesqp->nesrcq) && (nesqp->nesrcq != nesqp->nesscq))
 			nes_clean_cq(nesqp, nesqp->nesrcq);
 	}
-
 	nes_rem_ref(&nesqp->ibqp);
 	return 0;
 }
@@ -2338,8 +2337,10 @@
 
 	skip_pages = ((u32)region->offset) >> 12;
 
-	if (ib_copy_from_udata(&req, udata, sizeof(req)))
+	if (ib_copy_from_udata(&req, udata, sizeof(req))) {
+		ib_umem_release(region);
 		return ERR_PTR(-EFAULT);
+	}
 	nes_debug(NES_DBG_MR, "Memory Registration type = %08X.\n", req.reg_type);
 
 	switch (req.reg_type) {
@@ -2631,6 +2632,7 @@
 			return &nesmr->ibmr;
 	}
 
+	ib_umem_release(region);
 	return ERR_PTR(-ENOSYS);
 }
 
diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h
index 2df9993e..fe6b6e9 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.h
+++ b/drivers/infiniband/hw/nes/nes_verbs.h
@@ -139,7 +139,8 @@
 	struct nes_cq         *nesrcq;
 	struct nes_pd         *nespd;
 	void *cm_node; /* handle of the node this QP is associated with */
-	struct ietf_mpa_frame *ietf_frame;
+	void                  *ietf_frame;
+	u8                    ietf_frame_size;
 	dma_addr_t            ietf_frame_pbase;
 	struct ib_mr          *lsmm_mr;
 	struct nes_hw_qp      hwqp;
@@ -154,6 +155,7 @@
 	u32                   mmap_sq_db_index;
 	u32                   mmap_rq_db_index;
 	spinlock_t            lock;
+	spinlock_t            pau_lock;
 	struct nes_qp_context *nesqp_context;
 	dma_addr_t            nesqp_context_pbase;
 	void	              *pbl_vbase;
@@ -161,6 +163,8 @@
 	struct page           *page;
 	struct timer_list     terminate_timer;
 	enum ib_event_type    terminate_eventtype;
+	struct sk_buff_head   pau_list;
+	u32                   pau_rcv_nxt;
 	u16                   active_conn:1;
 	u16                   skip_lsmm:1;
 	u16                   user_mode:1;
@@ -168,7 +172,8 @@
 	u16                   flush_issued:1;
 	u16                   destroyed:1;
 	u16                   sig_all:1;
-	u16                   rsvd:9;
+	u16                   pau_mode:1;
+	u16                   rsvd:8;
 	u16                   private_data_len;
 	u16                   term_sq_flush_code;
 	u16                   term_rq_flush_code;
@@ -176,5 +181,8 @@
 	u8                    hw_tcp_state;
 	u8                    term_flags;
 	u8                    sq_kmapped;
+	u8                    pau_busy;
+	u8                    pau_pending;
+	u8                    pau_state;
 };
 #endif			/* NES_VERBS_H */
diff --git a/drivers/infiniband/hw/qib/qib_srq.c b/drivers/infiniband/hw/qib/qib_srq.c
index c3ec8ef..d623593 100644
--- a/drivers/infiniband/hw/qib/qib_srq.c
+++ b/drivers/infiniband/hw/qib/qib_srq.c
@@ -107,6 +107,11 @@
 	u32 sz;
 	struct ib_srq *ret;
 
+	if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
+		ret = ERR_PTR(-ENOSYS);
+		goto done;
+	}
+
 	if (srq_init_attr->attr.max_sge == 0 ||
 	    srq_init_attr->attr.max_sge > ib_qib_max_srq_sges ||
 	    srq_init_attr->attr.max_wr == 0 ||
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 39913a0..fe48677 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -84,7 +84,7 @@
 	ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
 
 	for (i = 0; i < frags; ++i)
-		ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
+		ib_dma_unmap_page(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
 }
 
 static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
@@ -183,7 +183,7 @@
 	ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
 
 	for (; i > 0; --i)
-		ib_dma_unmap_single(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE);
+		ib_dma_unmap_page(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE);
 
 	dev_kfree_skb_any(skb);
 	return NULL;
@@ -1496,6 +1496,7 @@
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_srq_init_attr srq_init_attr = {
+		.srq_type = IB_SRQT_BASIC,
 		.attr = {
 			.max_wr  = ipoib_recvq_size,
 			.max_sge = max_sge
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 86eae22..0e2fe463 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -212,16 +212,15 @@
 		   gid_buf, path.pathrec.dlid ? "yes" : "no");
 
 	if (path.pathrec.dlid) {
-		rate = ib_rate_to_mult(path.pathrec.rate) * 25;
+		rate = ib_rate_to_mbps(path.pathrec.rate);
 
 		seq_printf(file,
 			   "  DLID:     0x%04x\n"
 			   "  SL: %12d\n"
-			   "  rate: %*d%s Gb/sec\n",
+			   "  rate: %8d.%d Gb/sec\n",
 			   be16_to_cpu(path.pathrec.dlid),
 			   path.pathrec.sl,
-			   10 - ((rate % 10) ? 2 : 0),
-			   rate / 10, rate % 10 ? ".5" : "");
+			   rate / 1000, rate % 1000);
 	}
 
 	seq_putc(file, '\n');
diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
index 1ad1f60..869a2c2 100644
--- a/drivers/net/mlx4/eq.c
+++ b/drivers/net/mlx4/eq.c
@@ -484,7 +484,7 @@
 
 	mlx4_mtt_cleanup(dev, &eq->mtt);
 	for (i = 0; i < npages; ++i)
-		pci_free_consistent(dev->pdev, PAGE_SIZE,
+		dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
 				    eq->page_list[i].buf,
 				    eq->page_list[i].map);
 
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index 7eb8ba8..875838b 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -204,6 +204,8 @@
 #define QUERY_DEV_CAP_MAX_MCG_OFFSET		0x63
 #define QUERY_DEV_CAP_RSVD_PD_OFFSET		0x64
 #define QUERY_DEV_CAP_MAX_PD_OFFSET		0x65
+#define QUERY_DEV_CAP_RSVD_XRC_OFFSET		0x66
+#define QUERY_DEV_CAP_MAX_XRC_OFFSET		0x67
 #define QUERY_DEV_CAP_MAX_COUNTERS_OFFSET	0x68
 #define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET	0x80
 #define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET	0x82
@@ -318,6 +320,10 @@
 	dev_cap->reserved_pds = field >> 4;
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PD_OFFSET);
 	dev_cap->max_pds = 1 << (field & 0x3f);
+	MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_XRC_OFFSET);
+	dev_cap->reserved_xrcds = field >> 4;
+	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PD_OFFSET);
+	dev_cap->max_xrcds = 1 << (field & 0x1f);
 
 	MLX4_GET(size, outbox, QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET);
 	dev_cap->rdmarc_entry_sz = size;
diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h
index 1e8ecc3..bf5ec22 100644
--- a/drivers/net/mlx4/fw.h
+++ b/drivers/net/mlx4/fw.h
@@ -93,6 +93,8 @@
 	int max_mcgs;
 	int reserved_pds;
 	int max_pds;
+	int reserved_xrcds;
+	int max_xrcds;
 	int qpc_entry_sz;
 	int rdmarc_entry_sz;
 	int altc_entry_sz;
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index f0ee35d..94bbc85 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -96,6 +96,8 @@
 static int log_num_vlan;
 module_param_named(log_num_vlan, log_num_vlan, int, 0444);
 MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)");
+/* Log2 max number of VLANs per ETH port (0-7) */
+#define MLX4_LOG_NUM_VLANS 7
 
 static int use_prio;
 module_param_named(use_prio, use_prio, bool, 0444);
@@ -220,6 +222,10 @@
 	dev->caps.reserved_mrws	     = dev_cap->reserved_mrws;
 	dev->caps.reserved_uars	     = dev_cap->reserved_uars;
 	dev->caps.reserved_pds	     = dev_cap->reserved_pds;
+	dev->caps.reserved_xrcds     = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
+					dev_cap->reserved_xrcds : 0;
+	dev->caps.max_xrcds          = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
+					dev_cap->max_xrcds : 0;
 	dev->caps.mtt_entry_sz	     = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz;
 	dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
 	dev->caps.page_size_cap	     = ~(u32) (dev_cap->min_page_sz - 1);
@@ -230,7 +236,7 @@
 	dev->caps.max_gso_sz	     = dev_cap->max_gso_sz;
 
 	dev->caps.log_num_macs  = log_num_mac;
-	dev->caps.log_num_vlans = log_num_vlan;
+	dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
 	dev->caps.log_num_prios = use_prio ? 3 : 0;
 
 	for (i = 1; i <= dev->caps.num_ports; ++i) {
@@ -912,11 +918,18 @@
 		goto err_kar_unmap;
 	}
 
+	err = mlx4_init_xrcd_table(dev);
+	if (err) {
+		mlx4_err(dev, "Failed to initialize "
+			 "reliable connection domain table, aborting.\n");
+		goto err_pd_table_free;
+	}
+
 	err = mlx4_init_mr_table(dev);
 	if (err) {
 		mlx4_err(dev, "Failed to initialize "
 			 "memory region table, aborting.\n");
-		goto err_pd_table_free;
+		goto err_xrcd_table_free;
 	}
 
 	err = mlx4_init_eq_table(dev);
@@ -998,6 +1011,13 @@
 				  "ib capabilities (%d). Continuing with "
 				  "caps = 0\n", port, err);
 		dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
+
+		err = mlx4_check_ext_port_caps(dev, port);
+		if (err)
+			mlx4_warn(dev, "failed to get port %d extended "
+				  "port capabilities support info (%d)."
+				  " Assuming not supported\n", port, err);
+
 		err = mlx4_SET_PORT(dev, port);
 		if (err) {
 			mlx4_err(dev, "Failed to set port %d, aborting\n",
@@ -1033,6 +1053,9 @@
 err_mr_table_free:
 	mlx4_cleanup_mr_table(dev);
 
+err_xrcd_table_free:
+	mlx4_cleanup_xrcd_table(dev);
+
 err_pd_table_free:
 	mlx4_cleanup_pd_table(dev);
 
@@ -1355,6 +1378,7 @@
 	mlx4_cmd_use_polling(dev);
 	mlx4_cleanup_eq_table(dev);
 	mlx4_cleanup_mr_table(dev);
+	mlx4_cleanup_xrcd_table(dev);
 	mlx4_cleanup_pd_table(dev);
 	mlx4_cleanup_uar_table(dev);
 
@@ -1416,6 +1440,7 @@
 		mlx4_cmd_use_polling(dev);
 		mlx4_cleanup_eq_table(dev);
 		mlx4_cleanup_mr_table(dev);
+		mlx4_cleanup_xrcd_table(dev);
 		mlx4_cleanup_pd_table(dev);
 
 		iounmap(priv->kar);
@@ -1489,10 +1514,9 @@
 		return -1;
 	}
 
-	if ((log_num_vlan < 0) || (log_num_vlan > 7)) {
-		pr_warning("mlx4_core: bad num_vlan: %d\n", log_num_vlan);
-		return -1;
-	}
+	if (log_num_vlan != 0)
+		pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
+			   MLX4_LOG_NUM_VLANS);
 
 	if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 7)) {
 		pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg);
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index a2fcd84..5dfa68f 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -335,6 +335,7 @@
 	struct mlx4_cmd		cmd;
 
 	struct mlx4_bitmap	pd_bitmap;
+	struct mlx4_bitmap	xrcd_bitmap;
 	struct mlx4_uar_table	uar_table;
 	struct mlx4_mr_table	mr_table;
 	struct mlx4_cq_table	cq_table;
@@ -384,6 +385,7 @@
 void mlx4_free_eq_table(struct mlx4_dev *dev);
 
 int mlx4_init_pd_table(struct mlx4_dev *dev);
+int mlx4_init_xrcd_table(struct mlx4_dev *dev);
 int mlx4_init_uar_table(struct mlx4_dev *dev);
 int mlx4_init_mr_table(struct mlx4_dev *dev);
 int mlx4_init_eq_table(struct mlx4_dev *dev);
@@ -393,6 +395,7 @@
 int mlx4_init_mcg_table(struct mlx4_dev *dev);
 
 void mlx4_cleanup_pd_table(struct mlx4_dev *dev);
+void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev);
 void mlx4_cleanup_uar_table(struct mlx4_dev *dev);
 void mlx4_cleanup_mr_table(struct mlx4_dev *dev);
 void mlx4_cleanup_eq_table(struct mlx4_dev *dev);
@@ -450,6 +453,7 @@
 
 int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port);
 int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps);
+int mlx4_check_ext_port_caps(struct mlx4_dev *dev, u8 port);
 
 int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
 			  enum mlx4_protocol prot, enum mlx4_steer_type steer);
diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c
index 9c188bd..ab639cf 100644
--- a/drivers/net/mlx4/mr.c
+++ b/drivers/net/mlx4/mr.c
@@ -139,7 +139,7 @@
 
 	buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *),
 			      GFP_KERNEL);
-	buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int *),
+	buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free,
 				  GFP_KERNEL);
 	if (!buddy->bits || !buddy->num_free)
 		goto err_out;
diff --git a/drivers/net/mlx4/pd.c b/drivers/net/mlx4/pd.c
index 1286b88..3736163 100644
--- a/drivers/net/mlx4/pd.c
+++ b/drivers/net/mlx4/pd.c
@@ -61,6 +61,24 @@
 }
 EXPORT_SYMBOL_GPL(mlx4_pd_free);
 
+int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+
+	*xrcdn = mlx4_bitmap_alloc(&priv->xrcd_bitmap);
+	if (*xrcdn == -1)
+		return -ENOMEM;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_xrcd_alloc);
+
+void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn)
+{
+	mlx4_bitmap_free(&mlx4_priv(dev)->xrcd_bitmap, xrcdn);
+}
+EXPORT_SYMBOL_GPL(mlx4_xrcd_free);
+
 int mlx4_init_pd_table(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -74,6 +92,18 @@
 	mlx4_bitmap_cleanup(&mlx4_priv(dev)->pd_bitmap);
 }
 
+int mlx4_init_xrcd_table(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+
+	return mlx4_bitmap_init(&priv->xrcd_bitmap, (1 << 16),
+				(1 << 16) - 1, dev->caps.reserved_xrcds + 1, 0);
+}
+
+void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev)
+{
+	mlx4_bitmap_cleanup(&mlx4_priv(dev)->xrcd_bitmap);
+}
 
 int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar)
 {
diff --git a/drivers/net/mlx4/port.c b/drivers/net/mlx4/port.c
index 609e0ec..881592e 100644
--- a/drivers/net/mlx4/port.c
+++ b/drivers/net/mlx4/port.c
@@ -148,22 +148,26 @@
 
 	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER) {
 		err = mlx4_uc_steer_add(dev, port, mac, qpn, 1);
-		if (!err) {
-			entry = kmalloc(sizeof *entry, GFP_KERNEL);
-			if (!entry) {
-				mlx4_uc_steer_release(dev, port, mac, *qpn, 1);
-				return -ENOMEM;
-			}
-			entry->mac = mac;
-			err = radix_tree_insert(&info->mac_tree, *qpn, entry);
-			if (err) {
-				mlx4_uc_steer_release(dev, port, mac, *qpn, 1);
-				return err;
-			}
-		} else
+		if (err)
 			return err;
+
+		entry = kmalloc(sizeof *entry, GFP_KERNEL);
+		if (!entry) {
+			mlx4_uc_steer_release(dev, port, mac, *qpn, 1);
+			return -ENOMEM;
+		}
+
+		entry->mac = mac;
+		err = radix_tree_insert(&info->mac_tree, *qpn, entry);
+		if (err) {
+			kfree(entry);
+			mlx4_uc_steer_release(dev, port, mac, *qpn, 1);
+			return err;
+		}
 	}
+
 	mlx4_dbg(dev, "Registering MAC: 0x%llx\n", (unsigned long long) mac);
+
 	mutex_lock(&table->mutex);
 	for (i = 0; i < MLX4_MAX_MAC_NUM - 1; i++) {
 		if (free < 0 && !table->refs[i]) {
@@ -464,6 +468,48 @@
 	return err;
 }
 
+int mlx4_check_ext_port_caps(struct mlx4_dev *dev, u8 port)
+{
+	struct mlx4_cmd_mailbox *inmailbox, *outmailbox;
+	u8 *inbuf, *outbuf;
+	int err, packet_error;
+
+	inmailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(inmailbox))
+		return PTR_ERR(inmailbox);
+
+	outmailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(outmailbox)) {
+		mlx4_free_cmd_mailbox(dev, inmailbox);
+		return PTR_ERR(outmailbox);
+	}
+
+	inbuf = inmailbox->buf;
+	outbuf = outmailbox->buf;
+	memset(inbuf, 0, 256);
+	memset(outbuf, 0, 256);
+	inbuf[0] = 1;
+	inbuf[1] = 1;
+	inbuf[2] = 1;
+	inbuf[3] = 1;
+
+	*(__be16 *) (&inbuf[16]) = MLX4_ATTR_EXTENDED_PORT_INFO;
+	*(__be32 *) (&inbuf[20]) = cpu_to_be32(port);
+
+	err = mlx4_cmd_box(dev, inmailbox->dma, outmailbox->dma, port, 3,
+			   MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C);
+
+	packet_error = be16_to_cpu(*(__be16 *) (outbuf + 4));
+
+	dev->caps.ext_port_cap[port] = (!err && !packet_error) ?
+				       MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO
+				       : 0;
+
+	mlx4_free_cmd_mailbox(dev, inmailbox);
+	mlx4_free_cmd_mailbox(dev, outmailbox);
+	return err;
+}
+
 int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port)
 {
 	struct mlx4_cmd_mailbox *mailbox;
diff --git a/drivers/net/mlx4/qp.c b/drivers/net/mlx4/qp.c
index ec9350e..51c5389 100644
--- a/drivers/net/mlx4/qp.c
+++ b/drivers/net/mlx4/qp.c
@@ -280,6 +280,9 @@
 	 * We reserve 2 extra QPs per port for the special QPs.  The
 	 * block of special QPs must be aligned to a multiple of 8, so
 	 * round up.
+	 *
+	 * We also reserve the MSB of the 24-bit QP number to indicate
+	 * that a QP is an XRC QP.
 	 */
 	dev->caps.sqp_start =
 		ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8);
diff --git a/drivers/net/mlx4/srq.c b/drivers/net/mlx4/srq.c
index 3b07b80..a20b141 100644
--- a/drivers/net/mlx4/srq.c
+++ b/drivers/net/mlx4/srq.c
@@ -40,20 +40,20 @@
 struct mlx4_srq_context {
 	__be32			state_logsize_srqn;
 	u8			logstride;
-	u8			reserved1[3];
-	u8			pg_offset;
-	u8			reserved2[3];
-	u32			reserved3;
+	u8			reserved1;
+	__be16			xrcd;
+	__be32			pg_offset_cqn;
+	u32			reserved2;
 	u8			log_page_size;
-	u8			reserved4[2];
+	u8			reserved3[2];
 	u8			mtt_base_addr_h;
 	__be32			mtt_base_addr_l;
 	__be32			pd;
 	__be16			limit_watermark;
 	__be16			wqe_cnt;
-	u16			reserved5;
+	u16			reserved4;
 	__be16			wqe_counter;
-	u32			reserved6;
+	u32			reserved5;
 	__be64			db_rec_addr;
 };
 
@@ -109,8 +109,8 @@
 			    MLX4_CMD_TIME_CLASS_A);
 }
 
-int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt,
-		   u64 db_rec, struct mlx4_srq *srq)
+int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd,
+		   struct mlx4_mtt *mtt, u64 db_rec, struct mlx4_srq *srq)
 {
 	struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
 	struct mlx4_cmd_mailbox *mailbox;
@@ -148,6 +148,8 @@
 	srq_context->state_logsize_srqn = cpu_to_be32((ilog2(srq->max) << 24) |
 						      srq->srqn);
 	srq_context->logstride          = srq->wqe_shift - 4;
+	srq_context->xrcd		= cpu_to_be16(xrcd);
+	srq_context->pg_offset_cqn	= cpu_to_be32(cqn & 0xffffff);
 	srq_context->log_page_size      = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
 
 	mtt_addr = mlx4_mtt_addr(dev, mtt);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 53ef894..ff3ccd5 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -61,6 +61,7 @@
 	MLX4_DEV_CAP_FLAG_RC		= 1LL <<  0,
 	MLX4_DEV_CAP_FLAG_UC		= 1LL <<  1,
 	MLX4_DEV_CAP_FLAG_UD		= 1LL <<  2,
+	MLX4_DEV_CAP_FLAG_XRC		= 1LL <<  3,
 	MLX4_DEV_CAP_FLAG_SRQ		= 1LL <<  6,
 	MLX4_DEV_CAP_FLAG_IPOIB_CSUM	= 1LL <<  7,
 	MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR	= 1LL <<  8,
@@ -82,6 +83,12 @@
 	MLX4_DEV_CAP_FLAG_COUNTERS	= 1LL << 48
 };
 
+#define MLX4_ATTR_EXTENDED_PORT_INFO	cpu_to_be16(0xff90)
+
+enum {
+	MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO	= 1 <<  0
+};
+
 enum {
 	MLX4_BMME_FLAG_LOCAL_INV	= 1 <<  6,
 	MLX4_BMME_FLAG_REMOTE_INV	= 1 <<  7,
@@ -256,6 +263,8 @@
 	int			num_qp_per_mgm;
 	int			num_pds;
 	int			reserved_pds;
+	int			max_xrcds;
+	int			reserved_xrcds;
 	int			mtt_entry_sz;
 	u32			max_msg_sz;
 	u32			page_size_cap;
@@ -276,6 +285,7 @@
 	u32			port_mask;
 	enum mlx4_port_type	possible_type[MLX4_MAX_PORTS + 1];
 	u32			max_counters;
+	u8			ext_port_cap[MLX4_MAX_PORTS + 1];
 };
 
 struct mlx4_buf_list {
@@ -499,6 +509,8 @@
 
 int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn);
 void mlx4_pd_free(struct mlx4_dev *dev, u32 pdn);
+int mlx4_xrcd_alloc(struct mlx4_dev *dev, u32 *xrcdn);
+void mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn);
 
 int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar);
 void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar);
@@ -538,8 +550,8 @@
 int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp);
 void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp);
 
-int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt,
-		   u64 db_rec, struct mlx4_srq *srq);
+int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcdn,
+		   struct mlx4_mtt *mtt, u64 db_rec, struct mlx4_srq *srq);
 void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq);
 int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark);
 int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_watermark);
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 4001c82..48cc4cb 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -75,6 +75,7 @@
 	MLX4_QP_ST_UC				= 0x1,
 	MLX4_QP_ST_RD				= 0x2,
 	MLX4_QP_ST_UD				= 0x3,
+	MLX4_QP_ST_XRC				= 0x6,
 	MLX4_QP_ST_MLX				= 0x7
 };
 
@@ -137,7 +138,7 @@
 	__be32			ssn;
 	__be32			params2;
 	__be32			rnr_nextrecvpsn;
-	__be32			srcd;
+	__be32			xrcd;
 	__be32			cqn_recv;
 	__be64			db_rec_addr;
 	__be32			qkey;
diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h
index fe5b051..81aba3a 100644
--- a/include/rdma/ib_user_verbs.h
+++ b/include/rdma/ib_user_verbs.h
@@ -81,7 +81,11 @@
 	IB_USER_VERBS_CMD_MODIFY_SRQ,
 	IB_USER_VERBS_CMD_QUERY_SRQ,
 	IB_USER_VERBS_CMD_DESTROY_SRQ,
-	IB_USER_VERBS_CMD_POST_SRQ_RECV
+	IB_USER_VERBS_CMD_POST_SRQ_RECV,
+	IB_USER_VERBS_CMD_OPEN_XRCD,
+	IB_USER_VERBS_CMD_CLOSE_XRCD,
+	IB_USER_VERBS_CMD_CREATE_XSRQ,
+	IB_USER_VERBS_CMD_OPEN_QP
 };
 
 /*
@@ -222,6 +226,21 @@
 	__u32 pd_handle;
 };
 
+struct ib_uverbs_open_xrcd {
+	__u64 response;
+	__u32 fd;
+	__u32 oflags;
+	__u64 driver_data[0];
+};
+
+struct ib_uverbs_open_xrcd_resp {
+	__u32 xrcd_handle;
+};
+
+struct ib_uverbs_close_xrcd {
+	__u32 xrcd_handle;
+};
+
 struct ib_uverbs_reg_mr {
 	__u64 response;
 	__u64 start;
@@ -404,6 +423,17 @@
 	__u64 driver_data[0];
 };
 
+struct ib_uverbs_open_qp {
+	__u64 response;
+	__u64 user_handle;
+	__u32 pd_handle;
+	__u32 qpn;
+	__u8  qp_type;
+	__u8  reserved[7];
+	__u64 driver_data[0];
+};
+
+/* also used for open response */
 struct ib_uverbs_create_qp_resp {
 	__u32 qp_handle;
 	__u32 qpn;
@@ -648,11 +678,25 @@
 	__u64 driver_data[0];
 };
 
+struct ib_uverbs_create_xsrq {
+	__u64 response;
+	__u64 user_handle;
+	__u32 srq_type;
+	__u32 pd_handle;
+	__u32 max_wr;
+	__u32 max_sge;
+	__u32 srq_limit;
+	__u32 reserved;
+	__u32 xrcd_handle;
+	__u32 cq_handle;
+	__u64 driver_data[0];
+};
+
 struct ib_uverbs_create_srq_resp {
 	__u32 srq_handle;
 	__u32 max_wr;
 	__u32 max_sge;
-	__u32 reserved;
+	__u32 srqn;
 };
 
 struct ib_uverbs_modify_srq {
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 228be3e..bf5daaf 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -112,6 +112,7 @@
 	 */
 	IB_DEVICE_UD_IP_CSUM		= (1<<18),
 	IB_DEVICE_UD_TSO		= (1<<19),
+	IB_DEVICE_XRC			= (1<<20),
 	IB_DEVICE_MEM_MGT_EXTENSIONS	= (1<<21),
 	IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
 };
@@ -207,6 +208,7 @@
 	IB_PORT_SM_DISABLED			= 1 << 10,
 	IB_PORT_SYS_IMAGE_GUID_SUP		= 1 << 11,
 	IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP	= 1 << 12,
+	IB_PORT_EXTENDED_SPEEDS_SUP             = 1 << 14,
 	IB_PORT_CM_SUP				= 1 << 16,
 	IB_PORT_SNMP_TUNNEL_SUP			= 1 << 17,
 	IB_PORT_REINIT_SUP			= 1 << 18,
@@ -415,7 +417,15 @@
 	IB_RATE_40_GBPS  = 7,
 	IB_RATE_60_GBPS  = 8,
 	IB_RATE_80_GBPS  = 9,
-	IB_RATE_120_GBPS = 10
+	IB_RATE_120_GBPS = 10,
+	IB_RATE_14_GBPS  = 11,
+	IB_RATE_56_GBPS  = 12,
+	IB_RATE_112_GBPS = 13,
+	IB_RATE_168_GBPS = 14,
+	IB_RATE_25_GBPS  = 15,
+	IB_RATE_100_GBPS = 16,
+	IB_RATE_200_GBPS = 17,
+	IB_RATE_300_GBPS = 18
 };
 
 /**
@@ -427,6 +437,13 @@
 int ib_rate_to_mult(enum ib_rate rate) __attribute_const__;
 
 /**
+ * ib_rate_to_mbps - Convert the IB rate enum to Mbps.
+ * For example, IB_RATE_2_5_GBPS will be converted to 2500.
+ * @rate: rate to convert.
+ */
+int ib_rate_to_mbps(enum ib_rate rate) __attribute_const__;
+
+/**
  * mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate
  * enum.
  * @mult: multiple to convert.
@@ -522,6 +539,11 @@
 	IB_CQ_REPORT_MISSED_EVENTS	= 1 << 2,
 };
 
+enum ib_srq_type {
+	IB_SRQT_BASIC,
+	IB_SRQT_XRC
+};
+
 enum ib_srq_attr_mask {
 	IB_SRQ_MAX_WR	= 1 << 0,
 	IB_SRQ_LIMIT	= 1 << 1,
@@ -537,6 +559,14 @@
 	void		      (*event_handler)(struct ib_event *, void *);
 	void		       *srq_context;
 	struct ib_srq_attr	attr;
+	enum ib_srq_type	srq_type;
+
+	union {
+		struct {
+			struct ib_xrcd *xrcd;
+			struct ib_cq   *cq;
+		} xrc;
+	} ext;
 };
 
 struct ib_qp_cap {
@@ -565,7 +595,11 @@
 	IB_QPT_UC,
 	IB_QPT_UD,
 	IB_QPT_RAW_IPV6,
-	IB_QPT_RAW_ETHERTYPE
+	IB_QPT_RAW_ETHERTYPE,
+	/* Save 8 for RAW_PACKET */
+	IB_QPT_XRC_INI = 9,
+	IB_QPT_XRC_TGT,
+	IB_QPT_MAX
 };
 
 enum ib_qp_create_flags {
@@ -579,6 +613,7 @@
 	struct ib_cq	       *send_cq;
 	struct ib_cq	       *recv_cq;
 	struct ib_srq	       *srq;
+	struct ib_xrcd	       *xrcd;     /* XRC TGT QPs only */
 	struct ib_qp_cap	cap;
 	enum ib_sig_type	sq_sig_type;
 	enum ib_qp_type		qp_type;
@@ -586,6 +621,13 @@
 	u8			port_num; /* special QP types only */
 };
 
+struct ib_qp_open_attr {
+	void                  (*event_handler)(struct ib_event *, void *);
+	void		       *qp_context;
+	u32			qp_num;
+	enum ib_qp_type		qp_type;
+};
+
 enum ib_rnr_timeout {
 	IB_RNR_TIMER_655_36 =  0,
 	IB_RNR_TIMER_000_01 =  1,
@@ -770,6 +812,7 @@
 			u32				rkey;
 		} fast_reg;
 	} wr;
+	u32			xrc_remote_srq_num;	/* XRC TGT QPs only */
 };
 
 struct ib_recv_wr {
@@ -831,6 +874,7 @@
 	struct list_head	qp_list;
 	struct list_head	srq_list;
 	struct list_head	ah_list;
+	struct list_head	xrcd_list;
 	int			closing;
 };
 
@@ -858,6 +902,15 @@
 	atomic_t          	usecnt; /* count all resources */
 };
 
+struct ib_xrcd {
+	struct ib_device       *device;
+	atomic_t		usecnt; /* count all exposed resources */
+	struct inode	       *inode;
+
+	struct mutex		tgt_qp_mutex;
+	struct list_head	tgt_qp_list;
+};
+
 struct ib_ah {
 	struct ib_device	*device;
 	struct ib_pd		*pd;
@@ -882,7 +935,16 @@
 	struct ib_uobject      *uobject;
 	void		      (*event_handler)(struct ib_event *, void *);
 	void		       *srq_context;
+	enum ib_srq_type	srq_type;
 	atomic_t		usecnt;
+
+	union {
+		struct {
+			struct ib_xrcd *xrcd;
+			struct ib_cq   *cq;
+			u32		srq_num;
+		} xrc;
+	} ext;
 };
 
 struct ib_qp {
@@ -891,6 +953,11 @@
 	struct ib_cq	       *send_cq;
 	struct ib_cq	       *recv_cq;
 	struct ib_srq	       *srq;
+	struct ib_xrcd	       *xrcd; /* XRC TGT QPs only */
+	struct list_head	xrcd_list;
+	atomic_t		usecnt; /* count times opened */
+	struct list_head	open_list;
+	struct ib_qp           *real_qp;
 	struct ib_uobject      *uobject;
 	void                  (*event_handler)(struct ib_event *, void *);
 	void		       *qp_context;
@@ -1149,6 +1216,10 @@
 						  struct ib_grh *in_grh,
 						  struct ib_mad *in_mad,
 						  struct ib_mad *out_mad);
+	struct ib_xrcd *	   (*alloc_xrcd)(struct ib_device *device,
+						 struct ib_ucontext *ucontext,
+						 struct ib_udata *udata);
+	int			   (*dealloc_xrcd)(struct ib_xrcd *xrcd);
 
 	struct ib_dma_mapping_ops   *dma_ops;
 
@@ -1443,6 +1514,25 @@
 int ib_destroy_qp(struct ib_qp *qp);
 
 /**
+ * ib_open_qp - Obtain a reference to an existing sharable QP.
+ * @xrcd - XRC domain
+ * @qp_open_attr: Attributes identifying the QP to open.
+ *
+ * Returns a reference to a sharable QP.
+ */
+struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
+			 struct ib_qp_open_attr *qp_open_attr);
+
+/**
+ * ib_close_qp - Release an external reference to a QP.
+ * @qp: The QP handle to release
+ *
+ * The opened QP handle is released by the caller.  The underlying
+ * shared QP is not destroyed until all internal references are released.
+ */
+int ib_close_qp(struct ib_qp *qp);
+
+/**
  * ib_post_send - Posts a list of work requests to the send queue of
  *   the specified QP.
  * @qp: The QP to post the work request on.
@@ -2060,4 +2150,16 @@
  */
 int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
 
+/**
+ * ib_alloc_xrcd - Allocates an XRC domain.
+ * @device: The device on which to allocate the XRC domain.
+ */
+struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device);
+
+/**
+ * ib_dealloc_xrcd - Deallocates an XRC domain.
+ * @xrcd: The XRC domain to deallocate.
+ */
+int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
+
 #endif /* IB_VERBS_H */
diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h
index 2d0191c..1a046b1 100644
--- a/include/rdma/iw_cm.h
+++ b/include/rdma/iw_cm.h
@@ -52,8 +52,10 @@
 	struct sockaddr_in local_addr;
 	struct sockaddr_in remote_addr;
 	void *private_data;
-	u8 private_data_len;
 	void *provider_data;
+	u8 private_data_len;
+	u8 ord;
+	u8 ird;
 };
 
 /**
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 26977c1..51988f8 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -65,6 +65,7 @@
 enum rdma_port_space {
 	RDMA_PS_SDP   = 0x0001,
 	RDMA_PS_IPOIB = 0x0002,
+	RDMA_PS_IB    = 0x013F,
 	RDMA_PS_TCP   = 0x0106,
 	RDMA_PS_UDP   = 0x0111,
 };
diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h
index fc82c18..5348a00 100644
--- a/include/rdma/rdma_user_cm.h
+++ b/include/rdma/rdma_user_cm.h
@@ -77,7 +77,8 @@
 	__u64 uid;
 	__u64 response;
 	__u16 ps;
-	__u8  reserved[6];
+	__u8  qp_type;
+	__u8  reserved[5];
 };
 
 struct rdma_ucm_create_id_resp {