IB: split struct ib_send_wr

This patch splits up struct ib_send_wr so that all non-trivial verbs
use their own structure which embeds struct ib_send_wr.  This dramatically
shrinks the size of a WR for the most common operations:

sizeof(struct ib_send_wr) (old):	96

sizeof(struct ib_send_wr):		48
sizeof(struct ib_rdma_wr):		64
sizeof(struct ib_atomic_wr):		96
sizeof(struct ib_ud_wr):		88
sizeof(struct ib_fast_reg_wr):		88
sizeof(struct ib_bind_mw_wr):		96
sizeof(struct ib_sig_handover_wr):	80

With Sagi's pending MR rework, the fast registration WR will also come
down to a reasonable size:

sizeof(struct ib_fastreg_wr):		64

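The per-verb structures all follow the same pattern: struct ib_send_wr is
embedded as the first member, and a container_of() helper converts back
from the generic WR passed to ->post_send.  A minimal sketch of the RDMA
variant (field set inferred from the conversions below; the real
definitions live with the other verbs headers, presumably in
include/rdma/ib_verbs.h):

	struct ib_rdma_wr {
		struct ib_send_wr	wr;	/* generic WR, kept first */
		u64			remote_addr;
		u32			rkey;
	};

	static inline struct ib_rdma_wr *rdma_wr(struct ib_send_wr *wr)
	{
		return container_of(wr, struct ib_rdma_wr, wr);
	}

Drivers that queue WRs in software (such as qib below) switch their WQE to
a union of the per-verb structures; because the generic WR is the first
member, fields like wqe->rdma_wr.wr.opcode and wqe->wr.opcode alias the
same storage.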
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com> [srp, srpt]
Reviewed-by: Chuck Lever <chuck.lever@oracle.com> [sunrpc]
Tested-by: Haggai Eran <haggaie@mellanox.com>
Tested-by: Sagi Grimberg <sagig@mellanox.com>
Tested-by: Steve Wise <swise@opengridcomputing.com>
diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c
index 5afaa21..eaf139a 100644
--- a/drivers/infiniband/hw/qib/qib_keys.c
+++ b/drivers/infiniband/hw/qib/qib_keys.c
@@ -338,12 +338,13 @@
 /*
  * Initialize the memory region specified by the work reqeust.
  */
-int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr)
+int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *send_wr)
 {
+	struct ib_fast_reg_wr *wr = fast_reg_wr(send_wr);
 	struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
 	struct qib_pd *pd = to_ipd(qp->ibqp.pd);
 	struct qib_mregion *mr;
-	u32 rkey = wr->wr.fast_reg.rkey;
+	u32 rkey = wr->rkey;
 	unsigned i, n, m;
 	int ret = -EINVAL;
 	unsigned long flags;
@@ -360,22 +361,22 @@
 	if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd))
 		goto bail;
 
-	if (wr->wr.fast_reg.page_list_len > mr->max_segs)
+	if (wr->page_list_len > mr->max_segs)
 		goto bail;
 
-	ps = 1UL << wr->wr.fast_reg.page_shift;
-	if (wr->wr.fast_reg.length > ps * wr->wr.fast_reg.page_list_len)
+	ps = 1UL << wr->page_shift;
+	if (wr->length > ps * wr->page_list_len)
 		goto bail;
 
-	mr->user_base = wr->wr.fast_reg.iova_start;
-	mr->iova = wr->wr.fast_reg.iova_start;
+	mr->user_base = wr->iova_start;
+	mr->iova = wr->iova_start;
 	mr->lkey = rkey;
-	mr->length = wr->wr.fast_reg.length;
-	mr->access_flags = wr->wr.fast_reg.access_flags;
-	page_list = wr->wr.fast_reg.page_list->page_list;
+	mr->length = wr->length;
+	mr->access_flags = wr->access_flags;
+	page_list = wr->page_list->page_list;
 	m = 0;
 	n = 0;
-	for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
+	for (i = 0; i < wr->page_list_len; i++) {
 		mr->map[m]->segs[n].vaddr = (void *) page_list[i];
 		mr->map[m]->segs[n].length = ps;
 		if (++n == QIB_SEGSZ) {
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 4fa88ba..40f85bb 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -436,7 +436,7 @@
 			if (qp->ibqp.qp_type == IB_QPT_UD ||
 			    qp->ibqp.qp_type == IB_QPT_SMI ||
 			    qp->ibqp.qp_type == IB_QPT_GSI)
-				atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount);
+				atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
 			if (++qp->s_last >= qp->s_size)
 				qp->s_last = 0;
 		}
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 4544d6f..e6b7556 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -373,10 +373,11 @@
 				qp->s_flags |= QIB_S_WAIT_SSN_CREDIT;
 				goto bail;
 			}
+
 			ohdr->u.rc.reth.vaddr =
-				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+				cpu_to_be64(wqe->rdma_wr.remote_addr);
 			ohdr->u.rc.reth.rkey =
-				cpu_to_be32(wqe->wr.wr.rdma.rkey);
+				cpu_to_be32(wqe->rdma_wr.rkey);
 			ohdr->u.rc.reth.length = cpu_to_be32(len);
 			hwords += sizeof(struct ib_reth) / sizeof(u32);
 			wqe->lpsn = wqe->psn;
@@ -386,15 +387,15 @@
 				len = pmtu;
 				break;
 			}
-			if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+			if (wqe->rdma_wr.wr.opcode == IB_WR_RDMA_WRITE)
 				qp->s_state = OP(RDMA_WRITE_ONLY);
 			else {
-				qp->s_state =
-					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
+				qp->s_state = OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
 				/* Immediate data comes after RETH */
-				ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
+				ohdr->u.rc.imm_data =
+					wqe->rdma_wr.wr.ex.imm_data;
 				hwords += 1;
-				if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+				if (wqe->rdma_wr.wr.send_flags & IB_SEND_SOLICITED)
 					bth0 |= IB_BTH_SOLICITED;
 			}
 			bth2 |= IB_BTH_REQ_ACK;
@@ -424,10 +425,11 @@
 					qp->s_next_psn += (len - 1) / pmtu;
 				wqe->lpsn = qp->s_next_psn++;
 			}
+
 			ohdr->u.rc.reth.vaddr =
-				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+				cpu_to_be64(wqe->rdma_wr.remote_addr);
 			ohdr->u.rc.reth.rkey =
-				cpu_to_be32(wqe->wr.wr.rdma.rkey);
+				cpu_to_be32(wqe->rdma_wr.rkey);
 			ohdr->u.rc.reth.length = cpu_to_be32(len);
 			qp->s_state = OP(RDMA_READ_REQUEST);
 			hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
@@ -455,24 +457,24 @@
 					qp->s_lsn++;
 				wqe->lpsn = wqe->psn;
 			}
-			if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+			if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
 				qp->s_state = OP(COMPARE_SWAP);
 				ohdr->u.atomic_eth.swap_data = cpu_to_be64(
-					wqe->wr.wr.atomic.swap);
+					wqe->atomic_wr.swap);
 				ohdr->u.atomic_eth.compare_data = cpu_to_be64(
-					wqe->wr.wr.atomic.compare_add);
+					wqe->atomic_wr.compare_add);
 			} else {
 				qp->s_state = OP(FETCH_ADD);
 				ohdr->u.atomic_eth.swap_data = cpu_to_be64(
-					wqe->wr.wr.atomic.compare_add);
+					wqe->atomic_wr.compare_add);
 				ohdr->u.atomic_eth.compare_data = 0;
 			}
 			ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
-				wqe->wr.wr.atomic.remote_addr >> 32);
+				wqe->atomic_wr.remote_addr >> 32);
 			ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
-				wqe->wr.wr.atomic.remote_addr);
+				wqe->atomic_wr.remote_addr);
 			ohdr->u.atomic_eth.rkey = cpu_to_be32(
-				wqe->wr.wr.atomic.rkey);
+				wqe->atomic_wr.rkey);
 			hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
 			ss = NULL;
 			len = 0;
@@ -597,9 +599,9 @@
 		 */
 		len = ((qp->s_psn - wqe->psn) & QIB_PSN_MASK) * pmtu;
 		ohdr->u.rc.reth.vaddr =
-			cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len);
+			cpu_to_be64(wqe->rdma_wr.remote_addr + len);
 		ohdr->u.rc.reth.rkey =
-			cpu_to_be32(wqe->wr.wr.rdma.rkey);
+			cpu_to_be32(wqe->rdma_wr.rkey);
 		ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
 		qp->s_state = OP(RDMA_READ_REQUEST);
 		hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index 22e356c..b1aa21b 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -459,8 +459,8 @@
 		if (wqe->length == 0)
 			break;
 		if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
-					  wqe->wr.wr.rdma.remote_addr,
-					  wqe->wr.wr.rdma.rkey,
+					  wqe->rdma_wr.remote_addr,
+					  wqe->rdma_wr.rkey,
 					  IB_ACCESS_REMOTE_WRITE)))
 			goto acc_err;
 		qp->r_sge.sg_list = NULL;
@@ -472,8 +472,8 @@
 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
 			goto inv_err;
 		if (unlikely(!qib_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
-					  wqe->wr.wr.rdma.remote_addr,
-					  wqe->wr.wr.rdma.rkey,
+					  wqe->rdma_wr.remote_addr,
+					  wqe->rdma_wr.rkey,
 					  IB_ACCESS_REMOTE_READ)))
 			goto acc_err;
 		release = 0;
@@ -490,18 +490,18 @@
 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
 			goto inv_err;
 		if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
-					  wqe->wr.wr.atomic.remote_addr,
-					  wqe->wr.wr.atomic.rkey,
+					  wqe->atomic_wr.remote_addr,
+					  wqe->atomic_wr.rkey,
 					  IB_ACCESS_REMOTE_ATOMIC)))
 			goto acc_err;
 		/* Perform atomic OP and save result. */
 		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
-		sdata = wqe->wr.wr.atomic.compare_add;
+		sdata = wqe->atomic_wr.compare_add;
 		*(u64 *) sqp->s_sge.sge.vaddr =
-			(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
+			(wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
 			(u64) atomic64_add_return(sdata, maddr) - sdata :
 			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
-				      sdata, wqe->wr.wr.atomic.swap);
+				      sdata, wqe->atomic_wr.swap);
 		qib_put_mr(qp->r_sge.sge.mr);
 		qp->r_sge.num_sge = 0;
 		goto send_comp;
@@ -785,7 +785,7 @@
 	if (qp->ibqp.qp_type == IB_QPT_UD ||
 	    qp->ibqp.qp_type == IB_QPT_SMI ||
 	    qp->ibqp.qp_type == IB_QPT_GSI)
-		atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount);
+		atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
 
 	/* See ch. 11.2.4.1 and 10.7.3.1 */
 	if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index aa3a803..06a5645 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -129,9 +129,9 @@
 		case IB_WR_RDMA_WRITE:
 		case IB_WR_RDMA_WRITE_WITH_IMM:
 			ohdr->u.rc.reth.vaddr =
-				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+				cpu_to_be64(wqe->rdma_wr.remote_addr);
 			ohdr->u.rc.reth.rkey =
-				cpu_to_be32(wqe->wr.wr.rdma.rkey);
+				cpu_to_be32(wqe->rdma_wr.rkey);
 			ohdr->u.rc.reth.length = cpu_to_be32(len);
 			hwords += sizeof(struct ib_reth) / 4;
 			if (len > pmtu) {
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index 26243b7..59193f6 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -59,7 +59,7 @@
 	u32 length;
 	enum ib_qp_type sqptype, dqptype;
 
-	qp = qib_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn);
+	qp = qib_lookup_qpn(ibp, swqe->ud_wr.remote_qpn);
 	if (!qp) {
 		ibp->n_pkt_drops++;
 		return;
@@ -76,7 +76,7 @@
 		goto drop;
 	}
 
-	ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr;
+	ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
 	ppd = ppd_from_ibp(ibp);
 
 	if (qp->ibqp.qp_num > 1) {
@@ -106,8 +106,8 @@
 	if (qp->ibqp.qp_num) {
 		u32 qkey;
 
-		qkey = (int)swqe->wr.wr.ud.remote_qkey < 0 ?
-			sqp->qkey : swqe->wr.wr.ud.remote_qkey;
+		qkey = (int)swqe->ud_wr.remote_qkey < 0 ?
+			sqp->qkey : swqe->ud_wr.remote_qkey;
 		if (unlikely(qkey != qp->qkey)) {
 			u16 lid;
 
@@ -210,7 +210,7 @@
 	wc.qp = &qp->ibqp;
 	wc.src_qp = sqp->ibqp.qp_num;
 	wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ?
-		swqe->wr.wr.ud.pkey_index : 0;
+		swqe->ud_wr.pkey_index : 0;
 	wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1));
 	wc.sl = ah_attr->sl;
 	wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
@@ -277,7 +277,7 @@
 	/* Construct the header. */
 	ibp = to_iport(qp->ibqp.device, qp->port_num);
 	ppd = ppd_from_ibp(ibp);
-	ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
+	ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
 	if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) {
 		if (ah_attr->dlid != QIB_PERMISSIVE_LID)
 			this_cpu_inc(ibp->pmastats->n_multicast_xmit);
@@ -363,7 +363,7 @@
 	bth0 |= extra_bytes << 20;
 	bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? QIB_DEFAULT_P_KEY :
 		qib_get_pkey(ibp, qp->ibqp.qp_type == IB_QPT_GSI ?
-			     wqe->wr.wr.ud.pkey_index : qp->s_pkey_index);
+			     wqe->ud_wr.pkey_index : qp->s_pkey_index);
 	ohdr->bth[0] = cpu_to_be32(bth0);
 	/*
 	 * Use the multicast QP if the destination LID is a multicast LID.
@@ -371,14 +371,14 @@
 	ohdr->bth[1] = ah_attr->dlid >= QIB_MULTICAST_LID_BASE &&
 		ah_attr->dlid != QIB_PERMISSIVE_LID ?
 		cpu_to_be32(QIB_MULTICAST_QPN) :
-		cpu_to_be32(wqe->wr.wr.ud.remote_qpn);
+		cpu_to_be32(wqe->ud_wr.remote_qpn);
 	ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK);
 	/*
 	 * Qkeys with the high order bit set mean use the
 	 * qkey from the QP context instead of the WR (see 10.2.5).
 	 */
-	ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ?
-					 qp->qkey : wqe->wr.wr.ud.remote_qkey);
+	ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ?
+					 qp->qkey : wqe->ud_wr.remote_qkey);
 	ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
 
 done:
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 3dcc498..a6b0b09 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -374,7 +374,7 @@
 		    wr->opcode != IB_WR_SEND_WITH_IMM)
 			goto bail_inval;
 		/* Check UD destination address PD */
-		if (qp->ibqp.pd != wr->wr.ud.ah->pd)
+		if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
 			goto bail_inval;
 	} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
 		goto bail_inval;
@@ -397,7 +397,23 @@
 	rkt = &to_idev(qp->ibqp.device)->lk_table;
 	pd = to_ipd(qp->ibqp.pd);
 	wqe = get_swqe_ptr(qp, qp->s_head);
-	wqe->wr = *wr;
+
+	if (qp->ibqp.qp_type != IB_QPT_UC &&
+	    qp->ibqp.qp_type != IB_QPT_RC)
+		memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
+	else if (wr->opcode == IB_WR_FAST_REG_MR)
+		memcpy(&wqe->fast_reg_wr, fast_reg_wr(wr),
+			sizeof(wqe->fast_reg_wr));
+	else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+		 wr->opcode == IB_WR_RDMA_WRITE ||
+		 wr->opcode == IB_WR_RDMA_READ)
+		memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
+	else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+		 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+		memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
+	else
+		memcpy(&wqe->wr, wr, sizeof(wqe->wr));
+
 	wqe->length = 0;
 	j = 0;
 	if (wr->num_sge) {
@@ -426,7 +442,7 @@
 				  qp->port_num - 1)->ibmtu)
 		goto bail_inval_free;
 	else
-		atomic_inc(&to_iah(wr->wr.ud.ah)->refcount);
+		atomic_inc(&to_iah(ud_wr(wr)->ah)->refcount);
 	wqe->ssn = qp->s_ssn++;
 	qp->s_head = next;
 
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index a08df70..8aa1685 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -338,7 +338,13 @@
  * in qp->s_max_sge.
  */
 struct qib_swqe {
-	struct ib_send_wr wr;   /* don't use wr.sg_list */
+	union {
+		struct ib_send_wr wr;   /* don't use wr.sg_list */
+		struct ib_ud_wr ud_wr;
+		struct ib_fast_reg_wr fast_reg_wr;
+		struct ib_rdma_wr rdma_wr;
+		struct ib_atomic_wr atomic_wr;
+	};
 	u32 psn;                /* first packet sequence number */
 	u32 lpsn;               /* last packet sequence number */
 	u32 ssn;                /* send sequence number */