IB: split struct ib_send_wr

This patch split up struct ib_send_wr so that all non-trivial verbs
use their own structure which embedds struct ib_send_wr.  This dramaticly
shrinks the size of a WR for most common operations:

sizeof(struct ib_send_wr) (old):	96

sizeof(struct ib_send_wr):		48
sizeof(struct ib_rdma_wr):		64
sizeof(struct ib_atomic_wr):		96
sizeof(struct ib_ud_wr):		88
sizeof(struct ib_fast_reg_wr):		88
sizeof(struct ib_bind_mw_wr):		96
sizeof(struct ib_sig_handover_wr):	80

And with Sagi's pending MR rework the fast registration WR will also be
down to a reasonable size:

sizeof(struct ib_fastreg_wr):		64

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com> [srp, srpt]
Reviewed-by: Chuck Lever <chuck.lever@oracle.com> [sunrpc]
Tested-by: Haggai Eran <haggaie@mellanox.com>
Tested-by: Sagi Grimberg <sagig@mellanox.com>
Tested-by: Steve Wise <swise@opengridcomputing.com>
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 3dcc498..a6b0b09 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -374,7 +374,7 @@
 		    wr->opcode != IB_WR_SEND_WITH_IMM)
 			goto bail_inval;
 		/* Check UD destination address PD */
-		if (qp->ibqp.pd != wr->wr.ud.ah->pd)
+		if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
 			goto bail_inval;
 	} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
 		goto bail_inval;
@@ -397,7 +397,23 @@
 	rkt = &to_idev(qp->ibqp.device)->lk_table;
 	pd = to_ipd(qp->ibqp.pd);
 	wqe = get_swqe_ptr(qp, qp->s_head);
-	wqe->wr = *wr;
+
+	if (qp->ibqp.qp_type != IB_QPT_UC &&
+	    qp->ibqp.qp_type != IB_QPT_RC)
+		memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
+	else if (wr->opcode == IB_WR_FAST_REG_MR)
+		memcpy(&wqe->fast_reg_wr, fast_reg_wr(wr),
+			sizeof(wqe->fast_reg_wr));
+	else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+		 wr->opcode == IB_WR_RDMA_WRITE ||
+		 wr->opcode == IB_WR_RDMA_READ)
+		memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
+	else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+		 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+		memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
+	else
+		memcpy(&wqe->wr, wr, sizeof(wqe->wr));
+
 	wqe->length = 0;
 	j = 0;
 	if (wr->num_sge) {
@@ -426,7 +442,7 @@
 				  qp->port_num - 1)->ibmtu)
 		goto bail_inval_free;
 	else
-		atomic_inc(&to_iah(wr->wr.ud.ah)->refcount);
+		atomic_inc(&to_iah(ud_wr(wr)->ah)->refcount);
 	wqe->ssn = qp->s_ssn++;
 	qp->s_head = next;