RDS: break out rdma and data ops into nested structs in rds_message
Clearly separate the RDMA-related fields of struct rds_message (rm) from
the data-related ones by grouping them into nested rdma and data structs.
This is in anticipation of adding atomic support.
Signed-off-by: Andy Grover <andy.grover@oracle.com>
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 0b0090d..5375020 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -83,11 +83,11 @@
rdsdebug("ic %p send %p rm %p\n", ic, send, rm);
ib_dma_unmap_sg(ic->i_cm_id->device,
- rm->m_sg, rm->m_nents,
- DMA_TO_DEVICE);
+ rm->data.m_sg, rm->data.m_nents,
+ DMA_TO_DEVICE);
- if (rm->m_rdma_op) {
- rds_ib_send_unmap_rdma(ic, rm->m_rdma_op);
+ if (rm->rdma.m_rdma_op) {
+ rds_ib_send_unmap_rdma(ic, rm->rdma.m_rdma_op);
/* If the user asked for a completion notification on this
* message, we can implement three different semantics:
@@ -111,10 +111,10 @@
*/
rds_ib_send_rdma_complete(rm, wc_status);
- if (rm->m_rdma_op->r_write)
- rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes);
+ if (rm->rdma.m_rdma_op->r_write)
+ rds_stats_add(s_send_rdma_bytes, rm->rdma.m_rdma_op->r_bytes);
else
- rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes);
+ rds_stats_add(s_recv_rdma_bytes, rm->rdma.m_rdma_op->r_bytes);
}
/* If anyone waited for this message to get flushed out, wake
@@ -244,8 +244,8 @@
rm = rds_send_get_message(conn, send->s_op);
if (rm) {
- if (rm->m_rdma_op)
- rds_ib_send_unmap_rdma(ic, rm->m_rdma_op);
+ if (rm->rdma.m_rdma_op)
+ rds_ib_send_unmap_rdma(ic, rm->rdma.m_rdma_op);
rds_ib_send_rdma_complete(rm, wc.status);
rds_message_put(rm);
}
@@ -532,18 +532,20 @@
rm->m_inc.i_hdr.h_flags,
be32_to_cpu(rm->m_inc.i_hdr.h_len));
*/
- if (rm->m_nents) {
- rm->m_count = ib_dma_map_sg(dev,
- rm->m_sg, rm->m_nents, DMA_TO_DEVICE);
- rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count);
- if (rm->m_count == 0) {
+ if (rm->data.m_nents) {
+ rm->data.m_count = ib_dma_map_sg(dev,
+ rm->data.m_sg,
+ rm->data.m_nents,
+ DMA_TO_DEVICE);
+ rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.m_count);
+ if (rm->data.m_count == 0) {
rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
ret = -ENOMEM; /* XXX ? */
goto out;
}
} else {
- rm->m_count = 0;
+ rm->data.m_count = 0;
}
ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
@@ -559,10 +561,10 @@
/* If it has a RDMA op, tell the peer we did it. This is
* used by the peer to release use-once RDMA MRs. */
- if (rm->m_rdma_op) {
+ if (rm->rdma.m_rdma_op) {
struct rds_ext_header_rdma ext_hdr;
- ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key);
+ ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.m_rdma_op->r_key);
rds_message_add_extension(&rm->m_inc.i_hdr,
RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
}
@@ -590,7 +592,7 @@
send = &ic->i_sends[pos];
first = send;
prev = NULL;
- scat = &rm->m_sg[sg];
+ scat = &rm->data.m_sg[sg];
sent = 0;
i = 0;
@@ -600,7 +602,7 @@
* or when requested by the user. Right now, we let
* the application choose.
*/
- if (rm->m_rdma_op && rm->m_rdma_op->r_fence)
+ if (rm->rdma.m_rdma_op && rm->rdma.m_rdma_op->r_fence)
send_flags = IB_SEND_FENCE;
/*
@@ -619,7 +621,7 @@
}
/* if there's data reference it with a chain of work reqs */
- for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) {
+ for (; i < work_alloc && scat != &rm->data.m_sg[rm->data.m_count]; i++) {
unsigned int len;
send = &ic->i_sends[pos];
@@ -697,7 +699,7 @@
sent += sizeof(struct rds_header);
/* if we finished the message then send completion owns it */
- if (scat == &rm->m_sg[rm->m_count]) {
+ if (scat == &rm->data.m_sg[rm->data.m_count]) {
prev->s_rm = ic->i_rm;
prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
ic->i_rm = NULL;
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
index dced532..c187e8f 100644
--- a/net/rds/iw_send.c
+++ b/net/rds/iw_send.c
@@ -83,11 +83,11 @@
rdsdebug("ic %p send %p rm %p\n", ic, send, rm);
ib_dma_unmap_sg(ic->i_cm_id->device,
- rm->m_sg, rm->m_nents,
+ rm->data.m_sg, rm->data.m_nents,
DMA_TO_DEVICE);
- if (rm->m_rdma_op) {
- rds_iw_send_unmap_rdma(ic, rm->m_rdma_op);
+ if (rm->rdma.m_rdma_op) {
+ rds_iw_send_unmap_rdma(ic, rm->rdma.m_rdma_op);
/* If the user asked for a completion notification on this
* message, we can implement three different semantics:
@@ -111,10 +111,10 @@
*/
rds_iw_send_rdma_complete(rm, wc_status);
- if (rm->m_rdma_op->r_write)
- rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes);
+ if (rm->rdma.m_rdma_op->r_write)
+ rds_stats_add(s_send_rdma_bytes, rm->rdma.m_rdma_op->r_bytes);
else
- rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes);
+ rds_stats_add(s_recv_rdma_bytes, rm->rdma.m_rdma_op->r_bytes);
}
/* If anyone waited for this message to get flushed out, wake
@@ -563,18 +563,20 @@
rm->m_inc.i_hdr.h_flags,
be32_to_cpu(rm->m_inc.i_hdr.h_len));
*/
- if (rm->m_nents) {
- rm->m_count = ib_dma_map_sg(dev,
- rm->m_sg, rm->m_nents, DMA_TO_DEVICE);
- rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count);
- if (rm->m_count == 0) {
+ if (rm->data.m_nents) {
+ rm->data.m_count = ib_dma_map_sg(dev,
+ rm->data.m_sg,
+ rm->data.m_nents,
+ DMA_TO_DEVICE);
+ rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.m_count);
+ if (rm->data.m_count == 0) {
rds_iw_stats_inc(s_iw_tx_sg_mapping_failure);
rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
ret = -ENOMEM; /* XXX ? */
goto out;
}
} else {
- rm->m_count = 0;
+ rm->data.m_count = 0;
}
ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
@@ -590,10 +592,10 @@
/* If it has a RDMA op, tell the peer we did it. This is
* used by the peer to release use-once RDMA MRs. */
- if (rm->m_rdma_op) {
+ if (rm->rdma.m_rdma_op) {
struct rds_ext_header_rdma ext_hdr;
- ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key);
+ ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.m_rdma_op->r_key);
rds_message_add_extension(&rm->m_inc.i_hdr,
RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
}
@@ -621,7 +623,7 @@
send = &ic->i_sends[pos];
first = send;
prev = NULL;
- scat = &rm->m_sg[sg];
+ scat = &rm->data.m_sg[sg];
sent = 0;
i = 0;
@@ -631,7 +633,7 @@
* or when requested by the user. Right now, we let
* the application choose.
*/
- if (rm->m_rdma_op && rm->m_rdma_op->r_fence)
+ if (rm->rdma.m_rdma_op && rm->rdma.m_rdma_op->r_fence)
send_flags = IB_SEND_FENCE;
/*
@@ -650,7 +652,7 @@
}
/* if there's data reference it with a chain of work reqs */
- for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) {
+ for (; i < work_alloc && scat != &rm->data.m_sg[rm->data.m_count]; i++) {
unsigned int len;
send = &ic->i_sends[pos];
@@ -728,7 +730,7 @@
sent += sizeof(struct rds_header);
/* if we finished the message then send completion owns it */
- if (scat == &rm->m_sg[rm->m_count]) {
+ if (scat == &rm->data.m_sg[rm->data.m_count]) {
prev->s_rm = ic->i_rm;
prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
ic->i_rm = NULL;
diff --git a/net/rds/message.c b/net/rds/message.c
index 809656c..4421d16 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -63,17 +63,17 @@
if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags)))
return;
- for (i = 0; i < rm->m_nents; i++) {
- rdsdebug("putting data page %p\n", (void *)sg_page(&rm->m_sg[i]));
+ for (i = 0; i < rm->data.m_nents; i++) {
+ rdsdebug("putting data page %p\n", (void *)sg_page(&rm->data.m_sg[i]));
/* XXX will have to put_page for page refs */
- __free_page(sg_page(&rm->m_sg[i]));
+ __free_page(sg_page(&rm->data.m_sg[i]));
}
- rm->m_nents = 0;
+ rm->data.m_nents = 0;
- if (rm->m_rdma_op)
- rds_rdma_free_op(rm->m_rdma_op);
- if (rm->m_rdma_mr)
- rds_mr_put(rm->m_rdma_mr);
+ if (rm->rdma.m_rdma_op)
+ rds_rdma_free_op(rm->rdma.m_rdma_op);
+ if (rm->rdma.m_rdma_mr)
+ rds_mr_put(rm->rdma.m_rdma_mr);
}
void rds_message_inc_purge(struct rds_incoming *inc)
@@ -224,7 +224,7 @@
goto out;
if (nents)
- sg_init_table(rm->m_sg, nents);
+ sg_init_table(rm->data.m_sg, nents);
atomic_set(&rm->m_refcount, 1);
INIT_LIST_HEAD(&rm->m_sock_item);
INIT_LIST_HEAD(&rm->m_conn_item);
@@ -245,10 +245,10 @@
set_bit(RDS_MSG_PAGEVEC, &rm->m_flags);
rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
- rm->m_nents = ceil(total_len, PAGE_SIZE);
+ rm->data.m_nents = ceil(total_len, PAGE_SIZE);
- for (i = 0; i < rm->m_nents; ++i) {
- sg_set_page(&rm->m_sg[i],
+ for (i = 0; i < rm->data.m_nents; ++i) {
+ sg_set_page(&rm->data.m_sg[i],
virt_to_page(page_addrs[i]),
PAGE_SIZE, 0);
}
@@ -278,7 +278,7 @@
/*
* now allocate and copy in the data payload.
*/
- sg = rm->m_sg;
+ sg = rm->data.m_sg;
iov = first_iov;
iov_off = 0;
sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
@@ -289,7 +289,7 @@
GFP_HIGHUSER);
if (ret)
goto out;
- rm->m_nents++;
+ rm->data.m_nents++;
sg_off = 0;
}
@@ -348,7 +348,7 @@
iov = first_iov;
iov_off = 0;
- sg = rm->m_sg;
+ sg = rm->data.m_sg;
vec_off = 0;
copied = 0;
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index dee698b..24274bb9 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -643,14 +643,14 @@
struct rds_rdma_op *op;
if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args)) ||
- rm->m_rdma_op)
+ rm->rdma.m_rdma_op)
return -EINVAL;
op = rds_rdma_prepare(rs, CMSG_DATA(cmsg));
if (IS_ERR(op))
return PTR_ERR(op);
rds_stats_inc(s_send_rdma);
- rm->m_rdma_op = op;
+ rm->rdma.m_rdma_op = op;
return 0;
}
@@ -679,6 +679,7 @@
*/
r_key = rds_rdma_cookie_key(rm->m_rdma_cookie);
+
spin_lock_irqsave(&rs->rs_rdma_lock, flags);
mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
if (!mr)
@@ -689,7 +690,7 @@
if (mr) {
mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE);
- rm->m_rdma_mr = mr;
+ rm->rdma.m_rdma_mr = mr;
}
return err;
}
@@ -707,5 +708,5 @@
rm->m_rdma_cookie != 0)
return -EINVAL;
- return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->m_rdma_mr);
+ return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->rdma.m_rdma_mr);
}
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 1d3eef6..07a750b 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -259,12 +259,18 @@
*/
spinlock_t m_rs_lock;
struct rds_sock *m_rs;
- struct rds_rdma_op *m_rdma_op;
rds_rdma_cookie_t m_rdma_cookie;
- struct rds_mr *m_rdma_mr;
- unsigned int m_nents;
- unsigned int m_count;
- struct scatterlist m_sg[0];
+ struct {
+ struct {
+ struct rds_rdma_op *m_rdma_op;
+ struct rds_mr *m_rdma_mr;
+ } rdma;
+ struct {
+ unsigned int m_nents;
+ unsigned int m_count;
+ struct scatterlist m_sg[0];
+ } data;
+ };
};
/*
diff --git a/net/rds/send.c b/net/rds/send.c
index 817997d..19dfd02 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -166,7 +166,7 @@
rm = conn->c_xmit_rm;
if (rm &&
conn->c_xmit_hdr_off == sizeof(struct rds_header) &&
- conn->c_xmit_sg == rm->m_nents) {
+ conn->c_xmit_sg == rm->data.m_nents) {
conn->c_xmit_rm = NULL;
conn->c_xmit_sg = 0;
conn->c_xmit_hdr_off = 0;
@@ -236,7 +236,7 @@
* connection.
* Therefore, we never retransmit messages with RDMA ops.
*/
- if (rm->m_rdma_op &&
+ if (rm->rdma.m_rdma_op &&
test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) {
spin_lock_irqsave(&conn->c_lock, flags);
if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags))
@@ -268,8 +268,8 @@
* keep this simple and require that the transport either
* send the whole rdma or none of it.
*/
- if (rm->m_rdma_op && !conn->c_xmit_rdma_sent) {
- ret = conn->c_trans->xmit_rdma(conn, rm->m_rdma_op);
+ if (rm->rdma.m_rdma_op && !conn->c_xmit_rdma_sent) {
+ ret = conn->c_trans->xmit_rdma(conn, rm->rdma.m_rdma_op);
if (ret)
break;
conn->c_xmit_rdma_sent = 1;
@@ -279,7 +279,7 @@
}
if (conn->c_xmit_hdr_off < sizeof(struct rds_header) ||
- conn->c_xmit_sg < rm->m_nents) {
+ conn->c_xmit_sg < rm->data.m_nents) {
ret = conn->c_trans->xmit(conn, rm,
conn->c_xmit_hdr_off,
conn->c_xmit_sg,
@@ -295,7 +295,7 @@
ret -= tmp;
}
- sg = &rm->m_sg[conn->c_xmit_sg];
+ sg = &rm->data.m_sg[conn->c_xmit_sg];
while (ret) {
tmp = min_t(int, ret, sg->length -
conn->c_xmit_data_off);
@@ -306,7 +306,7 @@
sg++;
conn->c_xmit_sg++;
BUG_ON(ret != 0 &&
- conn->c_xmit_sg == rm->m_nents);
+ conn->c_xmit_sg == rm->data.m_nents);
}
}
}
@@ -419,7 +419,7 @@
spin_lock_irqsave(&rm->m_rs_lock, flags);
- ro = rm->m_rdma_op;
+ ro = rm->rdma.m_rdma_op;
if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) &&
ro && ro->r_notify && ro->r_notifier) {
notifier = ro->r_notifier;
@@ -453,7 +453,7 @@
{
struct rds_rdma_op *ro;
- ro = rm->m_rdma_op;
+ ro = rm->rdma.m_rdma_op;
if (ro && ro->r_notify && ro->r_notifier) {
ro->r_notifier->n_status = status;
list_add_tail(&ro->r_notifier->n_list, &rs->rs_notify_queue);
@@ -477,7 +477,7 @@
spin_lock_irqsave(&conn->c_lock, flags);
list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) {
- if (rm->m_rdma_op == op) {
+ if (rm->rdma.m_rdma_op == op) {
atomic_inc(&rm->m_refcount);
found = rm;
goto out;
@@ -485,7 +485,7 @@
}
list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) {
- if (rm->m_rdma_op == op) {
+ if (rm->rdma.m_rdma_op == op) {
atomic_inc(&rm->m_refcount);
found = rm;
break;
@@ -545,7 +545,7 @@
spin_lock(&rs->rs_lock);
if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) {
- struct rds_rdma_op *ro = rm->m_rdma_op;
+ struct rds_rdma_op *ro = rm->rdma.m_rdma_op;
struct rds_notifier *notifier;
list_del_init(&rm->m_sock_item);
@@ -557,7 +557,7 @@
&rs->rs_notify_queue);
if (!notifier->n_status)
notifier->n_status = status;
- rm->m_rdma_op->r_notifier = NULL;
+ rm->rdma.m_rdma_op->r_notifier = NULL;
}
was_on_sock = 1;
rm->m_rs = NULL;
@@ -874,11 +874,11 @@
if (ret)
goto out;
- if ((rm->m_rdma_cookie || rm->m_rdma_op) &&
+ if ((rm->m_rdma_cookie || rm->rdma.m_rdma_op) &&
!conn->c_trans->xmit_rdma) {
if (printk_ratelimit())
printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n",
- rm->m_rdma_op, conn->c_trans->xmit_rdma);
+ rm->rdma.m_rdma_op, conn->c_trans->xmit_rdma);
ret = -EOPNOTSUPP;
goto out;
}
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index e5f6cce..d63aa35 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -166,21 +166,21 @@
goto out;
}
- while (sg < rm->m_nents) {
+ while (sg < rm->data.m_nents) {
ret = tc->t_sock->ops->sendpage(tc->t_sock,
- sg_page(&rm->m_sg[sg]),
- rm->m_sg[sg].offset + off,
- rm->m_sg[sg].length - off,
+ sg_page(&rm->data.m_sg[sg]),
+ rm->data.m_sg[sg].offset + off,
+ rm->data.m_sg[sg].length - off,
MSG_DONTWAIT|MSG_NOSIGNAL);
- rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->m_sg[sg]),
- rm->m_sg[sg].offset + off, rm->m_sg[sg].length - off,
+ rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->data.m_sg[sg]),
+ rm->data.m_sg[sg].offset + off, rm->data.m_sg[sg].length - off,
ret);
if (ret <= 0)
break;
off += ret;
done += ret;
- if (off == rm->m_sg[sg].length) {
+ if (off == rm->data.m_sg[sg].length) {
off = 0;
sg++;
}