Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2015 Oracle. All rights reserved. |
| 3 | * |
| 4 | * Support for backward direction RPCs on RPC/RDMA. |
| 5 | */ |
| 6 | |
| 7 | #include <linux/module.h> |
Chuck Lever | 63cae47 | 2015-10-24 17:28:08 -0400 | [diff] [blame] | 8 | #include <linux/sunrpc/xprt.h> |
| 9 | #include <linux/sunrpc/svc.h> |
Chuck Lever | 7656677 | 2015-10-24 17:28:32 -0400 | [diff] [blame] | 10 | #include <linux/sunrpc/svc_xprt.h> |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 11 | |
| 12 | #include "xprt_rdma.h" |
| 13 | |
| 14 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
| 15 | # define RPCDBG_FACILITY RPCDBG_TRANS |
| 16 | #endif |
| 17 | |
Chuck Lever | c8bbe0c | 2015-12-16 17:22:23 -0500 | [diff] [blame] | 18 | #undef RPCRDMA_BACKCHANNEL_DEBUG |
Chuck Lever | 63cae47 | 2015-10-24 17:28:08 -0400 | [diff] [blame] | 19 | |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 20 | static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt, |
| 21 | struct rpc_rqst *rqst) |
| 22 | { |
| 23 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
| 24 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
| 25 | |
| 26 | spin_lock(&buf->rb_reqslock); |
| 27 | list_del(&req->rl_all); |
| 28 | spin_unlock(&buf->rb_reqslock); |
| 29 | |
| 30 | rpcrdma_destroy_req(&r_xprt->rx_ia, req); |
| 31 | |
| 32 | kfree(rqst); |
| 33 | } |
| 34 | |
| 35 | static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, |
| 36 | struct rpc_rqst *rqst) |
| 37 | { |
| 38 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
| 39 | struct rpcrdma_regbuf *rb; |
| 40 | struct rpcrdma_req *req; |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 41 | size_t size; |
| 42 | |
| 43 | req = rpcrdma_create_req(r_xprt); |
Dan Carpenter | abfb689 | 2015-11-05 11:39:52 +0300 | [diff] [blame] | 44 | if (IS_ERR(req)) |
| 45 | return PTR_ERR(req); |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 46 | req->rl_backchannel = true; |
| 47 | |
Chuck Lever | eb342e9 | 2016-09-15 10:55:04 -0400 | [diff] [blame] | 48 | size = r_xprt->rx_data.inline_wsize; |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 49 | rb = rpcrdma_alloc_regbuf(ia, size, GFP_KERNEL); |
| 50 | if (IS_ERR(rb)) |
| 51 | goto out_fail; |
| 52 | req->rl_rdmabuf = rb; |
| 53 | |
Chuck Lever | eb342e9 | 2016-09-15 10:55:04 -0400 | [diff] [blame] | 54 | size += r_xprt->rx_data.inline_rsize; |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 55 | rb = rpcrdma_alloc_regbuf(ia, size, GFP_KERNEL); |
| 56 | if (IS_ERR(rb)) |
| 57 | goto out_fail; |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 58 | req->rl_sendbuf = rb; |
Chuck Lever | 5a6d1db | 2016-09-15 10:55:45 -0400 | [diff] [blame^] | 59 | xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base, size); |
| 60 | rpcrdma_set_xprtdata(rqst, req); |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 61 | return 0; |
| 62 | |
| 63 | out_fail: |
| 64 | rpcrdma_bc_free_rqst(r_xprt, rqst); |
| 65 | return -ENOMEM; |
| 66 | } |
| 67 | |
/* Create @count extra receive buffers and pass each one to
 * rpcrdma_recv_buffer_put() so it joins the rpcrdma_buffer's
 * existing rep's. These are released when the transport is
 * destroyed.
 *
 * Stops at the first failed allocation. Returns 0 when all @count
 * rep's were created, otherwise the errno carried by the failed
 * rpcrdma_create_rep() call.
 */
static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
				 unsigned int count)
{
	unsigned int remaining;

	for (remaining = count; remaining; remaining--) {
		struct rpcrdma_rep *new_rep = rpcrdma_create_rep(r_xprt);

		if (IS_ERR(new_rep)) {
			pr_err("RPC: %s: reply buffer alloc failed\n",
			       __func__);
			return PTR_ERR(new_rep);
		}
		rpcrdma_recv_buffer_put(new_rep);
	}

	return 0;
}
| 92 | |
| 93 | /** |
| 94 | * xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests |
| 95 | * @xprt: transport associated with these backchannel resources |
| 96 | * @reqs: number of concurrent incoming requests to expect |
| 97 | * |
| 98 | * Returns 0 on success; otherwise a negative errno |
| 99 | */ |
| 100 | int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) |
| 101 | { |
| 102 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
| 103 | struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; |
| 104 | struct rpc_rqst *rqst; |
| 105 | unsigned int i; |
| 106 | int rc; |
| 107 | |
| 108 | /* The backchannel reply path returns each rpc_rqst to the |
| 109 | * bc_pa_list _after_ the reply is sent. If the server is |
| 110 | * faster than the client, it can send another backward |
| 111 | * direction request before the rpc_rqst is returned to the |
| 112 | * list. The client rejects the request in this case. |
| 113 | * |
| 114 | * Twice as many rpc_rqsts are prepared to ensure there is |
| 115 | * always an rpc_rqst available as soon as a reply is sent. |
| 116 | */ |
Chuck Lever | 124fa17 | 2015-10-24 17:27:51 -0400 | [diff] [blame] | 117 | if (reqs > RPCRDMA_BACKWARD_WRS >> 1) |
| 118 | goto out_err; |
| 119 | |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 120 | for (i = 0; i < (reqs << 1); i++) { |
| 121 | rqst = kzalloc(sizeof(*rqst), GFP_KERNEL); |
| 122 | if (!rqst) { |
| 123 | pr_err("RPC: %s: Failed to create bc rpc_rqst\n", |
| 124 | __func__); |
| 125 | goto out_free; |
| 126 | } |
Chuck Lever | c8bbe0c | 2015-12-16 17:22:23 -0500 | [diff] [blame] | 127 | dprintk("RPC: %s: new rqst %p\n", __func__, rqst); |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 128 | |
| 129 | rqst->rq_xprt = &r_xprt->rx_xprt; |
| 130 | INIT_LIST_HEAD(&rqst->rq_list); |
| 131 | INIT_LIST_HEAD(&rqst->rq_bc_list); |
| 132 | |
| 133 | if (rpcrdma_bc_setup_rqst(r_xprt, rqst)) |
| 134 | goto out_free; |
| 135 | |
| 136 | spin_lock_bh(&xprt->bc_pa_lock); |
| 137 | list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); |
| 138 | spin_unlock_bh(&xprt->bc_pa_lock); |
| 139 | } |
| 140 | |
| 141 | rc = rpcrdma_bc_setup_reps(r_xprt, reqs); |
| 142 | if (rc) |
| 143 | goto out_free; |
| 144 | |
| 145 | rc = rpcrdma_ep_post_extra_recv(r_xprt, reqs); |
| 146 | if (rc) |
| 147 | goto out_free; |
| 148 | |
| 149 | buffer->rb_bc_srv_max_requests = reqs; |
| 150 | request_module("svcrdma"); |
| 151 | |
| 152 | return 0; |
| 153 | |
| 154 | out_free: |
| 155 | xprt_rdma_bc_destroy(xprt, reqs); |
| 156 | |
Chuck Lever | 124fa17 | 2015-10-24 17:27:51 -0400 | [diff] [blame] | 157 | out_err: |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 158 | pr_err("RPC: %s: setup backchannel transport failed\n", __func__); |
| 159 | return -ENOMEM; |
| 160 | } |
| 161 | |
| 162 | /** |
Chuck Lever | 7656677 | 2015-10-24 17:28:32 -0400 | [diff] [blame] | 163 | * xprt_rdma_bc_up - Create transport endpoint for backchannel service |
| 164 | * @serv: server endpoint |
| 165 | * @net: network namespace |
| 166 | * |
| 167 | * The "xprt" is an implied argument: it supplies the name of the |
| 168 | * backchannel transport class. |
| 169 | * |
| 170 | * Returns zero on success, negative errno on failure |
| 171 | */ |
| 172 | int xprt_rdma_bc_up(struct svc_serv *serv, struct net *net) |
| 173 | { |
| 174 | int ret; |
| 175 | |
| 176 | ret = svc_create_xprt(serv, "rdma-bc", net, PF_INET, 0, 0); |
| 177 | if (ret < 0) |
| 178 | return ret; |
| 179 | return 0; |
| 180 | } |
| 181 | |
| 182 | /** |
Chuck Lever | 6b26cc8 | 2016-05-02 14:40:40 -0400 | [diff] [blame] | 183 | * xprt_rdma_bc_maxpayload - Return maximum backchannel message size |
| 184 | * @xprt: transport |
| 185 | * |
| 186 | * Returns maximum size, in bytes, of a backchannel message |
| 187 | */ |
| 188 | size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt) |
| 189 | { |
| 190 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
| 191 | struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; |
| 192 | size_t maxmsg; |
| 193 | |
| 194 | maxmsg = min_t(unsigned int, cdata->inline_rsize, cdata->inline_wsize); |
| 195 | return maxmsg - RPCRDMA_HDRLEN_MIN; |
| 196 | } |
| 197 | |
| 198 | /** |
Chuck Lever | 83128a6 | 2015-10-24 17:27:59 -0400 | [diff] [blame] | 199 | * rpcrdma_bc_marshal_reply - Send backwards direction reply |
| 200 | * @rqst: buffer containing RPC reply data |
| 201 | * |
| 202 | * Returns zero on success. |
| 203 | */ |
| 204 | int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) |
| 205 | { |
| 206 | struct rpc_xprt *xprt = rqst->rq_xprt; |
| 207 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
| 208 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
| 209 | struct rpcrdma_msg *headerp; |
| 210 | size_t rpclen; |
| 211 | |
| 212 | headerp = rdmab_to_msg(req->rl_rdmabuf); |
| 213 | headerp->rm_xid = rqst->rq_xid; |
| 214 | headerp->rm_vers = rpcrdma_version; |
| 215 | headerp->rm_credit = |
| 216 | cpu_to_be32(r_xprt->rx_buf.rb_bc_srv_max_requests); |
| 217 | headerp->rm_type = rdma_msg; |
| 218 | headerp->rm_body.rm_chunks[0] = xdr_zero; |
| 219 | headerp->rm_body.rm_chunks[1] = xdr_zero; |
| 220 | headerp->rm_body.rm_chunks[2] = xdr_zero; |
| 221 | |
| 222 | rpclen = rqst->rq_svec[0].iov_len; |
| 223 | |
Chuck Lever | c8bbe0c | 2015-12-16 17:22:23 -0500 | [diff] [blame] | 224 | #ifdef RPCRDMA_BACKCHANNEL_DEBUG |
Chuck Lever | 83128a6 | 2015-10-24 17:27:59 -0400 | [diff] [blame] | 225 | pr_info("RPC: %s: rpclen %zd headerp 0x%p lkey 0x%x\n", |
| 226 | __func__, rpclen, headerp, rdmab_lkey(req->rl_rdmabuf)); |
| 227 | pr_info("RPC: %s: RPC/RDMA: %*ph\n", |
| 228 | __func__, (int)RPCRDMA_HDRLEN_MIN, headerp); |
| 229 | pr_info("RPC: %s: RPC: %*ph\n", |
| 230 | __func__, (int)rpclen, rqst->rq_svec[0].iov_base); |
Chuck Lever | c8bbe0c | 2015-12-16 17:22:23 -0500 | [diff] [blame] | 231 | #endif |
Chuck Lever | 83128a6 | 2015-10-24 17:27:59 -0400 | [diff] [blame] | 232 | |
| 233 | req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf); |
| 234 | req->rl_send_iov[0].length = RPCRDMA_HDRLEN_MIN; |
| 235 | req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf); |
| 236 | |
| 237 | req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf); |
| 238 | req->rl_send_iov[1].length = rpclen; |
| 239 | req->rl_send_iov[1].lkey = rdmab_lkey(req->rl_sendbuf); |
| 240 | |
| 241 | req->rl_niovs = 2; |
| 242 | return 0; |
| 243 | } |
| 244 | |
| 245 | /** |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 246 | * xprt_rdma_bc_destroy - Release resources for handling backchannel requests |
| 247 | * @xprt: transport associated with these backchannel resources |
| 248 | * @reqs: number of incoming requests to destroy; ignored |
| 249 | */ |
| 250 | void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs) |
| 251 | { |
| 252 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
| 253 | struct rpc_rqst *rqst, *tmp; |
| 254 | |
| 255 | spin_lock_bh(&xprt->bc_pa_lock); |
| 256 | list_for_each_entry_safe(rqst, tmp, &xprt->bc_pa_list, rq_bc_pa_list) { |
| 257 | list_del(&rqst->rq_bc_pa_list); |
| 258 | spin_unlock_bh(&xprt->bc_pa_lock); |
| 259 | |
| 260 | rpcrdma_bc_free_rqst(r_xprt, rqst); |
| 261 | |
| 262 | spin_lock_bh(&xprt->bc_pa_lock); |
| 263 | } |
| 264 | spin_unlock_bh(&xprt->bc_pa_lock); |
| 265 | } |
| 266 | |
| 267 | /** |
| 268 | * xprt_rdma_bc_free_rqst - Release a backchannel rqst |
| 269 | * @rqst: request to release |
| 270 | */ |
| 271 | void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) |
| 272 | { |
| 273 | struct rpc_xprt *xprt = rqst->rq_xprt; |
| 274 | |
Chuck Lever | c8bbe0c | 2015-12-16 17:22:23 -0500 | [diff] [blame] | 275 | dprintk("RPC: %s: freeing rqst %p (req %p)\n", |
| 276 | __func__, rqst, rpcr_to_rdmar(rqst)); |
| 277 | |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 278 | smp_mb__before_atomic(); |
| 279 | WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state)); |
| 280 | clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); |
| 281 | smp_mb__after_atomic(); |
| 282 | |
| 283 | spin_lock_bh(&xprt->bc_pa_lock); |
| 284 | list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); |
| 285 | spin_unlock_bh(&xprt->bc_pa_lock); |
| 286 | } |
Chuck Lever | 63cae47 | 2015-10-24 17:28:08 -0400 | [diff] [blame] | 287 | |
| 288 | /** |
| 289 | * rpcrdma_bc_receive_call - Handle a backward direction call |
| 290 | * @xprt: transport receiving the call |
| 291 | * @rep: receive buffer containing the call |
| 292 | * |
| 293 | * Called in the RPC reply handler, which runs in a tasklet. |
| 294 | * Be quick about it. |
| 295 | * |
| 296 | * Operational assumptions: |
| 297 | * o Backchannel credits are ignored, just as the NFS server |
| 298 | * forechannel currently does |
| 299 | * o The ULP manages a replay cache (eg, NFSv4.1 sessions). |
| 300 | * No replay detection is done at the transport level |
| 301 | */ |
| 302 | void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, |
| 303 | struct rpcrdma_rep *rep) |
| 304 | { |
| 305 | struct rpc_xprt *xprt = &r_xprt->rx_xprt; |
| 306 | struct rpcrdma_msg *headerp; |
| 307 | struct svc_serv *bc_serv; |
| 308 | struct rpcrdma_req *req; |
| 309 | struct rpc_rqst *rqst; |
| 310 | struct xdr_buf *buf; |
| 311 | size_t size; |
| 312 | __be32 *p; |
| 313 | |
| 314 | headerp = rdmab_to_msg(rep->rr_rdmabuf); |
| 315 | #ifdef RPCRDMA_BACKCHANNEL_DEBUG |
| 316 | pr_info("RPC: %s: callback XID %08x, length=%u\n", |
| 317 | __func__, be32_to_cpu(headerp->rm_xid), rep->rr_len); |
| 318 | pr_info("RPC: %s: %*ph\n", __func__, rep->rr_len, headerp); |
| 319 | #endif |
| 320 | |
| 321 | /* Sanity check: |
| 322 | * Need at least enough bytes for RPC/RDMA header, as code |
| 323 | * here references the header fields by array offset. Also, |
| 324 | * backward calls are always inline, so ensure there |
| 325 | * are some bytes beyond the RPC/RDMA header. |
| 326 | */ |
| 327 | if (rep->rr_len < RPCRDMA_HDRLEN_MIN + 24) |
| 328 | goto out_short; |
| 329 | p = (__be32 *)((unsigned char *)headerp + RPCRDMA_HDRLEN_MIN); |
| 330 | size = rep->rr_len - RPCRDMA_HDRLEN_MIN; |
| 331 | |
| 332 | /* Grab a free bc rqst */ |
| 333 | spin_lock(&xprt->bc_pa_lock); |
| 334 | if (list_empty(&xprt->bc_pa_list)) { |
| 335 | spin_unlock(&xprt->bc_pa_lock); |
| 336 | goto out_overflow; |
| 337 | } |
| 338 | rqst = list_first_entry(&xprt->bc_pa_list, |
| 339 | struct rpc_rqst, rq_bc_pa_list); |
| 340 | list_del(&rqst->rq_bc_pa_list); |
| 341 | spin_unlock(&xprt->bc_pa_lock); |
Chuck Lever | c8bbe0c | 2015-12-16 17:22:23 -0500 | [diff] [blame] | 342 | dprintk("RPC: %s: using rqst %p\n", __func__, rqst); |
Chuck Lever | 63cae47 | 2015-10-24 17:28:08 -0400 | [diff] [blame] | 343 | |
| 344 | /* Prepare rqst */ |
| 345 | rqst->rq_reply_bytes_recvd = 0; |
| 346 | rqst->rq_bytes_sent = 0; |
| 347 | rqst->rq_xid = headerp->rm_xid; |
Chuck Lever | 9f74660 | 2016-02-15 10:23:59 -0500 | [diff] [blame] | 348 | |
| 349 | rqst->rq_private_buf.len = size; |
Chuck Lever | 63cae47 | 2015-10-24 17:28:08 -0400 | [diff] [blame] | 350 | set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); |
| 351 | |
| 352 | buf = &rqst->rq_rcv_buf; |
| 353 | memset(buf, 0, sizeof(*buf)); |
| 354 | buf->head[0].iov_base = p; |
| 355 | buf->head[0].iov_len = size; |
| 356 | buf->len = size; |
| 357 | |
| 358 | /* The receive buffer has to be hooked to the rpcrdma_req |
| 359 | * so that it can be reposted after the server is done |
| 360 | * parsing it but just before sending the backward |
| 361 | * direction reply. |
| 362 | */ |
| 363 | req = rpcr_to_rdmar(rqst); |
Chuck Lever | c8bbe0c | 2015-12-16 17:22:23 -0500 | [diff] [blame] | 364 | dprintk("RPC: %s: attaching rep %p to req %p\n", |
Chuck Lever | 63cae47 | 2015-10-24 17:28:08 -0400 | [diff] [blame] | 365 | __func__, rep, req); |
Chuck Lever | 63cae47 | 2015-10-24 17:28:08 -0400 | [diff] [blame] | 366 | req->rl_reply = rep; |
| 367 | |
| 368 | /* Defeat the retransmit detection logic in send_request */ |
| 369 | req->rl_connect_cookie = 0; |
| 370 | |
| 371 | /* Queue rqst for ULP's callback service */ |
| 372 | bc_serv = xprt->bc_serv; |
| 373 | spin_lock(&bc_serv->sv_cb_lock); |
| 374 | list_add(&rqst->rq_bc_list, &bc_serv->sv_cb_list); |
| 375 | spin_unlock(&bc_serv->sv_cb_lock); |
| 376 | |
| 377 | wake_up(&bc_serv->sv_cb_waitq); |
| 378 | |
| 379 | r_xprt->rx_stats.bcall_count++; |
| 380 | return; |
| 381 | |
| 382 | out_overflow: |
| 383 | pr_warn("RPC/RDMA backchannel overflow\n"); |
| 384 | xprt_disconnect_done(xprt); |
| 385 | /* This receive buffer gets reposted automatically |
| 386 | * when the connection is re-established. |
| 387 | */ |
| 388 | return; |
| 389 | |
| 390 | out_short: |
| 391 | pr_warn("RPC/RDMA short backward direction call\n"); |
| 392 | |
| 393 | if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep)) |
| 394 | xprt_disconnect_done(xprt); |
| 395 | else |
| 396 | pr_warn("RPC: %s: reposting rep %p\n", |
| 397 | __func__, rep); |
| 398 | } |