Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2015 Oracle. All rights reserved. |
| 3 | * |
| 4 | * Support for backward direction RPCs on RPC/RDMA. |
| 5 | */ |
| 6 | |
| 7 | #include <linux/module.h> |
Chuck Lever | 63cae47 | 2015-10-24 17:28:08 -0400 | [diff] [blame] | 8 | #include <linux/sunrpc/xprt.h> |
| 9 | #include <linux/sunrpc/svc.h> |
Chuck Lever | 7656677 | 2015-10-24 17:28:32 -0400 | [diff] [blame] | 10 | #include <linux/sunrpc/svc_xprt.h> |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 11 | |
| 12 | #include "xprt_rdma.h" |
| 13 | |
| 14 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
| 15 | # define RPCDBG_FACILITY RPCDBG_TRANS |
| 16 | #endif |
| 17 | |
Chuck Lever | c8bbe0c | 2015-12-16 17:22:23 -0500 | [diff] [blame] | 18 | #undef RPCRDMA_BACKCHANNEL_DEBUG |
Chuck Lever | 63cae47 | 2015-10-24 17:28:08 -0400 | [diff] [blame] | 19 | |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 20 | static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt, |
| 21 | struct rpc_rqst *rqst) |
| 22 | { |
| 23 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
| 24 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
| 25 | |
| 26 | spin_lock(&buf->rb_reqslock); |
| 27 | list_del(&req->rl_all); |
| 28 | spin_unlock(&buf->rb_reqslock); |
| 29 | |
Chuck Lever | 13650c2 | 2016-09-15 10:56:26 -0400 | [diff] [blame] | 30 | rpcrdma_destroy_req(req); |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 31 | |
| 32 | kfree(rqst); |
| 33 | } |
| 34 | |
| 35 | static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, |
| 36 | struct rpc_rqst *rqst) |
| 37 | { |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 38 | struct rpcrdma_regbuf *rb; |
| 39 | struct rpcrdma_req *req; |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 40 | size_t size; |
| 41 | |
| 42 | req = rpcrdma_create_req(r_xprt); |
Dan Carpenter | abfb689 | 2015-11-05 11:39:52 +0300 | [diff] [blame] | 43 | if (IS_ERR(req)) |
| 44 | return PTR_ERR(req); |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 45 | req->rl_backchannel = true; |
| 46 | |
Chuck Lever | 13650c2 | 2016-09-15 10:56:26 -0400 | [diff] [blame] | 47 | rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, |
Chuck Lever | 99ef4db | 2016-09-15 10:56:10 -0400 | [diff] [blame] | 48 | DMA_TO_DEVICE, GFP_KERNEL); |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 49 | if (IS_ERR(rb)) |
| 50 | goto out_fail; |
| 51 | req->rl_rdmabuf = rb; |
| 52 | |
Chuck Lever | 08cf2ef | 2016-09-15 10:56:02 -0400 | [diff] [blame] | 53 | size = r_xprt->rx_data.inline_rsize; |
Chuck Lever | 13650c2 | 2016-09-15 10:56:26 -0400 | [diff] [blame] | 54 | rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL); |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 55 | if (IS_ERR(rb)) |
| 56 | goto out_fail; |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 57 | req->rl_sendbuf = rb; |
Chuck Lever | 62aee0e | 2016-11-29 10:52:08 -0500 | [diff] [blame] | 58 | xdr_buf_init(&rqst->rq_snd_buf, rb->rg_base, |
| 59 | min_t(size_t, size, PAGE_SIZE)); |
Chuck Lever | 5a6d1db | 2016-09-15 10:55:45 -0400 | [diff] [blame] | 60 | rpcrdma_set_xprtdata(rqst, req); |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 61 | return 0; |
| 62 | |
| 63 | out_fail: |
| 64 | rpcrdma_bc_free_rqst(r_xprt, rqst); |
| 65 | return -ENOMEM; |
| 66 | } |
| 67 | |
/* Create @count additional receive buffers (reps) and hand each
 * one to rpcrdma_recv_buffer_put(). These are released when the
 * transport is destroyed.
 *
 * Returns 0 when all reps were created; otherwise the errno from
 * the first failed rpcrdma_create_rep(). Reps created before the
 * failure are left in place.
 */
static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
				 unsigned int count)
{
	unsigned int remaining;

	for (remaining = count; remaining; remaining--) {
		struct rpcrdma_rep *rep = rpcrdma_create_rep(r_xprt);

		if (IS_ERR(rep)) {
			pr_err("RPC: %s: reply buffer alloc failed\n",
			       __func__);
			return PTR_ERR(rep);
		}

		rpcrdma_recv_buffer_put(rep);
	}

	return 0;
}
| 92 | |
| 93 | /** |
| 94 | * xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests |
| 95 | * @xprt: transport associated with these backchannel resources |
| 96 | * @reqs: number of concurrent incoming requests to expect |
| 97 | * |
| 98 | * Returns 0 on success; otherwise a negative errno |
| 99 | */ |
| 100 | int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) |
| 101 | { |
| 102 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
| 103 | struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; |
| 104 | struct rpc_rqst *rqst; |
| 105 | unsigned int i; |
| 106 | int rc; |
| 107 | |
| 108 | /* The backchannel reply path returns each rpc_rqst to the |
| 109 | * bc_pa_list _after_ the reply is sent. If the server is |
| 110 | * faster than the client, it can send another backward |
| 111 | * direction request before the rpc_rqst is returned to the |
| 112 | * list. The client rejects the request in this case. |
| 113 | * |
| 114 | * Twice as many rpc_rqsts are prepared to ensure there is |
| 115 | * always an rpc_rqst available as soon as a reply is sent. |
| 116 | */ |
Chuck Lever | 124fa17 | 2015-10-24 17:27:51 -0400 | [diff] [blame] | 117 | if (reqs > RPCRDMA_BACKWARD_WRS >> 1) |
| 118 | goto out_err; |
| 119 | |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 120 | for (i = 0; i < (reqs << 1); i++) { |
| 121 | rqst = kzalloc(sizeof(*rqst), GFP_KERNEL); |
| 122 | if (!rqst) { |
| 123 | pr_err("RPC: %s: Failed to create bc rpc_rqst\n", |
| 124 | __func__); |
| 125 | goto out_free; |
| 126 | } |
Chuck Lever | c8bbe0c | 2015-12-16 17:22:23 -0500 | [diff] [blame] | 127 | dprintk("RPC: %s: new rqst %p\n", __func__, rqst); |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 128 | |
| 129 | rqst->rq_xprt = &r_xprt->rx_xprt; |
| 130 | INIT_LIST_HEAD(&rqst->rq_list); |
| 131 | INIT_LIST_HEAD(&rqst->rq_bc_list); |
| 132 | |
| 133 | if (rpcrdma_bc_setup_rqst(r_xprt, rqst)) |
| 134 | goto out_free; |
| 135 | |
| 136 | spin_lock_bh(&xprt->bc_pa_lock); |
| 137 | list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); |
| 138 | spin_unlock_bh(&xprt->bc_pa_lock); |
| 139 | } |
| 140 | |
| 141 | rc = rpcrdma_bc_setup_reps(r_xprt, reqs); |
| 142 | if (rc) |
| 143 | goto out_free; |
| 144 | |
| 145 | rc = rpcrdma_ep_post_extra_recv(r_xprt, reqs); |
| 146 | if (rc) |
| 147 | goto out_free; |
| 148 | |
| 149 | buffer->rb_bc_srv_max_requests = reqs; |
| 150 | request_module("svcrdma"); |
| 151 | |
| 152 | return 0; |
| 153 | |
| 154 | out_free: |
| 155 | xprt_rdma_bc_destroy(xprt, reqs); |
| 156 | |
Chuck Lever | 124fa17 | 2015-10-24 17:27:51 -0400 | [diff] [blame] | 157 | out_err: |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 158 | pr_err("RPC: %s: setup backchannel transport failed\n", __func__); |
| 159 | return -ENOMEM; |
| 160 | } |
| 161 | |
| 162 | /** |
Chuck Lever | 7656677 | 2015-10-24 17:28:32 -0400 | [diff] [blame] | 163 | * xprt_rdma_bc_up - Create transport endpoint for backchannel service |
| 164 | * @serv: server endpoint |
| 165 | * @net: network namespace |
| 166 | * |
| 167 | * The "xprt" is an implied argument: it supplies the name of the |
| 168 | * backchannel transport class. |
| 169 | * |
| 170 | * Returns zero on success, negative errno on failure |
| 171 | */ |
| 172 | int xprt_rdma_bc_up(struct svc_serv *serv, struct net *net) |
| 173 | { |
| 174 | int ret; |
| 175 | |
| 176 | ret = svc_create_xprt(serv, "rdma-bc", net, PF_INET, 0, 0); |
| 177 | if (ret < 0) |
| 178 | return ret; |
| 179 | return 0; |
| 180 | } |
| 181 | |
| 182 | /** |
Chuck Lever | 6b26cc8 | 2016-05-02 14:40:40 -0400 | [diff] [blame] | 183 | * xprt_rdma_bc_maxpayload - Return maximum backchannel message size |
| 184 | * @xprt: transport |
| 185 | * |
| 186 | * Returns maximum size, in bytes, of a backchannel message |
| 187 | */ |
| 188 | size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt) |
| 189 | { |
| 190 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
| 191 | struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; |
| 192 | size_t maxmsg; |
| 193 | |
| 194 | maxmsg = min_t(unsigned int, cdata->inline_rsize, cdata->inline_wsize); |
Chuck Lever | 62aee0e | 2016-11-29 10:52:08 -0500 | [diff] [blame] | 195 | maxmsg = min_t(unsigned int, maxmsg, PAGE_SIZE); |
Chuck Lever | 6b26cc8 | 2016-05-02 14:40:40 -0400 | [diff] [blame] | 196 | return maxmsg - RPCRDMA_HDRLEN_MIN; |
| 197 | } |
| 198 | |
| 199 | /** |
Chuck Lever | 83128a6 | 2015-10-24 17:27:59 -0400 | [diff] [blame] | 200 | * rpcrdma_bc_marshal_reply - Send backwards direction reply |
| 201 | * @rqst: buffer containing RPC reply data |
| 202 | * |
| 203 | * Returns zero on success. |
| 204 | */ |
| 205 | int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) |
| 206 | { |
| 207 | struct rpc_xprt *xprt = rqst->rq_xprt; |
| 208 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
| 209 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
| 210 | struct rpcrdma_msg *headerp; |
Chuck Lever | 83128a6 | 2015-10-24 17:27:59 -0400 | [diff] [blame] | 211 | |
| 212 | headerp = rdmab_to_msg(req->rl_rdmabuf); |
| 213 | headerp->rm_xid = rqst->rq_xid; |
| 214 | headerp->rm_vers = rpcrdma_version; |
| 215 | headerp->rm_credit = |
| 216 | cpu_to_be32(r_xprt->rx_buf.rb_bc_srv_max_requests); |
| 217 | headerp->rm_type = rdma_msg; |
| 218 | headerp->rm_body.rm_chunks[0] = xdr_zero; |
| 219 | headerp->rm_body.rm_chunks[1] = xdr_zero; |
| 220 | headerp->rm_body.rm_chunks[2] = xdr_zero; |
| 221 | |
Chuck Lever | 655fec6 | 2016-09-15 10:57:24 -0400 | [diff] [blame] | 222 | if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, RPCRDMA_HDRLEN_MIN, |
| 223 | &rqst->rq_snd_buf, rpcrdma_noch)) |
| 224 | return -EIO; |
Chuck Lever | 83128a6 | 2015-10-24 17:27:59 -0400 | [diff] [blame] | 225 | return 0; |
| 226 | } |
| 227 | |
| 228 | /** |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 229 | * xprt_rdma_bc_destroy - Release resources for handling backchannel requests |
| 230 | * @xprt: transport associated with these backchannel resources |
| 231 | * @reqs: number of incoming requests to destroy; ignored |
| 232 | */ |
| 233 | void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs) |
| 234 | { |
| 235 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
| 236 | struct rpc_rqst *rqst, *tmp; |
| 237 | |
| 238 | spin_lock_bh(&xprt->bc_pa_lock); |
| 239 | list_for_each_entry_safe(rqst, tmp, &xprt->bc_pa_list, rq_bc_pa_list) { |
| 240 | list_del(&rqst->rq_bc_pa_list); |
| 241 | spin_unlock_bh(&xprt->bc_pa_lock); |
| 242 | |
| 243 | rpcrdma_bc_free_rqst(r_xprt, rqst); |
| 244 | |
| 245 | spin_lock_bh(&xprt->bc_pa_lock); |
| 246 | } |
| 247 | spin_unlock_bh(&xprt->bc_pa_lock); |
| 248 | } |
| 249 | |
| 250 | /** |
| 251 | * xprt_rdma_bc_free_rqst - Release a backchannel rqst |
| 252 | * @rqst: request to release |
| 253 | */ |
| 254 | void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) |
| 255 | { |
| 256 | struct rpc_xprt *xprt = rqst->rq_xprt; |
| 257 | |
Chuck Lever | c8bbe0c | 2015-12-16 17:22:23 -0500 | [diff] [blame] | 258 | dprintk("RPC: %s: freeing rqst %p (req %p)\n", |
| 259 | __func__, rqst, rpcr_to_rdmar(rqst)); |
| 260 | |
Chuck Lever | f531a5d | 2015-10-24 17:27:43 -0400 | [diff] [blame] | 261 | smp_mb__before_atomic(); |
| 262 | WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state)); |
| 263 | clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); |
| 264 | smp_mb__after_atomic(); |
| 265 | |
| 266 | spin_lock_bh(&xprt->bc_pa_lock); |
| 267 | list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); |
| 268 | spin_unlock_bh(&xprt->bc_pa_lock); |
| 269 | } |
Chuck Lever | 63cae47 | 2015-10-24 17:28:08 -0400 | [diff] [blame] | 270 | |
| 271 | /** |
| 272 | * rpcrdma_bc_receive_call - Handle a backward direction call |
| 273 | * @xprt: transport receiving the call |
| 274 | * @rep: receive buffer containing the call |
| 275 | * |
| 276 | * Called in the RPC reply handler, which runs in a tasklet. |
| 277 | * Be quick about it. |
| 278 | * |
| 279 | * Operational assumptions: |
| 280 | * o Backchannel credits are ignored, just as the NFS server |
| 281 | * forechannel currently does |
| 282 | * o The ULP manages a replay cache (eg, NFSv4.1 sessions). |
| 283 | * No replay detection is done at the transport level |
| 284 | */ |
| 285 | void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, |
| 286 | struct rpcrdma_rep *rep) |
| 287 | { |
| 288 | struct rpc_xprt *xprt = &r_xprt->rx_xprt; |
| 289 | struct rpcrdma_msg *headerp; |
| 290 | struct svc_serv *bc_serv; |
| 291 | struct rpcrdma_req *req; |
| 292 | struct rpc_rqst *rqst; |
| 293 | struct xdr_buf *buf; |
| 294 | size_t size; |
| 295 | __be32 *p; |
| 296 | |
| 297 | headerp = rdmab_to_msg(rep->rr_rdmabuf); |
| 298 | #ifdef RPCRDMA_BACKCHANNEL_DEBUG |
| 299 | pr_info("RPC: %s: callback XID %08x, length=%u\n", |
| 300 | __func__, be32_to_cpu(headerp->rm_xid), rep->rr_len); |
| 301 | pr_info("RPC: %s: %*ph\n", __func__, rep->rr_len, headerp); |
| 302 | #endif |
| 303 | |
| 304 | /* Sanity check: |
| 305 | * Need at least enough bytes for RPC/RDMA header, as code |
| 306 | * here references the header fields by array offset. Also, |
| 307 | * backward calls are always inline, so ensure there |
| 308 | * are some bytes beyond the RPC/RDMA header. |
| 309 | */ |
| 310 | if (rep->rr_len < RPCRDMA_HDRLEN_MIN + 24) |
| 311 | goto out_short; |
| 312 | p = (__be32 *)((unsigned char *)headerp + RPCRDMA_HDRLEN_MIN); |
| 313 | size = rep->rr_len - RPCRDMA_HDRLEN_MIN; |
| 314 | |
| 315 | /* Grab a free bc rqst */ |
| 316 | spin_lock(&xprt->bc_pa_lock); |
| 317 | if (list_empty(&xprt->bc_pa_list)) { |
| 318 | spin_unlock(&xprt->bc_pa_lock); |
| 319 | goto out_overflow; |
| 320 | } |
| 321 | rqst = list_first_entry(&xprt->bc_pa_list, |
| 322 | struct rpc_rqst, rq_bc_pa_list); |
| 323 | list_del(&rqst->rq_bc_pa_list); |
| 324 | spin_unlock(&xprt->bc_pa_lock); |
Chuck Lever | c8bbe0c | 2015-12-16 17:22:23 -0500 | [diff] [blame] | 325 | dprintk("RPC: %s: using rqst %p\n", __func__, rqst); |
Chuck Lever | 63cae47 | 2015-10-24 17:28:08 -0400 | [diff] [blame] | 326 | |
| 327 | /* Prepare rqst */ |
| 328 | rqst->rq_reply_bytes_recvd = 0; |
| 329 | rqst->rq_bytes_sent = 0; |
| 330 | rqst->rq_xid = headerp->rm_xid; |
Chuck Lever | 9f74660 | 2016-02-15 10:23:59 -0500 | [diff] [blame] | 331 | |
| 332 | rqst->rq_private_buf.len = size; |
Chuck Lever | 63cae47 | 2015-10-24 17:28:08 -0400 | [diff] [blame] | 333 | set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); |
| 334 | |
| 335 | buf = &rqst->rq_rcv_buf; |
| 336 | memset(buf, 0, sizeof(*buf)); |
| 337 | buf->head[0].iov_base = p; |
| 338 | buf->head[0].iov_len = size; |
| 339 | buf->len = size; |
| 340 | |
| 341 | /* The receive buffer has to be hooked to the rpcrdma_req |
| 342 | * so that it can be reposted after the server is done |
| 343 | * parsing it but just before sending the backward |
| 344 | * direction reply. |
| 345 | */ |
| 346 | req = rpcr_to_rdmar(rqst); |
Chuck Lever | c8bbe0c | 2015-12-16 17:22:23 -0500 | [diff] [blame] | 347 | dprintk("RPC: %s: attaching rep %p to req %p\n", |
Chuck Lever | 63cae47 | 2015-10-24 17:28:08 -0400 | [diff] [blame] | 348 | __func__, rep, req); |
Chuck Lever | 63cae47 | 2015-10-24 17:28:08 -0400 | [diff] [blame] | 349 | req->rl_reply = rep; |
| 350 | |
| 351 | /* Defeat the retransmit detection logic in send_request */ |
| 352 | req->rl_connect_cookie = 0; |
| 353 | |
| 354 | /* Queue rqst for ULP's callback service */ |
| 355 | bc_serv = xprt->bc_serv; |
| 356 | spin_lock(&bc_serv->sv_cb_lock); |
| 357 | list_add(&rqst->rq_bc_list, &bc_serv->sv_cb_list); |
| 358 | spin_unlock(&bc_serv->sv_cb_lock); |
| 359 | |
| 360 | wake_up(&bc_serv->sv_cb_waitq); |
| 361 | |
| 362 | r_xprt->rx_stats.bcall_count++; |
| 363 | return; |
| 364 | |
| 365 | out_overflow: |
| 366 | pr_warn("RPC/RDMA backchannel overflow\n"); |
| 367 | xprt_disconnect_done(xprt); |
| 368 | /* This receive buffer gets reposted automatically |
| 369 | * when the connection is re-established. |
| 370 | */ |
| 371 | return; |
| 372 | |
| 373 | out_short: |
| 374 | pr_warn("RPC/RDMA short backward direction call\n"); |
| 375 | |
Chuck Lever | b157380 | 2016-09-15 10:56:35 -0400 | [diff] [blame] | 376 | if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep)) |
Chuck Lever | 63cae47 | 2015-10-24 17:28:08 -0400 | [diff] [blame] | 377 | xprt_disconnect_done(xprt); |
| 378 | else |
| 379 | pr_warn("RPC: %s: reposting rep %p\n", |
| 380 | __func__, rep); |
| 381 | } |