\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -04001/*
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040038 */
39
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040040/*
41 * verbs.c
42 *
43 * Encapsulates the major functions managing:
44 * o adapters
45 * o endpoints
46 * o connections
47 * o buffer memory
48 */

#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/prefetch.h>
#include <asm/bitops.h>

#include "xprt_rdma.h"

/*
 * Globals/Macros
 */

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);
static void rpcrdma_reset_fmrs(struct rpcrdma_ia *);

/*
 * internal functions
 */

/*
 * handle replies in tasklet context, using a single, global list
 * rdma tasklet function -- just turn around and call the func
 * for all replies on the list
 */

static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
static LIST_HEAD(rpcrdma_tasklets_g);

static void
rpcrdma_run_tasklet(unsigned long data)
{
	struct rpcrdma_rep *rep;
	void (*func)(struct rpcrdma_rep *);
	unsigned long flags;

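	/* The tasklet argument is unused here; the self-assignment
	 * below appears to exist only to mark the parameter as used
	 * and keep compilers and static checkers quiet.
	 */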
	data = data;
	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	while (!list_empty(&rpcrdma_tasklets_g)) {
		rep = list_entry(rpcrdma_tasklets_g.next,
				 struct rpcrdma_rep, rr_list);
		list_del(&rep->rr_list);
		func = rep->rr_func;
		rep->rr_func = NULL;
		spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);

		if (func)
			func(rep);
		else
			rpcrdma_recv_buffer_put(rep);

		spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	}
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
}

static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);

static const char * const async_event[] = {
	"CQ error",
	"QP fatal error",
	"QP request error",
	"QP access error",
	"communication established",
	"send queue drained",
	"path migration successful",
	"path mig error",
	"device fatal error",
	"port active",
	"port error",
	"LID change",
	"P_key change",
	"SM change",
	"SRQ error",
	"SRQ limit reached",
	"last WQE reached",
	"client reregister",
	"GID change",
};

#define ASYNC_MSG(status)					\
	((status) < ARRAY_SIZE(async_event) ?			\
		async_event[(status)] : "unknown async error")
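
/* The table above tracks enum ib_event_type, which is what
 * ASYNC_MSG() receives via event->event in the upcalls below;
 * values past the end of the table fall back to a generic string.
 */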

static void
rpcrdma_schedule_tasklet(struct list_head *sched_list)
{
	unsigned long flags;

	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	list_splice_tail(sched_list, &rpcrdma_tasklets_g);
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
	tasklet_schedule(&rpcrdma_tasklet_g);
}

static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	pr_err("RPC: %s: %s on device %s ep %p\n",
	       __func__, ASYNC_MSG(event->event),
	       event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

static void
rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	pr_err("RPC: %s: %s on device %s ep %p\n",
	       __func__, ASYNC_MSG(event->event),
	       event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

static const char * const wc_status[] = {
	"success",
	"local length error",
	"local QP operation error",
	"local EE context operation error",
	"local protection error",
	"WR flushed",
	"memory management operation error",
	"bad response error",
	"local access error",
	"remote invalid request error",
	"remote access error",
	"remote operation error",
	"transport retry counter exceeded",
	"RNR retry counter exceeded",
	"local RDD violation error",
	"remote invalid RD request",
	"operation aborted",
	"invalid EE context number",
	"invalid EE context state",
	"fatal error",
	"response timeout error",
	"general error",
};

#define COMPLETION_MSG(status)					\
	((status) < ARRAY_SIZE(wc_status) ?			\
		wc_status[(status)] : "unexpected completion error")

static void
rpcrdma_sendcq_process_wc(struct ib_wc *wc)
{
	if (likely(wc->status == IB_WC_SUCCESS))
		return;

	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->wr_id == 0ULL) {
		if (wc->status != IB_WC_WR_FLUSH_ERR)
			pr_err("RPC: %s: SEND: %s\n",
			       __func__, COMPLETION_MSG(wc->status));
	} else {
		struct rpcrdma_mw *r;

		r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
		r->r.frmr.fr_state = FRMR_IS_STALE;
		pr_err("RPC: %s: frmr %p (stale): %s\n",
		       __func__, r, COMPLETION_MSG(wc->status));
	}
}

static int
rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct ib_wc *wcs;
	int budget, count, rc;

	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_send_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			return rc;

		count = rc;
		while (count-- > 0)
			rpcrdma_sendcq_process_wc(wcs++);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	return 0;
}

/*
 * Handle send, fast_reg_mr, and local_inv completions.
 *
 * Send events are typically suppressed and thus do not result
 * in an upcall. Occasionally one is signaled, however. This
 * prevents the provider's completion queue from wrapping and
 * losing a completion.
 */
static void
rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_sendcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC: %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

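	/* Re-arm the CQ. With IB_CQ_REPORT_MISSED_EVENTS,
	 * ib_req_notify_cq() returns a positive value if completions
	 * may have arrived before the re-arm took effect, in which
	 * case one more poll pass is needed so none are lost.
	 */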
	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_sendcq_poll(cq, ep);
}

static void
rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
{
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long)wc->wr_id;

	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS)
		goto out_fail;

	/* status == SUCCESS means all fields in wc are trustworthy */
	if (wc->opcode != IB_WC_RECV)
		return;

	dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n",
		__func__, rep, wc->byte_len);

	rep->rr_len = wc->byte_len;
	ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
				   rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);
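	/* Warm the cache with the start of the reply, which the
	 * tasklet's reply handler is about to parse.
	 */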
	prefetch(rep->rr_base);

out_schedule:
	list_add_tail(&rep->rr_list, sched_list);
	return;
out_fail:
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("RPC: %s: rep %p: %s\n",
		       __func__, rep, COMPLETION_MSG(wc->status));
	rep->rr_len = ~0U;
	goto out_schedule;
}

static int
rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct list_head sched_list;
	struct ib_wc *wcs;
	int budget, count, rc;

	INIT_LIST_HEAD(&sched_list);
	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_recv_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			goto out_schedule;

		count = rc;
		while (count-- > 0)
			rpcrdma_recvcq_process_wc(wcs++, &sched_list);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	rc = 0;

out_schedule:
	rpcrdma_schedule_tasklet(&sched_list);
	return rc;
}

/*
 * Handle receive completions.
 *
 * It is reentrant but processes single events in order to maintain
 * ordering of receives to keep server credits.
 *
 * It is the responsibility of the scheduled tasklet to return
 * recv buffers to the pool. NOTE: this affects synchronization of
 * connection shutdown. That is, the structures required for
 * the completion of the reply handler must remain intact until
 * all memory has been reclaimed.
 */
static void
rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_recvcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC: %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_recvcq_poll(cq, ep);
}

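/* Drain both completion queues synchronously. Flushed receive
 * completions are still scheduled onto the reply tasklet so their
 * buffers find their way back to the pool.
 */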
static void
rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
{
	struct ib_wc wc;
	LIST_HEAD(sched_list);

	while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
		rpcrdma_recvcq_process_wc(&wc, &sched_list);
	if (!list_empty(&sched_list))
		rpcrdma_schedule_tasklet(&sched_list);
	while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
		rpcrdma_sendcq_process_wc(&wc);
}

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
static const char * const conn[] = {
	"address resolved",
	"address error",
	"route resolved",
	"route error",
	"connect request",
	"connect response",
	"connect error",
	"unreachable",
	"rejected",
	"established",
	"disconnected",
	"device removal",
	"multicast join",
	"multicast error",
	"address change",
	"timewait exit",
};

#define CONNECTION_MSG(status)					\
	((status) < ARRAY_SIZE(conn) ?				\
		conn[(status)] : "unrecognized connection error")
#endif

static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
#endif
	struct ib_qp_attr attr;
	struct ib_qp_init_attr iattr;
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ia->ri_async_rc = 0;
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		ib_query_qp(ia->ri_id->qp, &attr,
			    IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
			    &iattr);
		dprintk("RPC: %s: %d responder resources"
			" (%d initiator)\n",
			__func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		connstate = -ECONNREFUSED;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
		goto connected;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		connstate = -ENODEV;
connected:
		dprintk("RPC: %s: %sconnected\n",
			__func__, connstate > 0 ? "" : "dis");
		ep->rep_connected = connstate;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		/*FALLTHROUGH*/
	default:
		dprintk("RPC: %s: %pI4:%u (ep 0x%p): %s\n",
			__func__, &addr->sin_addr.s_addr,
			ntohs(addr->sin_port), ep,
			CONNECTION_MSG(event->event));
		break;
	}

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	if (connstate == 1) {
		int ird = attr.max_dest_rd_atomic;
		int tird = ep->rep_remote_cma.responder_resources;
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
			"on %s, memreg %d slots %d ird %d%s\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			ia->ri_id->device->name,
			ia->ri_memreg_strategy,
			xprt->rx_buf.rb_max_requests,
			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
	} else if (connstate < 0) {
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			connstate);
	}
#endif

	return 0;
}

static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
			struct rpcrdma_ia *ia, struct sockaddr *addr)
{
	struct rdma_cm_id *id;
	int rc;

	init_completion(&ia->ri_done);

	id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		dprintk("RPC: %s: rdma_create_id() failed %i\n",
			__func__, rc);
		return id;
	}

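	/* Pre-load a timeout result. The CM upcall overwrites
	 * ri_async_rc and completes ri_done when resolution finishes,
	 * so -ETIMEDOUT survives only if the wait below expires first.
	 */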
	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}

/*
 * Drain any cq, prior to teardown.
 */
static void
rpcrdma_clean_cq(struct ib_cq *cq)
{
	struct ib_wc wc;
	int count = 0;

	while (1 == ib_poll_cq(cq, 1, &wc))
		++count;

	if (count)
		dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
			__func__, count, wc.opcode);
}

/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc, mem_priv;
	struct ib_device_attr devattr;
	struct rpcrdma_ia *ia = &xprt->rx_ia;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	/*
	 * Query the device to determine if the requested memory
	 * registration strategy is supported. If it isn't, set the
	 * strategy to a globally supported model.
	 */
	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out2;
	}

	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
	}

	if (memreg == RPCRDMA_FRMR) {
		/* Requires both frmr reg and local dma lkey */
		if ((devattr.device_cap_flags &
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
			dprintk("RPC: %s: FRMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_MTHCAFMR;
		} else {
			/* Mind the ia limit on FRMR page list depth */
			ia->ri_max_frmr_depth = min_t(unsigned int,
				RPCRDMA_MAX_DATA_SEGS,
				devattr.max_fast_reg_page_list_len);
		}
	}
	if (memreg == RPCRDMA_MTHCAFMR) {
		if (!ia->ri_id->device->alloc_fmr) {
			dprintk("RPC: %s: MTHCAFMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_ALLPHYSICAL;
		}
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_FRMR:
		break;
	case RPCRDMA_ALLPHYSICAL:
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
	case RPCRDMA_MTHCAFMR:
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
	register_setup:
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			rc = -ENOMEM;
			goto out2;
		}
		break;
	default:
		printk(KERN_ERR "RPC: Unsupported memory "
				"registration mode: %d\n", memreg);
		rc = -ENOMEM;
		goto out2;
	}
	dprintk("RPC: %s: memory registration strategy is %d\n",
		__func__, memreg);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	rwlock_init(&ia->ri_qplock);
	return 0;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}

/*
 * Clean up/close an IA.
 *  o if event handles and PD have been initialized, free them.
 *  o close the IA
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering\n", __func__);
	if (ia->ri_bind_mem != NULL) {
		rc = ib_dereg_mr(ia->ri_bind_mem);
		dprintk("RPC: %s: ib_dereg_mr returned %i\n",
			__func__, rc);
	}
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
		if (ia->ri_id->qp)
			rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = NULL;
	}
	if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
		rc = ib_dealloc_pd(ia->ri_pd);
		dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
			__func__, rc);
	}
}

/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
				struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr devattr;
	struct ib_cq *sendcq, *recvcq;
	int rc, err;

	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		return rc;
	}

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > devattr.max_qp_wr)
		cdata->max_requests = devattr.max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	/* send_cq and recv_cq initialized below */
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR: {
		int depth = 7;

		/* Add room for frmr register and invalidate WRs.
		 * 1. FRMR reg WR for head
		 * 2. FRMR invalidate WR for head
		 * 3. N FRMR reg WRs for pagelist
		 * 4. N FRMR invalidate WRs for pagelist
		 * 5. FRMR reg WR for tail
		 * 6. FRMR invalidate WR for tail
		 * 7. The RDMA_SEND WR
		 */

		/* Calculate N if the device max FRMR depth is smaller than
		 * RPCRDMA_MAX_DATA_SEGS.
		 */
		if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
			int delta = RPCRDMA_MAX_DATA_SEGS -
				    ia->ri_max_frmr_depth;

			do {
				depth += 2; /* FRMR reg + invalidate */
				delta -= ia->ri_max_frmr_depth;
			} while (delta > 0);

		}
		ep->rep_attr.cap.max_send_wr *= depth;
		if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
			cdata->max_requests = devattr.max_qp_wr / depth;
			if (!cdata->max_requests)
				return -EINVAL;
			ep->rep_attr.cap.max_send_wr = cdata->max_requests *
						       depth;
		}
		break;
	}
	default:
		break;
	}
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

	/* set trigger for requesting send completion */
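	/* Sends stay unsignaled until roughly half the send queue
	 * (capped at RPCRDMA_MAX_UNSIGNALED_SENDS) has been consumed;
	 * INIT_CQCOUNT/DECR_CQCOUNT then force one signaled send so
	 * the provider's CQ cannot silently wrap.
	 */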
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
	if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
		ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
	else if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;
	INIT_CQCOUNT(ep);
	init_waitqueue_head(&ep->rep_connect_wait);
	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

	sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
				  rpcrdma_cq_async_error_upcall, ep,
				  ep->rep_attr.cap.max_send_wr + 1, 0);
	if (IS_ERR(sendcq)) {
		rc = PTR_ERR(sendcq);
		dprintk("RPC: %s: failed to create send CQ: %i\n",
			__func__, rc);
		goto out1;
	}

	rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		goto out2;
	}

	recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
				  rpcrdma_cq_async_error_upcall, ep,
				  ep->rep_attr.cap.max_recv_wr + 1, 0);
	if (IS_ERR(recvcq)) {
		rc = PTR_ERR(recvcq);
		dprintk("RPC: %s: failed to create recv CQ: %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		ib_destroy_cq(recvcq);
		goto out2;
	}

	ep->rep_attr.send_cq = sendcq;
	ep->rep_attr.recv_cq = recvcq;

	/* Initialize cma parameters */

	/* RPC/RDMA does not use private data */
	ep->rep_remote_cma.private_data = NULL;
	ep->rep_remote_cma.private_data_len = 0;

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	if (devattr.max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
		ep->rep_remote_cma.responder_resources = 32;
	else
		ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;

	ep->rep_remote_cma.retry_count = 7;
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	err = ib_destroy_cq(sendcq);
	if (err)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, err);
out1:
	return rc;
}

/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering, connected is %d\n",
		__func__, ep->rep_connected);

	cancel_delayed_work_sync(&ep->rep_connect_worker);

	if (ia->ri_id->qp) {
		rpcrdma_ep_disconnect(ep, ia);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	/* padding - could be done in rpcrdma_buffer_destroy... */
	if (ep->rep_pad_mr) {
		rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
		ep->rep_pad_mr = NULL;
	}

	rpcrdma_clean_cq(ep->rep_attr.recv_cq);
	rc = ib_destroy_cq(ep->rep_attr.recv_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);

	rpcrdma_clean_cq(ep->rep_attr.send_cq);
	rc = ib_destroy_cq(ep->rep_attr.send_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);
}

/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id, *old;
	int rc = 0;
	int retry_count = 0;

	if (ep->rep_connected != 0) {
		struct rpcrdma_xprt *xprt;
retry:
		dprintk("RPC: %s: reconnecting...\n", __func__);

		rpcrdma_ep_disconnect(ep, ia);
		rpcrdma_flush_cqs(ep);

		switch (ia->ri_memreg_strategy) {
		case RPCRDMA_FRMR:
			rpcrdma_reset_frmrs(ia);
			break;
		case RPCRDMA_MTHCAFMR:
			rpcrdma_reset_fmrs(ia);
			break;
		case RPCRDMA_ALLPHYSICAL:
			break;
		default:
			rc = -EIO;
			goto out;
		}

		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		id = rpcrdma_create_id(xprt, ia,
				(struct sockaddr *)&xprt->rx_data.addr);
		if (IS_ERR(id)) {
			rc = -EHOSTUNREACH;
			goto out;
		}
		/* TEMP TEMP TEMP - fail if new device:
		 * Deregister/remarshal *all* requests!
		 * Close and recreate adapter, pd, etc!
		 * Re-determine all attributes still sane!
		 * More stuff I haven't thought of!
		 * Rrrgh!
		 */
		if (ia->ri_id->device != id->device) {
			printk("RPC: %s: can't reconnect on "
				"different device!\n", __func__);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}
		/* END TEMP */
		rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
				__func__, rc);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}

		write_lock(&ia->ri_qplock);
		old = ia->ri_id;
		ia->ri_id = id;
		write_unlock(&ia->ri_qplock);

		rdma_destroy_qp(old);
		rdma_destroy_id(old);
	} else {
		dprintk("RPC: %s: connecting...\n", __func__);
		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
				__func__, rc);
			/* do not update ep->rep_connected */
			return -ENETUNREACH;
		}
	}

	ep->rep_connected = 0;

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC: %s: rdma_connect() failed with %i\n",
			__func__, rc);
		goto out;
	}

	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

	/*
	 * Check state. A non-peer reject indicates no listener
	 * (ECONNREFUSED), which may be a transient state. All
	 * others indicate a transport condition which has already
	 * undergone a best-effort.
	 */
	if (ep->rep_connected == -ECONNREFUSED &&
	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
		dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
		goto retry;
	}
	if (ep->rep_connected <= 0) {
		/* Sometimes, the only way to reliably connect to remote
		 * CMs is to use same nonzero values for ORD and IRD. */
		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
		    (ep->rep_remote_cma.responder_resources == 0 ||
		     ep->rep_remote_cma.initiator_depth !=
				ep->rep_remote_cma.responder_resources)) {
			if (ep->rep_remote_cma.responder_resources == 0)
				ep->rep_remote_cma.responder_resources = 1;
			ep->rep_remote_cma.initiator_depth =
				ep->rep_remote_cma.responder_resources;
			goto retry;
		}
		rc = ep->rep_connected;
	} else {
		dprintk("RPC: %s: connected\n", __func__);
	}

out:
	if (rc)
		ep->rep_connected = rc;
	return rc;
}

/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
void
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	rpcrdma_flush_cqs(ep);
	rc = rdma_disconnect(ia->ri_id);
	if (!rc) {
		/* returns without wait if not connected */
		wait_event_interruptible(ep->rep_connect_wait,
							ep->rep_connected != 1);
		dprintk("RPC: %s: after wait, %sconnected\n", __func__,
			(ep->rep_connected == 1) ? "still " : "dis");
	} else {
		dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
		ep->rep_connected = rc;
	}
}

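/* Pre-allocate a pool of unmapped FMRs: one MW per segment for every
 * credit, plus one extra request's worth. MWs are handed out from
 * rb_mws and kept on rb_all so teardown can find them all.
 */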
static int
rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
{
	int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
	struct ib_fmr_attr fmr_attr = {
		.max_pages	= RPCRDMA_MAX_DATA_SEGS,
		.max_maps	= 1,
		.page_shift	= PAGE_SHIFT
	};
	struct rpcrdma_mw *r;
	int i, rc;

	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
	dprintk("RPC: %s: initializing %d FMRs\n", __func__, i);

	while (i--) {
		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (r == NULL)
			return -ENOMEM;

		r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr);
		if (IS_ERR(r->r.fmr)) {
			rc = PTR_ERR(r->r.fmr);
			dprintk("RPC: %s: ib_alloc_fmr failed %i\n",
				__func__, rc);
			goto out_free;
		}

		list_add(&r->mw_list, &buf->rb_mws);
		list_add(&r->mw_all, &buf->rb_all);
	}
	return 0;

out_free:
	kfree(r);
	return rc;
}

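/* The FRMR flavor of the pool above: each MW additionally carries a
 * fast_reg page list sized to the depth computed in rpcrdma_ia_open().
 */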
static int
rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_frmr *f;
	struct rpcrdma_mw *r;
	int i, rc;

	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
	dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i);

	while (i--) {
		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (r == NULL)
			return -ENOMEM;
		f = &r->r.frmr;

		f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
						ia->ri_max_frmr_depth);
		if (IS_ERR(f->fr_mr)) {
			rc = PTR_ERR(f->fr_mr);
			dprintk("RPC: %s: ib_alloc_fast_reg_mr "
				"failed %i\n", __func__, rc);
			goto out_free;
		}

		f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device,
							ia->ri_max_frmr_depth);
		if (IS_ERR(f->fr_pgl)) {
			rc = PTR_ERR(f->fr_pgl);
			dprintk("RPC: %s: ib_alloc_fast_reg_page_list "
				"failed %i\n", __func__, rc);

			ib_dereg_mr(f->fr_mr);
			goto out_free;
		}

		list_add(&r->mw_list, &buf->rb_mws);
		list_add(&r->mw_all, &buf->rb_all);
	}

	return 0;

out_free:
	kfree(r);
	return rc;
}

int
rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
	struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
{
	char *p;
	size_t len, rlen, wlen;
	int i, rc;

	buf->rb_max_requests = cdata->max_requests;
	spin_lock_init(&buf->rb_lock);

	/* Need to allocate:
	 *   1.  arrays for send and recv pointers
	 *   2.  arrays of struct rpcrdma_req to fill in pointers
	 *   3.  array of struct rpcrdma_rep for replies
	 *   4.  padding, if any
	 * Send/recv buffers in req/rep need to be registered
	 */
	len = buf->rb_max_requests *
		(sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
	len += cdata->padding;

	p = kzalloc(len, GFP_KERNEL);
	if (p == NULL) {
		dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
			__func__, len);
		rc = -ENOMEM;
		goto out;
	}
	buf->rb_pool = p;	/* for freeing it later */

	buf->rb_send_bufs = (struct rpcrdma_req **) p;
	p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
	buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
	p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];

	/*
	 * Register the zeroed pad buffer, if any.
	 */
	if (cdata->padding) {
		rc = rpcrdma_register_internal(ia, p, cdata->padding,
					    &ep->rep_pad_mr, &ep->rep_pad);
		if (rc)
			goto out;
	}
	p += cdata->padding;

	INIT_LIST_HEAD(&buf->rb_mws);
	INIT_LIST_HEAD(&buf->rb_all);
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		rc = rpcrdma_init_frmrs(ia, buf);
		if (rc)
			goto out;
		break;
	case RPCRDMA_MTHCAFMR:
		rc = rpcrdma_init_fmrs(ia, buf);
		if (rc)
			goto out;
		break;
	default:
		break;
	}

	/*
	 * Allocate/init the request/reply buffers. Doing this
	 * using kmalloc for now -- one for each buf.
	 */
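	/* "1 << fls(x)" rounds each buffer size up to the next power
	 * of two, leaving headroom beyond the inline thresholds.
	 */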
Chuck Lever65866f82014-05-28 10:33:59 -04001237 wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req));
1238 rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep));
1239 dprintk("RPC: %s: wlen = %zu, rlen = %zu\n",
1240 __func__, wlen, rlen);
1241
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001242 for (i = 0; i < buf->rb_max_requests; i++) {
1243 struct rpcrdma_req *req;
1244 struct rpcrdma_rep *rep;
1245
Chuck Lever65866f82014-05-28 10:33:59 -04001246 req = kmalloc(wlen, GFP_KERNEL);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001247 if (req == NULL) {
1248 dprintk("RPC: %s: request buffer %d alloc"
1249 " failed\n", __func__, i);
1250 rc = -ENOMEM;
1251 goto out;
1252 }
1253 memset(req, 0, sizeof(struct rpcrdma_req));
1254 buf->rb_send_bufs[i] = req;
1255 buf->rb_send_bufs[i]->rl_buffer = buf;
1256
1257 rc = rpcrdma_register_internal(ia, req->rl_base,
Chuck Lever65866f82014-05-28 10:33:59 -04001258 wlen - offsetof(struct rpcrdma_req, rl_base),
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001259 &buf->rb_send_bufs[i]->rl_handle,
1260 &buf->rb_send_bufs[i]->rl_iov);
1261 if (rc)
1262 goto out;
1263
Chuck Lever65866f82014-05-28 10:33:59 -04001264 buf->rb_send_bufs[i]->rl_size = wlen -
1265 sizeof(struct rpcrdma_req);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001266
Chuck Lever65866f82014-05-28 10:33:59 -04001267 rep = kmalloc(rlen, GFP_KERNEL);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001268 if (rep == NULL) {
1269 dprintk("RPC: %s: reply buffer %d alloc failed\n",
1270 __func__, i);
1271 rc = -ENOMEM;
1272 goto out;
1273 }
1274 memset(rep, 0, sizeof(struct rpcrdma_rep));
1275 buf->rb_recv_bufs[i] = rep;
1276 buf->rb_recv_bufs[i]->rr_buffer = buf;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001277
1278 rc = rpcrdma_register_internal(ia, rep->rr_base,
Chuck Lever65866f82014-05-28 10:33:59 -04001279 rlen - offsetof(struct rpcrdma_rep, rr_base),
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001280 &buf->rb_recv_bufs[i]->rr_handle,
1281 &buf->rb_recv_bufs[i]->rr_iov);
1282 if (rc)
1283 goto out;
1284
1285 }
1286 dprintk("RPC: %s: max_requests %d\n",
1287 __func__, buf->rb_max_requests);
1288 /* done */
1289 return 0;
1290out:
1291 rpcrdma_buffer_destroy(buf);
1292 return rc;
1293}
1294
Chuck Lever2e845222014-07-29 17:25:38 -04001295static void
1296rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf)
1297{
1298 struct rpcrdma_mw *r;
1299 int rc;
1300
1301 while (!list_empty(&buf->rb_all)) {
1302 r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
1303 list_del(&r->mw_all);
1304 list_del(&r->mw_list);
1305
1306 rc = ib_dealloc_fmr(r->r.fmr);
1307 if (rc)
1308 dprintk("RPC: %s: ib_dealloc_fmr failed %i\n",
1309 __func__, rc);
1310
1311 kfree(r);
1312 }
1313}
1314
1315static void
1316rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf)
1317{
1318 struct rpcrdma_mw *r;
1319 int rc;
1320
1321 while (!list_empty(&buf->rb_all)) {
1322 r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
1323 list_del(&r->mw_all);
1324 list_del(&r->mw_list);
1325
1326 rc = ib_dereg_mr(r->r.frmr.fr_mr);
1327 if (rc)
1328 dprintk("RPC: %s: ib_dereg_mr failed %i\n",
1329 __func__, rc);
1330 ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
1331
1332 kfree(r);
1333 }
1334}
1335
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001336void
1337rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1338{
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001339 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
Chuck Lever2e845222014-07-29 17:25:38 -04001340 int i;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001341
1342 /* clean up in reverse order from create
1343 * 1. recv mr memory (mr free, then kfree)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001344 * 2. send mr memory (mr free, then kfree)
Chuck Lever2e845222014-07-29 17:25:38 -04001345 * 3. MWs
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001346 */
1347 dprintk("RPC: %s: entering\n", __func__);
1348
1349 for (i = 0; i < buf->rb_max_requests; i++) {
1350 if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) {
1351 rpcrdma_deregister_internal(ia,
1352 buf->rb_recv_bufs[i]->rr_handle,
1353 &buf->rb_recv_bufs[i]->rr_iov);
1354 kfree(buf->rb_recv_bufs[i]);
1355 }
1356 if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001357 rpcrdma_deregister_internal(ia,
1358 buf->rb_send_bufs[i]->rl_handle,
1359 &buf->rb_send_bufs[i]->rl_iov);
1360 kfree(buf->rb_send_bufs[i]);
1361 }
1362 }
1363
Chuck Lever2e845222014-07-29 17:25:38 -04001364 switch (ia->ri_memreg_strategy) {
1365 case RPCRDMA_FRMR:
1366 rpcrdma_destroy_frmrs(buf);
1367 break;
1368 case RPCRDMA_MTHCAFMR:
1369 rpcrdma_destroy_fmrs(buf);
1370 break;
1371 default:
1372 break;
Allen Andrews4034ba02014-05-28 10:32:09 -04001373 }
1374
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001375 kfree(buf->rb_pool);
1376}
1377
Chuck Lever467c9672014-11-08 20:14:29 -05001378/* After a disconnect, unmap all FMRs.
1379 *
1380 * This is invoked only in the transport connect worker in order
1381 * to serialize with rpcrdma_register_fmr_external().
1382 */
1383static void
1384rpcrdma_reset_fmrs(struct rpcrdma_ia *ia)
1385{
1386 struct rpcrdma_xprt *r_xprt =
1387 container_of(ia, struct rpcrdma_xprt, rx_ia);
1388 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1389 struct list_head *pos;
1390 struct rpcrdma_mw *r;
1391 LIST_HEAD(l);
1392 int rc;
1393
1394 list_for_each(pos, &buf->rb_all) {
1395 r = list_entry(pos, struct rpcrdma_mw, mw_all);
1396
1397 INIT_LIST_HEAD(&l);
1398 list_add(&r->r.fmr->list, &l);
1399 rc = ib_unmap_fmr(&l);
1400 if (rc)
1401 dprintk("RPC: %s: ib_unmap_fmr failed %i\n",
1402 __func__, rc);
1403 }
1404}
1405
Chuck Lever9f9d8022014-07-29 17:24:45 -04001406/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
1407 * an unusable state. Find FRMRs in this state and dereg / reg
1408 * each. FRMRs that are VALID and attached to an rpcrdma_req are
1409 * also torn down.
1410 *
1411 * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
1412 *
1413 * This is invoked only in the transport connect worker in order
1414 * to serialize with rpcrdma_register_frmr_external().
1415 */
1416static void
1417rpcrdma_reset_frmrs(struct rpcrdma_ia *ia)
1418{
1419 struct rpcrdma_xprt *r_xprt =
1420 container_of(ia, struct rpcrdma_xprt, rx_ia);
1421 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1422 struct list_head *pos;
1423 struct rpcrdma_mw *r;
1424 int rc;
1425
1426 list_for_each(pos, &buf->rb_all) {
1427 r = list_entry(pos, struct rpcrdma_mw, mw_all);
1428
1429 if (r->r.frmr.fr_state == FRMR_IS_INVALID)
1430 continue;
1431
1432 rc = ib_dereg_mr(r->r.frmr.fr_mr);
1433 if (rc)
1434 dprintk("RPC: %s: ib_dereg_mr failed %i\n",
1435 __func__, rc);
1436 ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
1437
1438 r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
1439 ia->ri_max_frmr_depth);
1440 if (IS_ERR(r->r.frmr.fr_mr)) {
1441 rc = PTR_ERR(r->r.frmr.fr_mr);
1442 dprintk("RPC: %s: ib_alloc_fast_reg_mr"
1443 " failed %i\n", __func__, rc);
1444 continue;
1445 }
1446 r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
1447 ia->ri_id->device,
1448 ia->ri_max_frmr_depth);
1449 if (IS_ERR(r->r.frmr.fr_pgl)) {
1450 rc = PTR_ERR(r->r.frmr.fr_pgl);
1451 dprintk("RPC: %s: "
1452 "ib_alloc_fast_reg_page_list "
1453 "failed %i\n", __func__, rc);
1454
1455 ib_dereg_mr(r->r.frmr.fr_mr);
1456 continue;
1457 }
1458 r->r.frmr.fr_state = FRMR_IS_INVALID;
1459 }
1460}
1461
Chuck Leverc2922c02014-07-29 17:24:36 -04001462/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
1463 * some req segments uninitialized.
1464 */
1465static void
1466rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
1467{
1468 if (*mw) {
1469 list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
1470 *mw = NULL;
1471 }
1472}
1473
1474/* Cycle mw's back in reverse order, and "spin" them.
1475 * This delays and scrambles reuse as much as possible.
1476 */
1477static void
1478rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
1479{
1480 struct rpcrdma_mr_seg *seg = req->rl_segments;
1481 struct rpcrdma_mr_seg *seg1 = seg;
1482 int i;
1483
1484 for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
Chuck Lever3eb35812015-01-21 11:02:54 -05001485 rpcrdma_buffer_put_mr(&seg->rl_mw, buf);
1486 rpcrdma_buffer_put_mr(&seg1->rl_mw, buf);
Chuck Leverc2922c02014-07-29 17:24:36 -04001487}

static void
rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	buf->rb_send_bufs[--buf->rb_send_index] = req;
	req->rl_niovs = 0;
	if (req->rl_reply) {
		buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
		req->rl_reply->rr_func = NULL;
		req->rl_reply = NULL;
	}
}

/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
 * Redo only the ib_post_send().
 */
static void
rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct ib_send_wr invalidate_wr, *bad_wr;
	int rc;

	dprintk("RPC: %s: FRMR %p is stale\n", __func__, r);

	/* When this FRMR is re-inserted into rb_mws, it is no longer stale */
	r->r.frmr.fr_state = FRMR_IS_INVALID;

	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
	invalidate_wr.wr_id = (unsigned long)(void *)r;
	invalidate_wr.opcode = IB_WR_LOCAL_INV;
	invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	dprintk("RPC: %s: frmr %p invalidating rkey %08x\n",
		__func__, r, r->r.frmr.fr_mr->rkey);

	read_lock(&ia->ri_qplock);
	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
	read_unlock(&ia->ri_qplock);
	if (rc) {
		/* Force rpcrdma_buffer_get() to retry */
		r->r.frmr.fr_state = FRMR_IS_STALE;
		dprintk("RPC: %s: ib_post_send failed, %i\n",
			__func__, rc);
	}
}
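
/* FRMR state transitions driven by this retry path (a summary derived
 * from the code above, for reference):
 *
 *	FRMR_IS_STALE --LOCAL_INV posted OK--> FRMR_IS_INVALID
 *	FRMR_IS_STALE --ib_post_send() fails-> FRMR_IS_STALE
 *
 * A MW left stale on rb_mws is simply skipped and retried by the next
 * rpcrdma_buffer_get() call.
 */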

static void
rpcrdma_retry_flushed_linv(struct list_head *stale,
			   struct rpcrdma_buffer *buf)
{
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	struct list_head *pos;
	struct rpcrdma_mw *r;
	unsigned long flags;

	list_for_each(pos, stale) {
		r = list_entry(pos, struct rpcrdma_mw, mw_list);
		rpcrdma_retry_local_inv(r, ia);
	}

	spin_lock_irqsave(&buf->rb_lock, flags);
	list_splice_tail(stale, &buf->rb_mws);
	spin_unlock_irqrestore(&buf->rb_lock, flags);
}

static struct rpcrdma_req *
rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
			 struct list_head *stale)
{
	struct rpcrdma_mw *r;
	int i;

	i = RPCRDMA_MAX_SEGS - 1;
	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			       struct rpcrdma_mw, mw_list);
		list_del(&r->mw_list);
		if (r->r.frmr.fr_state == FRMR_IS_STALE) {
			list_add(&r->mw_list, stale);
			continue;
		}
		req->rl_segments[i].rl_mw = r;
		if (unlikely(i-- == 0))
			return req;	/* Success */
	}

	/* Not enough entries on rb_mws for this req */
	rpcrdma_buffer_put_sendbuf(req, buf);
	rpcrdma_buffer_put_mrs(req, buf);
	return NULL;
}

static struct rpcrdma_req *
rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;
	int i;

	i = RPCRDMA_MAX_SEGS - 1;
	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			       struct rpcrdma_mw, mw_list);
		list_del(&r->mw_list);
		req->rl_segments[i].rl_mw = r;
		if (unlikely(i-- == 0))
			return req;	/* Success */
	}

	/* Not enough entries on rb_mws for this req */
	rpcrdma_buffer_put_sendbuf(req, buf);
	rpcrdma_buffer_put_mrs(req, buf);
	return NULL;
}

/*
 * Get a set of request/reply buffers.
 *
 * Reply buffer (if needed) is attached to send buffer upon return.
 * Rule:
 *    rb_send_index and rb_recv_index MUST always be pointing to the
 *    *next* available buffer (non-NULL). They are incremented after
 *    removing buffers, and decremented *before* returning them.
 */
struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
	struct list_head stale;
	struct rpcrdma_req *req;
	unsigned long flags;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	if (buffers->rb_send_index == buffers->rb_max_requests) {
		spin_unlock_irqrestore(&buffers->rb_lock, flags);
		dprintk("RPC: %s: out of request buffers\n", __func__);
		return NULL;
	}

	req = buffers->rb_send_bufs[buffers->rb_send_index];
	if (buffers->rb_send_index < buffers->rb_recv_index) {
		dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
			__func__,
			buffers->rb_recv_index - buffers->rb_send_index);
		req->rl_reply = NULL;
	} else {
		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
	}
	buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;

	INIT_LIST_HEAD(&stale);
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
		break;
	case RPCRDMA_MTHCAFMR:
		req = rpcrdma_buffer_get_fmrs(req, buffers);
		break;
	default:
		break;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
	if (!list_empty(&stale))
		rpcrdma_retry_flushed_linv(&stale, buffers);
	return req;
}
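
/* A minimal usage sketch for the get/put pair (illustrative only; the
 * real caller is the xprtrdma send path, e.g. xprt_rdma_allocate()):
 *
 *	struct rpcrdma_req *req;
 *
 *	req = rpcrdma_buffer_get(&r_xprt->rx_buf);
 *	if (req == NULL)
 *		return NULL;		// pool exhausted, caller backs off
 *	...				// marshal and post the RPC
 *	rpcrdma_buffer_put(req);	// on error or final completion
 */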

/*
 * Put request/reply buffers back into pool.
 * Pre-decrement counter/array index.
 */
void
rpcrdma_buffer_put(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
	unsigned long flags;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	rpcrdma_buffer_put_sendbuf(req, buffers);
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
	case RPCRDMA_MTHCAFMR:
		rpcrdma_buffer_put_mrs(req, buffers);
		break;
	default:
		break;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Recover reply buffers from pool.
 * This happens when recovering from error conditions.
 * Post-increment counter/array index.
 */
void
rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	unsigned long flags;

	if (req->rl_iov.length == 0)	/* special case xprt_rdma_allocate() */
		buffers = ((struct rpcrdma_req *) buffers)->rl_buffer;
	spin_lock_irqsave(&buffers->rb_lock, flags);
	if (buffers->rb_recv_index < buffers->rb_max_requests) {
		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Put reply buffers back into pool when not attached to
 * request. This happens in error conditions.
 */
void
rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
{
	struct rpcrdma_buffer *buffers = rep->rr_buffer;
	unsigned long flags;

	rep->rr_func = NULL;
	spin_lock_irqsave(&buffers->rb_lock, flags);
	buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
 */

int
rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
			  struct ib_mr **mrp, struct ib_sge *iov)
{
	struct ib_phys_buf ipb;
	struct ib_mr *mr;
	int rc;

	/*
	 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
	 */
	iov->addr = ib_dma_map_single(ia->ri_id->device,
			va, len, DMA_BIDIRECTIONAL);
	if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
		return -ENOMEM;

	iov->length = len;

	if (ia->ri_have_dma_lkey) {
		*mrp = NULL;
		iov->lkey = ia->ri_dma_lkey;
		return 0;
	} else if (ia->ri_bind_mem != NULL) {
		*mrp = NULL;
		iov->lkey = ia->ri_bind_mem->lkey;
		return 0;
	}

	ipb.addr = iov->addr;
	ipb.size = iov->length;
	mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
			IB_ACCESS_LOCAL_WRITE, &iov->addr);

	dprintk("RPC: %s: phys convert: 0x%llx registered 0x%llx length %d\n",
		__func__, (unsigned long long)ipb.addr,
		(unsigned long long)iov->addr, len);

	if (IS_ERR(mr)) {
		*mrp = NULL;
		rc = PTR_ERR(mr);
		dprintk("RPC: %s: failed with %i\n", __func__, rc);
	} else {
		*mrp = mr;
		iov->lkey = mr->lkey;
		rc = 0;
	}

	return rc;
}

int
rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
			    struct ib_mr *mr, struct ib_sge *iov)
{
	int rc;

	ib_dma_unmap_single(ia->ri_id->device,
			iov->addr, iov->length, DMA_BIDIRECTIONAL);

	if (mr == NULL)
		return 0;

	rc = ib_dereg_mr(mr);
	if (rc)
		dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
	return rc;
}
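
/* Sketch of the intended calling pattern for the two wrappers above
 * (illustrative; "va" and "len" stand for any kmalloc'ed buffer):
 *
 *	void *va = kmalloc(len, GFP_KERNEL);
 *	struct ib_mr *mr;
 *	struct ib_sge iov;
 *
 *	rc = rpcrdma_register_internal(ia, va, len, &mr, &iov);
 *	if (rc)
 *		goto out_free;
 *	// iov is now usable as a local SGE; note mr may be NULL when
 *	// the device supplies a DMA lkey or a bind_mem MR is in use.
 *	...
 *	rpcrdma_deregister_internal(ia, mr, &iov);
 */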

/*
 * Wrappers for chunk registration, shared by read/write chunk code.
 */

static void
rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
{
	seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
	seg->mr_dmalen = seg->mr_len;
	if (seg->mr_page)
		seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
				seg->mr_page, offset_in_page(seg->mr_offset),
				seg->mr_dmalen, seg->mr_dir);
	else
		seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
				seg->mr_offset,
				seg->mr_dmalen, seg->mr_dir);
	if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
		dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dmalen %zu\n",
			__func__,
			(unsigned long long)seg->mr_dma,
			seg->mr_offset, seg->mr_dmalen);
	}
}

static void
rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
{
	if (seg->mr_page)
		ib_dma_unmap_page(ia->ri_id->device,
				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
	else
		ib_dma_unmap_single(ia->ri_id->device,
				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
}
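
/* Note on the "writing" argument (descriptive, matching the logic in
 * rpcrdma_map_one() above): writing means the remote peer will WRITE
 * into this memory (e.g. an NFS READ payload landing in client
 * memory), so the segment maps DMA_FROM_DEVICE; otherwise the peer
 * READs from it and the segment maps DMA_TO_DEVICE.
 */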

static int
rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
			int *nsegs, int writing, struct rpcrdma_ia *ia,
			struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	struct rpcrdma_mw *mw = seg1->rl_mw;
	struct rpcrdma_frmr *frmr = &mw->r.frmr;
	struct ib_mr *mr = frmr->fr_mr;
	struct ib_send_wr fastreg_wr, *bad_wr;
	u8 key;
	int len, pageoff;
	int i, rc;
	int seg_len;
	u64 pa;
	int page_no;

	pageoff = offset_in_page(seg1->mr_offset);
	seg1->mr_offset -= pageoff;	/* start of page */
	seg1->mr_len += pageoff;
	len = -pageoff;
	if (*nsegs > ia->ri_max_frmr_depth)
		*nsegs = ia->ri_max_frmr_depth;
	for (page_no = i = 0; i < *nsegs;) {
		rpcrdma_map_one(ia, seg, writing);
		pa = seg->mr_dma;
		for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
			frmr->fr_pgl->page_list[page_no++] = pa;
			pa += PAGE_SIZE;
		}
		len += seg->mr_len;
		++seg;
		++i;
		/* Check for holes */
		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	dprintk("RPC: %s: Using frmr %p to map %d segments\n",
		__func__, mw, i);

	frmr->fr_state = FRMR_IS_VALID;

	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
	fastreg_wr.wr_id = (unsigned long)(void *)mw;
	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
	fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma;
	fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
	fastreg_wr.wr.fast_reg.page_list_len = page_no;
	fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
	fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
	if (fastreg_wr.wr.fast_reg.length < len) {
		rc = -EIO;
		goto out_err;
	}

	/* Bump the key */
	key = (u8)(mr->rkey & 0x000000FF);
	ib_update_fast_reg_key(mr, ++key);

	fastreg_wr.wr.fast_reg.access_flags = (writing ?
				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
				IB_ACCESS_REMOTE_READ);
	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
	if (rc) {
		dprintk("RPC: %s: failed ib_post_send for register,"
			" status %i\n", __func__, rc);
		ib_update_fast_reg_key(mr, --key);
		goto out_err;
	} else {
		seg1->mr_rkey = mr->rkey;
		seg1->mr_base = seg1->mr_dma + pageoff;
		seg1->mr_nsegs = i;
		seg1->mr_len = len;
	}
	*nsegs = i;
	return 0;
out_err:
	frmr->fr_state = FRMR_IS_INVALID;
	while (i--)
		rpcrdma_unmap_one(ia, --seg);
	return rc;
}
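
/* Worked example of the "bump the key" step above (the rkey value is
 * invented for illustration): an rkey is a 32-bit value whose low
 * octet is owned by the consumer. If mr->rkey is 0x12345607, then
 *
 *	key = 0x12345607 & 0xFF           --> 0x07
 *	ib_update_fast_reg_key(mr, 0x08)  --> mr->rkey == 0x12345608
 *
 * Registering with a fresh key each time distinguishes this FAST_REG
 * from any stale peer access still carrying the previous rkey.
 */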

static int
rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
			struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	struct ib_send_wr invalidate_wr, *bad_wr;
	int rc;

	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;

	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
	invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
	invalidate_wr.opcode = IB_WR_LOCAL_INV;
	invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	read_lock(&ia->ri_qplock);
	while (seg1->mr_nsegs--)
		rpcrdma_unmap_one(ia, seg++);
	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
	read_unlock(&ia->ri_qplock);
	if (rc) {
		/* Force rpcrdma_buffer_get() to retry */
		seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
		dprintk("RPC: %s: failed ib_post_send for invalidate,"
			" status %i\n", __func__, rc);
	}
	return rc;
}

static int
rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
			int *nsegs, int writing, struct rpcrdma_ia *ia)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
	int len, pageoff, i, rc;

	pageoff = offset_in_page(seg1->mr_offset);
	seg1->mr_offset -= pageoff;	/* start of page */
	seg1->mr_len += pageoff;
	len = -pageoff;
	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
		*nsegs = RPCRDMA_MAX_DATA_SEGS;
	for (i = 0; i < *nsegs;) {
		rpcrdma_map_one(ia, seg, writing);
		physaddrs[i] = seg->mr_dma;
		len += seg->mr_len;
		++seg;
		++i;
		/* Check for holes */
		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	rc = ib_map_phys_fmr(seg1->rl_mw->r.fmr, physaddrs, i, seg1->mr_dma);
	if (rc) {
		dprintk("RPC: %s: failed ib_map_phys_fmr "
			"%u@0x%llx+%i (%d)... status %i\n", __func__,
			len, (unsigned long long)seg1->mr_dma,
			pageoff, i, rc);
		while (i--)
			rpcrdma_unmap_one(ia, --seg);
	} else {
		seg1->mr_rkey = seg1->rl_mw->r.fmr->rkey;
		seg1->mr_base = seg1->mr_dma + pageoff;
		seg1->mr_nsegs = i;
		seg1->mr_len = len;
	}
	*nsegs = i;
	return rc;
}

static int
rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
			struct rpcrdma_ia *ia)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	LIST_HEAD(l);
	int rc;

	list_add(&seg1->rl_mw->r.fmr->list, &l);
	rc = ib_unmap_fmr(&l);
	read_lock(&ia->ri_qplock);
	while (seg1->mr_nsegs--)
		rpcrdma_unmap_one(ia, seg++);
	read_unlock(&ia->ri_qplock);
	if (rc)
		dprintk("RPC: %s: failed ib_unmap_fmr,"
			" status %i\n", __func__, rc);
	return rc;
}

int
rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
			int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	int rc = 0;

	switch (ia->ri_memreg_strategy) {

	case RPCRDMA_ALLPHYSICAL:
		rpcrdma_map_one(ia, seg, writing);
		seg->mr_rkey = ia->ri_bind_mem->rkey;
		seg->mr_base = seg->mr_dma;
		seg->mr_nsegs = 1;
		nsegs = 1;
		break;

	/* Registration using frmr registration */
	case RPCRDMA_FRMR:
		rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
		break;

	/* Registration using fmr memory registration */
	case RPCRDMA_MTHCAFMR:
		rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
		break;

	default:
		return -EIO;
	}
	if (rc)
		return rc;

	return nsegs;
}
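
/* Call pattern for the dispatcher above (illustrative; the real caller
 * is the chunk-marshaling code in rpc_rdma.c):
 *
 *	n = rpcrdma_register_external(seg, nsegs, writing, r_xprt);
 *	if (n < 0)
 *		return n;	// provider error, or -EIO: unknown strategy
 *	// n is how many segments were coalesced under one rkey;
 *	// seg->mr_rkey/mr_base/mr_len now describe the chunk on the wire.
 */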

int
rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
			struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	int nsegs = seg->mr_nsegs, rc;

	switch (ia->ri_memreg_strategy) {

	case RPCRDMA_ALLPHYSICAL:
		read_lock(&ia->ri_qplock);
		rpcrdma_unmap_one(ia, seg);
		read_unlock(&ia->ri_qplock);
		break;

	case RPCRDMA_FRMR:
		rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
		break;

	case RPCRDMA_MTHCAFMR:
		rc = rpcrdma_deregister_fmr_external(seg, ia);
		break;

	default:
		break;
	}
	return nsegs;
}

/*
 * Prepost any receive buffer, then post send.
 *
 * Receive buffer is donated to hardware, reclaimed upon recv completion.
 */
int
rpcrdma_ep_post(struct rpcrdma_ia *ia,
		struct rpcrdma_ep *ep,
		struct rpcrdma_req *req)
{
	struct ib_send_wr send_wr, *send_wr_fail;
	struct rpcrdma_rep *rep = req->rl_reply;
	int rc;

	if (rep) {
		rc = rpcrdma_ep_post_recv(ia, ep, rep);
		if (rc)
			goto out;
		req->rl_reply = NULL;
	}

	send_wr.next = NULL;
	send_wr.wr_id = 0ULL;	/* no send cookie */
	send_wr.sg_list = req->rl_send_iov;
	send_wr.num_sge = req->rl_niovs;
	send_wr.opcode = IB_WR_SEND;
	if (send_wr.num_sge == 4)	/* no need to sync any pad (constant) */
		ib_dma_sync_single_for_device(ia->ri_id->device,
			req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
			DMA_TO_DEVICE);
	ib_dma_sync_single_for_device(ia->ri_id->device,
		req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
		DMA_TO_DEVICE);
	ib_dma_sync_single_for_device(ia->ri_id->device,
		req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
		DMA_TO_DEVICE);

	if (DECR_CQCOUNT(ep) > 0)
		send_wr.send_flags = 0;
	else {	/* Provider must take a send completion every now and then */
		INIT_CQCOUNT(ep);
		send_wr.send_flags = IB_SEND_SIGNALED;
	}

	rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
	if (rc)
		dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
			rc);
out:
	return rc;
}
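
/* How the signaling heuristic above behaves (worked example; the
 * actual starting count depends on the negotiated send queue depth):
 * with INIT_CQCOUNT() seeding the counter at, say, 31, sends 1..31
 * decrement it and post unsignaled WRs; the 32nd send sees the count
 * exhausted, reseeds it, and posts with IB_SEND_SIGNALED so the
 * provider can retire the whole batch of send WRs with a single
 * completion.
 */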

/*
 * (Re)post a receive buffer.
 */
int
rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
		     struct rpcrdma_ep *ep,
		     struct rpcrdma_rep *rep)
{
	struct ib_recv_wr recv_wr, *recv_wr_fail;
	int rc;

	recv_wr.next = NULL;
	recv_wr.wr_id = (u64) (unsigned long) rep;
	recv_wr.sg_list = &rep->rr_iov;
	recv_wr.num_sge = 1;

	ib_dma_sync_single_for_cpu(ia->ri_id->device,
		rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);

	rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);

	if (rc)
		dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
			rc);
	return rc;
}

/* Physical mapping means one Read/Write list entry per page.
 * All list entries must fit within an inline buffer.
 *
 * NB: The server must return a Write list for NFS READ,
 * which has the same constraint. Factor in the inline
 * rsize as well.
 */
static size_t
rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	unsigned int inline_size, pages;

	inline_size = min_t(unsigned int,
			    cdata->inline_wsize, cdata->inline_rsize);
	inline_size -= RPCRDMA_HDRLEN_MIN;
	pages = inline_size / sizeof(struct rpcrdma_segment);
	return pages << PAGE_SHIFT;
}
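
/* Worked example for the sizing above (numbers are illustrative and
 * assume 4KB pages; the inline sizes come from the mount parameters):
 * with 1024-byte inline buffers, a 28-byte minimal RPC-over-RDMA
 * header, and 16-byte chunk segments,
 *
 *	inline_size = 1024 - 28  = 996
 *	pages       = 996 / 16   = 62
 *	max payload = 62 * 4096  = 253952 bytes
 */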

static size_t
rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt)
{
	return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
}

size_t
rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt)
{
	size_t result;

	switch (r_xprt->rx_ia.ri_memreg_strategy) {
	case RPCRDMA_ALLPHYSICAL:
		result = rpcrdma_physical_max_payload(r_xprt);
		break;
	default:
		result = rpcrdma_mr_max_payload(r_xprt);
	}
	return result;
}