\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -04001/*
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040038 */
39
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040040/*
41 * verbs.c
42 *
43 * Encapsulates the major functions managing:
44 * o adapters
45 * o endpoints
46 * o connections
47 * o buffer memory
48 */

#include <linux/interrupt.h>
#include <linux/slab.h>
#include <asm/bitops.h>

#include "xprt_rdma.h"

/*
 * Globals/Macros
 */

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);
static void rpcrdma_reset_fmrs(struct rpcrdma_ia *);

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040067/*
68 * internal functions
69 */
70
71/*
72 * handle replies in tasklet context, using a single, global list
73 * rdma tasklet function -- just turn around and call the func
74 * for all replies on the list
75 */
76
77static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
78static LIST_HEAD(rpcrdma_tasklets_g);
79
80static void
81rpcrdma_run_tasklet(unsigned long data)
82{
83 struct rpcrdma_rep *rep;
84 void (*func)(struct rpcrdma_rep *);
85 unsigned long flags;
86
87 data = data;
88 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
89 while (!list_empty(&rpcrdma_tasklets_g)) {
90 rep = list_entry(rpcrdma_tasklets_g.next,
91 struct rpcrdma_rep, rr_list);
92 list_del(&rep->rr_list);
93 func = rep->rr_func;
94 rep->rr_func = NULL;
95 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
96
97 if (func)
98 func(rep);
99 else
100 rpcrdma_recv_buffer_put(rep);
101
102 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
103 }
104 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
105}
106
107static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
108
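/* Editorial note on the flow above: the completion upcalls further down
 * may run in an atomic (interrupt) context, so they only move each
 * completed rpcrdma_rep onto the global list; rpcrdma_run_tasklet()
 * then invokes rep->rr_func (normally the reply handler) for each
 * entry from softirq context.
 */
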
static const char * const async_event[] = {
	"CQ error",
	"QP fatal error",
	"QP request error",
	"QP access error",
	"communication established",
	"send queue drained",
	"path migration successful",
	"path mig error",
	"device fatal error",
	"port active",
	"port error",
	"LID change",
	"P_key change",
	"SM change",
	"SRQ error",
	"SRQ limit reached",
	"last WQE reached",
	"client reregister",
	"GID change",
};

#define ASYNC_MSG(status)					\
	((status) < ARRAY_SIZE(async_event) ?			\
		async_event[(status)] : "unknown async error")

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400135static void
Chuck Leverf1a03b72014-11-08 20:14:37 -0500136rpcrdma_schedule_tasklet(struct list_head *sched_list)
137{
138 unsigned long flags;
139
140 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
141 list_splice_tail(sched_list, &rpcrdma_tasklets_g);
142 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
143 tasklet_schedule(&rpcrdma_tasklet_g);
144}
145
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400146static void
147rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
148{
149 struct rpcrdma_ep *ep = context;
150
Chuck Lever7ff11de2014-11-08 20:15:01 -0500151 pr_err("RPC: %s: %s on device %s ep %p\n",
152 __func__, ASYNC_MSG(event->event),
153 event->device->name, context);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400154 if (ep->rep_connected == 1) {
155 ep->rep_connected = -EIO;
156 ep->rep_func(ep);
157 wake_up_all(&ep->rep_connect_wait);
158 }
159}
160
161static void
162rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
163{
164 struct rpcrdma_ep *ep = context;
165
Chuck Lever7ff11de2014-11-08 20:15:01 -0500166 pr_err("RPC: %s: %s on device %s ep %p\n",
167 __func__, ASYNC_MSG(event->event),
168 event->device->name, context);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400169 if (ep->rep_connected == 1) {
170 ep->rep_connected = -EIO;
171 ep->rep_func(ep);
172 wake_up_all(&ep->rep_connect_wait);
173 }
174}
175
static const char * const wc_status[] = {
	"success",
	"local length error",
	"local QP operation error",
	"local EE context operation error",
	"local protection error",
	"WR flushed",
	"memory management operation error",
	"bad response error",
	"local access error",
	"remote invalid request error",
	"remote access error",
	"remote operation error",
	"transport retry counter exceeded",
	"RNR retry counter exceeded",
	"local RDD violation error",
	"remote invalid RD request",
	"operation aborted",
	"invalid EE context number",
	"invalid EE context state",
	"fatal error",
	"response timeout error",
	"general error",
};

#define COMPLETION_MSG(status)					\
	((status) < ARRAY_SIZE(wc_status) ?			\
		wc_status[(status)] : "unexpected completion error")

static void
rpcrdma_sendcq_process_wc(struct ib_wc *wc)
{
	if (likely(wc->status == IB_WC_SUCCESS))
		return;

	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->wr_id == 0ULL) {
		if (wc->status != IB_WC_WR_FLUSH_ERR)
			pr_err("RPC: %s: SEND: %s\n",
			       __func__, COMPLETION_MSG(wc->status));
	} else {
		struct rpcrdma_mw *r;

		r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
		r->r.frmr.fr_state = FRMR_IS_STALE;
		pr_err("RPC: %s: frmr %p (stale): %s\n",
		       __func__, r, COMPLETION_MSG(wc->status));
	}
}

static int
rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct ib_wc *wcs;
	int budget, count, rc;

	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_send_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			return rc;

		count = rc;
		while (count-- > 0)
			rpcrdma_sendcq_process_wc(wcs++);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	return 0;
}

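/* Note on the polling loop above: the budget bounds how much work a
 * single completion upcall will do.  Each pass drains at most
 * RPCRDMA_POLLSIZE work completions, and at most
 * RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE passes are made.  For example,
 * if RPCRDMA_WC_BUDGET were 1024 and RPCRDMA_POLLSIZE were 16
 * (hypothetical values; see xprt_rdma.h for the real definitions),
 * the loop would make at most 64 passes before returning so the CQ
 * can be re-armed.
 */
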
/*
 * Handle send, fast_reg_mr, and local_inv completions.
 *
 * Send events are typically suppressed and thus do not result
 * in an upcall. Occasionally one is signaled, however. This
 * prevents the provider's completion queue from wrapping and
 * losing a completion.
 */
static void
rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_sendcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC: %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_sendcq_poll(cq, ep);
}

static void
rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
{
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long)wc->wr_id;

	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS)
		goto out_fail;

	/* status == SUCCESS means all fields in wc are trustworthy */
	if (wc->opcode != IB_WC_RECV)
		return;

	dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n",
		__func__, rep, wc->byte_len);

	rep->rr_len = wc->byte_len;
	ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
				   rep->rr_iov.addr, rep->rr_len,
				   DMA_FROM_DEVICE);

	if (rep->rr_len >= 16) {
		struct rpcrdma_msg *p = (struct rpcrdma_msg *)rep->rr_base;
		unsigned int credits = ntohl(p->rm_credit);

		if (credits == 0)
			credits = 1;	/* don't deadlock */
		else if (credits > rep->rr_buffer->rb_max_requests)
			credits = rep->rr_buffer->rb_max_requests;
		atomic_set(&rep->rr_buffer->rb_credits, credits);
	}

out_schedule:
	list_add_tail(&rep->rr_list, sched_list);
	return;
out_fail:
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("RPC: %s: rep %p: %s\n",
		       __func__, rep, COMPLETION_MSG(wc->status));
	rep->rr_len = ~0U;
	goto out_schedule;
}

static int
rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct list_head sched_list;
	struct ib_wc *wcs;
	int budget, count, rc;

	INIT_LIST_HEAD(&sched_list);
	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_recv_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			goto out_schedule;

		count = rc;
		while (count-- > 0)
			rpcrdma_recvcq_process_wc(wcs++, &sched_list);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	rc = 0;

out_schedule:
	rpcrdma_schedule_tasklet(&sched_list);
	return rc;
}

/*
 * Handle receive completions.
 *
 * It is reentrant but processes single events in order to maintain
 * ordering of receives to keep server credits.
 *
 * It is the responsibility of the scheduled tasklet to return
 * recv buffers to the pool. NOTE: this affects synchronization of
 * connection shutdown. That is, the structures required for
 * the completion of the reply handler must remain intact until
 * all memory has been reclaimed.
 */
static void
rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_recvcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC: %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_recvcq_poll(cq, ep);
}

static void
rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
{
	struct ib_wc wc;
	LIST_HEAD(sched_list);

	while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
		rpcrdma_recvcq_process_wc(&wc, &sched_list);
	if (!list_empty(&sched_list))
		rpcrdma_schedule_tasklet(&sched_list);
	while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
		rpcrdma_sendcq_process_wc(&wc);
}
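
/* A reader's note: rpcrdma_flush_cqs() drains both completion queues by
 * hand.  Any completed receives found are queued for the reply tasklet
 * exactly as they would be from the normal upcall path; it is used on
 * the reconnect and disconnect paths below.
 */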

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
static const char * const conn[] = {
	"address resolved",
	"address error",
	"route resolved",
	"route error",
	"connect request",
	"connect response",
	"connect error",
	"unreachable",
	"rejected",
	"established",
	"disconnected",
	"device removal",
	"multicast join",
	"multicast error",
	"address change",
	"timewait exit",
};

#define CONNECTION_MSG(status)					\
	((status) < ARRAY_SIZE(conn) ?				\
		conn[(status)] : "unrecognized connection error")
#endif

static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
#endif
	struct ib_qp_attr attr;
	struct ib_qp_init_attr iattr;
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ia->ri_async_rc = 0;
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		ib_query_qp(ia->ri_id->qp, &attr,
			    IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
			    &iattr);
		dprintk("RPC: %s: %d responder resources"
			" (%d initiator)\n",
			__func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		connstate = -ECONNREFUSED;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
		goto connected;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		connstate = -ENODEV;
connected:
		atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
		dprintk("RPC: %s: %sconnected\n",
			__func__, connstate > 0 ? "" : "dis");
		ep->rep_connected = connstate;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		/*FALLTHROUGH*/
	default:
		dprintk("RPC: %s: %pI4:%u (ep 0x%p): %s\n",
			__func__, &addr->sin_addr.s_addr,
			ntohs(addr->sin_port), ep,
			CONNECTION_MSG(event->event));
		break;
	}

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	if (connstate == 1) {
		int ird = attr.max_dest_rd_atomic;
		int tird = ep->rep_remote_cma.responder_resources;
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
			"on %s, memreg %d slots %d ird %d%s\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			ia->ri_id->device->name,
			ia->ri_memreg_strategy,
			xprt->rx_buf.rb_max_requests,
			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
	} else if (connstate < 0) {
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			connstate);
	}
#endif

	return 0;
}

static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
			struct rpcrdma_ia *ia, struct sockaddr *addr)
{
	struct rdma_cm_id *id;
	int rc;

	init_completion(&ia->ri_done);

	id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		dprintk("RPC: %s: rdma_create_id() failed %i\n",
			__func__, rc);
		return id;
	}

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}

/*
 * Drain any cq, prior to teardown.
 */
static void
rpcrdma_clean_cq(struct ib_cq *cq)
{
	struct ib_wc wc;
	int count = 0;

	while (1 == ib_poll_cq(cq, 1, &wc))
		++count;

	if (count)
		dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
			__func__, count, wc.opcode);
}

/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc, mem_priv;
	struct ib_device_attr devattr;
	struct rpcrdma_ia *ia = &xprt->rx_ia;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	/*
	 * Query the device to determine if the requested memory
	 * registration strategy is supported. If it isn't, set the
	 * strategy to a globally supported model.
	 */
	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out2;
	}

	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
	}

	if (memreg == RPCRDMA_FRMR) {
		/* Requires both frmr reg and local dma lkey */
		if ((devattr.device_cap_flags &
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
			dprintk("RPC: %s: FRMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_MTHCAFMR;
		} else {
			/* Mind the ia limit on FRMR page list depth */
			ia->ri_max_frmr_depth = min_t(unsigned int,
					RPCRDMA_MAX_DATA_SEGS,
					devattr.max_fast_reg_page_list_len);
		}
	}
	if (memreg == RPCRDMA_MTHCAFMR) {
		if (!ia->ri_id->device->alloc_fmr) {
			dprintk("RPC: %s: MTHCAFMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_ALLPHYSICAL;
		}
	}

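	/* At this point the requested registration mode may have been
	 * downgraded: FRMR falls back to FMR when the device lacks
	 * fast-registration support, and FMR falls back to ALLPHYSICAL
	 * when the device has no alloc_fmr verb.  The switch below then
	 * sets up whatever base DMA MR the chosen mode still requires.
	 */
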
	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_FRMR:
		break;
	case RPCRDMA_ALLPHYSICAL:
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
	case RPCRDMA_MTHCAFMR:
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
	register_setup:
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			rc = -ENOMEM;
			goto out2;
		}
		break;
	default:
		printk(KERN_ERR "RPC: Unsupported memory "
				"registration mode: %d\n", memreg);
		rc = -ENOMEM;
		goto out2;
	}
	dprintk("RPC: %s: memory registration strategy is %d\n",
		__func__, memreg);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	rwlock_init(&ia->ri_qplock);
	return 0;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}

/*
 * Clean up/close an IA.
 *   o if event handles and PD have been initialized, free them.
 *   o close the IA
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering\n", __func__);
	if (ia->ri_bind_mem != NULL) {
		rc = ib_dereg_mr(ia->ri_bind_mem);
		dprintk("RPC: %s: ib_dereg_mr returned %i\n",
			__func__, rc);
	}
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
		if (ia->ri_id->qp)
			rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = NULL;
	}
	if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
		rc = ib_dealloc_pd(ia->ri_pd);
		dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
			__func__, rc);
	}
}

/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
				struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr devattr;
	struct ib_cq *sendcq, *recvcq;
	int rc, err;

	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		return rc;
	}

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > devattr.max_qp_wr)
		cdata->max_requests = devattr.max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	/* send_cq and recv_cq initialized below */
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR: {
		int depth = 7;

		/* Add room for frmr register and invalidate WRs.
		 * 1. FRMR reg WR for head
		 * 2. FRMR invalidate WR for head
		 * 3. N FRMR reg WRs for pagelist
		 * 4. N FRMR invalidate WRs for pagelist
		 * 5. FRMR reg WR for tail
		 * 6. FRMR invalidate WR for tail
		 * 7. The RDMA_SEND WR
		 */

		/* Calculate N if the device max FRMR depth is smaller than
		 * RPCRDMA_MAX_DATA_SEGS.
		 */
		if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
			int delta = RPCRDMA_MAX_DATA_SEGS -
				    ia->ri_max_frmr_depth;

			do {
				depth += 2;	/* FRMR reg + invalidate */
				delta -= ia->ri_max_frmr_depth;
			} while (delta > 0);

		}
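		/* Worked example (hypothetical numbers): if the device
		 * reported ri_max_frmr_depth = 16 and RPCRDMA_MAX_DATA_SEGS
		 * were 64, delta would start at 48 and the loop above would
		 * run three times, adding two WRs per pass, so depth would
		 * end up 7 + 2 * 3 = 13 WRs per RPC.
		 */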
		ep->rep_attr.cap.max_send_wr *= depth;
		if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
			cdata->max_requests = devattr.max_qp_wr / depth;
			if (!cdata->max_requests)
				return -EINVAL;
			ep->rep_attr.cap.max_send_wr = cdata->max_requests *
						       depth;
		}
		break;
	}
	default:
		break;
	}
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

	/* set trigger for requesting send completion */
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
	if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
		ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
	else if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;
	INIT_CQCOUNT(ep);
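	/* Illustration (hypothetical numbers): with max_send_wr = 40,
	 * rep_cqinit starts at 19 (assuming that is below
	 * RPCRDMA_MAX_UNSIGNALED_SENDS), so roughly one send in twenty is
	 * posted signaled; the countdown keeps the send CQ from filling
	 * with unreaped completions.  The upper cap and the <= 2 case
	 * simply bound this behavior for very large and very small
	 * queues.
	 */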
	ep->rep_ia = ia;
	init_waitqueue_head(&ep->rep_connect_wait);
	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

	sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
				  rpcrdma_cq_async_error_upcall, ep,
				  ep->rep_attr.cap.max_send_wr + 1, 0);
	if (IS_ERR(sendcq)) {
		rc = PTR_ERR(sendcq);
		dprintk("RPC: %s: failed to create send CQ: %i\n",
			__func__, rc);
		goto out1;
	}

	rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		goto out2;
	}

	recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
				  rpcrdma_cq_async_error_upcall, ep,
				  ep->rep_attr.cap.max_recv_wr + 1, 0);
	if (IS_ERR(recvcq)) {
		rc = PTR_ERR(recvcq);
		dprintk("RPC: %s: failed to create recv CQ: %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		ib_destroy_cq(recvcq);
		goto out2;
	}

	ep->rep_attr.send_cq = sendcq;
	ep->rep_attr.recv_cq = recvcq;

	/* Initialize cma parameters */

	/* RPC/RDMA does not use private data */
	ep->rep_remote_cma.private_data = NULL;
	ep->rep_remote_cma.private_data_len = 0;

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	if (devattr.max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
		ep->rep_remote_cma.responder_resources = 32;
	else
		ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;

	ep->rep_remote_cma.retry_count = 7;
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	err = ib_destroy_cq(sendcq);
	if (err)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, err);
out1:
	return rc;
}

/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering, connected is %d\n",
		__func__, ep->rep_connected);

	cancel_delayed_work_sync(&ep->rep_connect_worker);

	if (ia->ri_id->qp) {
		rpcrdma_ep_disconnect(ep, ia);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	/* padding - could be done in rpcrdma_buffer_destroy... */
	if (ep->rep_pad_mr) {
		rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
		ep->rep_pad_mr = NULL;
	}

	rpcrdma_clean_cq(ep->rep_attr.recv_cq);
	rc = ib_destroy_cq(ep->rep_attr.recv_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);

	rpcrdma_clean_cq(ep->rep_attr.send_cq);
	rc = ib_destroy_cq(ep->rep_attr.send_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);
}

/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id, *old;
	int rc = 0;
	int retry_count = 0;

	if (ep->rep_connected != 0) {
		struct rpcrdma_xprt *xprt;
retry:
		dprintk("RPC: %s: reconnecting...\n", __func__);

		rpcrdma_ep_disconnect(ep, ia);
		rpcrdma_flush_cqs(ep);

		switch (ia->ri_memreg_strategy) {
		case RPCRDMA_FRMR:
			rpcrdma_reset_frmrs(ia);
			break;
		case RPCRDMA_MTHCAFMR:
			rpcrdma_reset_fmrs(ia);
			break;
		case RPCRDMA_ALLPHYSICAL:
			break;
		default:
			rc = -EIO;
			goto out;
		}

		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		id = rpcrdma_create_id(xprt, ia,
				(struct sockaddr *)&xprt->rx_data.addr);
		if (IS_ERR(id)) {
			rc = -EHOSTUNREACH;
			goto out;
		}
		/* TEMP TEMP TEMP - fail if new device:
		 * Deregister/remarshal *all* requests!
		 * Close and recreate adapter, pd, etc!
		 * Re-determine all attributes still sane!
		 * More stuff I haven't thought of!
		 * Rrrgh!
		 */
		if (ia->ri_id->device != id->device) {
			printk("RPC: %s: can't reconnect on "
				"different device!\n", __func__);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}
		/* END TEMP */
		rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
				__func__, rc);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}

		write_lock(&ia->ri_qplock);
		old = ia->ri_id;
		ia->ri_id = id;
		write_unlock(&ia->ri_qplock);

		rdma_destroy_qp(old);
		rdma_destroy_id(old);
	} else {
		dprintk("RPC: %s: connecting...\n", __func__);
		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
				__func__, rc);
			/* do not update ep->rep_connected */
			return -ENETUNREACH;
		}
	}

	ep->rep_connected = 0;

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC: %s: rdma_connect() failed with %i\n",
			__func__, rc);
		goto out;
	}

	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

	/*
	 * Check state. A non-peer reject indicates no listener
	 * (ECONNREFUSED), which may be a transient state. All
	 * others indicate a transport condition which has already
	 * undergone best-effort recovery.
	 */
	if (ep->rep_connected == -ECONNREFUSED &&
	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
		dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
		goto retry;
	}
	if (ep->rep_connected <= 0) {
		/* Sometimes, the only way to reliably connect to remote
		 * CMs is to use same nonzero values for ORD and IRD. */
		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
		    (ep->rep_remote_cma.responder_resources == 0 ||
		     ep->rep_remote_cma.initiator_depth !=
				ep->rep_remote_cma.responder_resources)) {
			if (ep->rep_remote_cma.responder_resources == 0)
				ep->rep_remote_cma.responder_resources = 1;
			ep->rep_remote_cma.initiator_depth =
				ep->rep_remote_cma.responder_resources;
			goto retry;
		}
		rc = ep->rep_connected;
	} else {
		dprintk("RPC: %s: connected\n", __func__);
	}

out:
	if (rc)
		ep->rep_connected = rc;
	return rc;
}

/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
void
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	rpcrdma_flush_cqs(ep);
	rc = rdma_disconnect(ia->ri_id);
	if (!rc) {
		/* returns without wait if not connected */
		wait_event_interruptible(ep->rep_connect_wait,
					 ep->rep_connected != 1);
		dprintk("RPC: %s: after wait, %sconnected\n", __func__,
			(ep->rep_connected == 1) ? "still " : "dis");
	} else {
		dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
		ep->rep_connected = rc;
	}
}

static int
rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
{
	int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
	struct ib_fmr_attr fmr_attr = {
		.max_pages	= RPCRDMA_MAX_DATA_SEGS,
		.max_maps	= 1,
		.page_shift	= PAGE_SHIFT
	};
	struct rpcrdma_mw *r;
	int i, rc;

	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
	dprintk("RPC: %s: initializing %d FMRs\n", __func__, i);

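	/* Pool sizing, as an illustration of the count computed above:
	 * each RPC can consume up to RPCRDMA_MAX_SEGS MWs, plus one
	 * extra request's worth of slack.  A hypothetical
	 * rb_max_requests of 32 with RPCRDMA_MAX_SEGS of 8 would
	 * therefore allocate (32 + 1) * 8 = 264 FMRs.
	 */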
	while (i--) {
		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (r == NULL)
			return -ENOMEM;

		r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr);
		if (IS_ERR(r->r.fmr)) {
			rc = PTR_ERR(r->r.fmr);
			dprintk("RPC: %s: ib_alloc_fmr failed %i\n",
				__func__, rc);
			goto out_free;
		}

		list_add(&r->mw_list, &buf->rb_mws);
		list_add(&r->mw_all, &buf->rb_all);
	}
	return 0;

out_free:
	kfree(r);
	return rc;
}

static int
rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_frmr *f;
	struct rpcrdma_mw *r;
	int i, rc;

	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
	dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i);

	while (i--) {
		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (r == NULL)
			return -ENOMEM;
		f = &r->r.frmr;

		f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
						ia->ri_max_frmr_depth);
		if (IS_ERR(f->fr_mr)) {
			rc = PTR_ERR(f->fr_mr);
			dprintk("RPC: %s: ib_alloc_fast_reg_mr "
				"failed %i\n", __func__, rc);
			goto out_free;
		}

		f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device,
							ia->ri_max_frmr_depth);
		if (IS_ERR(f->fr_pgl)) {
			rc = PTR_ERR(f->fr_pgl);
			dprintk("RPC: %s: ib_alloc_fast_reg_page_list "
				"failed %i\n", __func__, rc);

			ib_dereg_mr(f->fr_mr);
			goto out_free;
		}

		list_add(&r->mw_list, &buf->rb_mws);
		list_add(&r->mw_all, &buf->rb_all);
	}

	return 0;

out_free:
	kfree(r);
	return rc;
}

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001180int
1181rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
1182 struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
1183{
1184 char *p;
Chuck Lever65866f82014-05-28 10:33:59 -04001185 size_t len, rlen, wlen;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001186 int i, rc;
1187
1188 buf->rb_max_requests = cdata->max_requests;
1189 spin_lock_init(&buf->rb_lock);
1190 atomic_set(&buf->rb_credits, 1);
1191
1192 /* Need to allocate:
1193 * 1. arrays for send and recv pointers
1194 * 2. arrays of struct rpcrdma_req to fill in pointers
1195 * 3. array of struct rpcrdma_rep for replies
1196 * 4. padding, if any
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001197 * Send/recv buffers in req/rep need to be registered
1198 */
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001199 len = buf->rb_max_requests *
1200 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
1201 len += cdata->padding;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001202
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001203 p = kzalloc(len, GFP_KERNEL);
1204 if (p == NULL) {
1205 dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
1206 __func__, len);
1207 rc = -ENOMEM;
1208 goto out;
1209 }
1210 buf->rb_pool = p; /* for freeing it later */
1211
1212 buf->rb_send_bufs = (struct rpcrdma_req **) p;
1213 p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
1214 buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
1215 p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];
1216
1217 /*
1218 * Register the zeroed pad buffer, if any.
1219 */
1220 if (cdata->padding) {
1221 rc = rpcrdma_register_internal(ia, p, cdata->padding,
1222 &ep->rep_pad_mr, &ep->rep_pad);
1223 if (rc)
1224 goto out;
1225 }
1226 p += cdata->padding;
1227
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001228 INIT_LIST_HEAD(&buf->rb_mws);
Chuck Lever3111d722014-07-29 17:24:28 -04001229 INIT_LIST_HEAD(&buf->rb_all);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001230 switch (ia->ri_memreg_strategy) {
Tom Talpey3197d3092008-10-09 15:00:20 -04001231 case RPCRDMA_FRMR:
Chuck Lever2e845222014-07-29 17:25:38 -04001232 rc = rpcrdma_init_frmrs(ia, buf);
1233 if (rc)
1234 goto out;
Tom Talpey3197d3092008-10-09 15:00:20 -04001235 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001236 case RPCRDMA_MTHCAFMR:
Chuck Lever2e845222014-07-29 17:25:38 -04001237 rc = rpcrdma_init_fmrs(ia, buf);
1238 if (rc)
1239 goto out;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001240 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001241 default:
1242 break;
1243 }
1244
1245 /*
1246 * Allocate/init the request/reply buffers. Doing this
1247 * using kmalloc for now -- one for each buf.
1248 */
Chuck Lever65866f82014-05-28 10:33:59 -04001249 wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req));
1250 rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep));
1251 dprintk("RPC: %s: wlen = %zu, rlen = %zu\n",
1252 __func__, wlen, rlen);
1253
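	/* Sizing note, as an illustration: "1 << fls(x)" rounds x up to
	 * the next power of two.  If, hypothetically, inline_wsize were
	 * 1024 and struct rpcrdma_req took about 600 bytes, fls(1624)
	 * would be 11 and wlen would come out as 2048 for each send
	 * buffer allocated below.
	 */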
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001254 for (i = 0; i < buf->rb_max_requests; i++) {
1255 struct rpcrdma_req *req;
1256 struct rpcrdma_rep *rep;
1257
Chuck Lever65866f82014-05-28 10:33:59 -04001258 req = kmalloc(wlen, GFP_KERNEL);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001259 if (req == NULL) {
1260 dprintk("RPC: %s: request buffer %d alloc"
1261 " failed\n", __func__, i);
1262 rc = -ENOMEM;
1263 goto out;
1264 }
1265 memset(req, 0, sizeof(struct rpcrdma_req));
1266 buf->rb_send_bufs[i] = req;
1267 buf->rb_send_bufs[i]->rl_buffer = buf;
1268
1269 rc = rpcrdma_register_internal(ia, req->rl_base,
Chuck Lever65866f82014-05-28 10:33:59 -04001270 wlen - offsetof(struct rpcrdma_req, rl_base),
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001271 &buf->rb_send_bufs[i]->rl_handle,
1272 &buf->rb_send_bufs[i]->rl_iov);
1273 if (rc)
1274 goto out;
1275
Chuck Lever65866f82014-05-28 10:33:59 -04001276 buf->rb_send_bufs[i]->rl_size = wlen -
1277 sizeof(struct rpcrdma_req);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001278
Chuck Lever65866f82014-05-28 10:33:59 -04001279 rep = kmalloc(rlen, GFP_KERNEL);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001280 if (rep == NULL) {
1281 dprintk("RPC: %s: reply buffer %d alloc failed\n",
1282 __func__, i);
1283 rc = -ENOMEM;
1284 goto out;
1285 }
1286 memset(rep, 0, sizeof(struct rpcrdma_rep));
1287 buf->rb_recv_bufs[i] = rep;
1288 buf->rb_recv_bufs[i]->rr_buffer = buf;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001289
1290 rc = rpcrdma_register_internal(ia, rep->rr_base,
Chuck Lever65866f82014-05-28 10:33:59 -04001291 rlen - offsetof(struct rpcrdma_rep, rr_base),
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001292 &buf->rb_recv_bufs[i]->rr_handle,
1293 &buf->rb_recv_bufs[i]->rr_iov);
1294 if (rc)
1295 goto out;
1296
1297 }
1298 dprintk("RPC: %s: max_requests %d\n",
1299 __func__, buf->rb_max_requests);
1300 /* done */
1301 return 0;
1302out:
1303 rpcrdma_buffer_destroy(buf);
1304 return rc;
1305}
1306
static void
rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;
	int rc;

	while (!list_empty(&buf->rb_all)) {
		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
		list_del(&r->mw_all);
		list_del(&r->mw_list);

		rc = ib_dealloc_fmr(r->r.fmr);
		if (rc)
			dprintk("RPC: %s: ib_dealloc_fmr failed %i\n",
				__func__, rc);

		kfree(r);
	}
}

static void
rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;
	int rc;

	while (!list_empty(&buf->rb_all)) {
		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
		list_del(&r->mw_all);
		list_del(&r->mw_list);

		rc = ib_dereg_mr(r->r.frmr.fr_mr);
		if (rc)
			dprintk("RPC: %s: ib_dereg_mr failed %i\n",
				__func__, rc);
		ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);

		kfree(r);
	}
}

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001348void
1349rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1350{
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001351 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
Chuck Lever2e845222014-07-29 17:25:38 -04001352 int i;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001353
1354 /* clean up in reverse order from create
1355 * 1. recv mr memory (mr free, then kfree)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001356 * 2. send mr memory (mr free, then kfree)
Chuck Lever2e845222014-07-29 17:25:38 -04001357 * 3. MWs
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001358 */
1359 dprintk("RPC: %s: entering\n", __func__);
1360
1361 for (i = 0; i < buf->rb_max_requests; i++) {
1362 if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) {
1363 rpcrdma_deregister_internal(ia,
1364 buf->rb_recv_bufs[i]->rr_handle,
1365 &buf->rb_recv_bufs[i]->rr_iov);
1366 kfree(buf->rb_recv_bufs[i]);
1367 }
1368 if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001369 rpcrdma_deregister_internal(ia,
1370 buf->rb_send_bufs[i]->rl_handle,
1371 &buf->rb_send_bufs[i]->rl_iov);
1372 kfree(buf->rb_send_bufs[i]);
1373 }
1374 }
1375
Chuck Lever2e845222014-07-29 17:25:38 -04001376 switch (ia->ri_memreg_strategy) {
1377 case RPCRDMA_FRMR:
1378 rpcrdma_destroy_frmrs(buf);
1379 break;
1380 case RPCRDMA_MTHCAFMR:
1381 rpcrdma_destroy_fmrs(buf);
1382 break;
1383 default:
1384 break;
Allen Andrews4034ba02014-05-28 10:32:09 -04001385 }
1386
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001387 kfree(buf->rb_pool);
1388}
1389
/* After a disconnect, unmap all FMRs.
 *
 * This is invoked only in the transport connect worker in order
 * to serialize with rpcrdma_register_fmr_external().
 */
static void
rpcrdma_reset_fmrs(struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct list_head *pos;
	struct rpcrdma_mw *r;
	LIST_HEAD(l);
	int rc;

	list_for_each(pos, &buf->rb_all) {
		r = list_entry(pos, struct rpcrdma_mw, mw_all);

		INIT_LIST_HEAD(&l);
		list_add(&r->r.fmr->list, &l);
		rc = ib_unmap_fmr(&l);
		if (rc)
			dprintk("RPC: %s: ib_unmap_fmr failed %i\n",
				__func__, rc);
	}
}

/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
 * an unusable state. Find FRMRs in this state and dereg / reg
 * each.  FRMRs that are VALID and attached to an rpcrdma_req are
 * also torn down.
 *
 * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
 *
 * This is invoked only in the transport connect worker in order
 * to serialize with rpcrdma_register_frmr_external().
 */
static void
rpcrdma_reset_frmrs(struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct list_head *pos;
	struct rpcrdma_mw *r;
	int rc;

	list_for_each(pos, &buf->rb_all) {
		r = list_entry(pos, struct rpcrdma_mw, mw_all);

		if (r->r.frmr.fr_state == FRMR_IS_INVALID)
			continue;

		rc = ib_dereg_mr(r->r.frmr.fr_mr);
		if (rc)
			dprintk("RPC: %s: ib_dereg_mr failed %i\n",
				__func__, rc);
		ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);

		r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
					ia->ri_max_frmr_depth);
		if (IS_ERR(r->r.frmr.fr_mr)) {
			rc = PTR_ERR(r->r.frmr.fr_mr);
			dprintk("RPC: %s: ib_alloc_fast_reg_mr"
				" failed %i\n", __func__, rc);
			continue;
		}
		r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
					ia->ri_id->device,
					ia->ri_max_frmr_depth);
		if (IS_ERR(r->r.frmr.fr_pgl)) {
			rc = PTR_ERR(r->r.frmr.fr_pgl);
			dprintk("RPC: %s: "
				"ib_alloc_fast_reg_page_list "
				"failed %i\n", __func__, rc);

			ib_dereg_mr(r->r.frmr.fr_mr);
			continue;
		}
		r->r.frmr.fr_state = FRMR_IS_INVALID;
	}
}

/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
 * some req segments uninitialized.
 */
static void
rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
{
	if (*mw) {
		list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
		*mw = NULL;
	}
}

/* Cycle mw's back in reverse order, and "spin" them.
 * This delays and scrambles reuse as much as possible.
 */
static void
rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mr_seg *seg = req->rl_segments;
	struct rpcrdma_mr_seg *seg1 = seg;
	int i;

	for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
		rpcrdma_buffer_put_mr(&seg->mr_chunk.rl_mw, buf);
	rpcrdma_buffer_put_mr(&seg1->mr_chunk.rl_mw, buf);
}

static void
rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	buf->rb_send_bufs[--buf->rb_send_index] = req;
	req->rl_niovs = 0;
	if (req->rl_reply) {
		buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
		req->rl_reply->rr_func = NULL;
		req->rl_reply = NULL;
	}
}

Chuck Leverddb6beb2014-07-29 17:24:54 -04001513/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
1514 * Redo only the ib_post_send().
1515 */
1516static void
1517rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
1518{
1519 struct rpcrdma_xprt *r_xprt =
1520 container_of(ia, struct rpcrdma_xprt, rx_ia);
1521 struct ib_send_wr invalidate_wr, *bad_wr;
1522 int rc;
1523
1524 dprintk("RPC: %s: FRMR %p is stale\n", __func__, r);
1525
1526 /* When this FRMR is re-inserted into rb_mws, it is no longer stale */
Chuck Leverdab7e3b2014-07-29 17:25:20 -04001527 r->r.frmr.fr_state = FRMR_IS_INVALID;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001528
1529 memset(&invalidate_wr, 0, sizeof(invalidate_wr));
1530 invalidate_wr.wr_id = (unsigned long)(void *)r;
1531 invalidate_wr.opcode = IB_WR_LOCAL_INV;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001532 invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
1533 DECR_CQCOUNT(&r_xprt->rx_ep);
1534
1535 dprintk("RPC: %s: frmr %p invalidating rkey %08x\n",
1536 __func__, r, r->r.frmr.fr_mr->rkey);
1537
1538 read_lock(&ia->ri_qplock);
1539 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
1540 read_unlock(&ia->ri_qplock);
1541 if (rc) {
1542 /* Force rpcrdma_buffer_get() to retry */
1543 r->r.frmr.fr_state = FRMR_IS_STALE;
1544 dprintk("RPC: %s: ib_post_send failed, %i\n",
1545 __func__, rc);
1546 }
1547}
1548
1549static void
1550rpcrdma_retry_flushed_linv(struct list_head *stale,
1551 struct rpcrdma_buffer *buf)
1552{
1553 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
1554 struct list_head *pos;
1555 struct rpcrdma_mw *r;
1556 unsigned long flags;
1557
1558 list_for_each(pos, stale) {
1559 r = list_entry(pos, struct rpcrdma_mw, mw_list);
1560 rpcrdma_retry_local_inv(r, ia);
1561 }
1562
1563 spin_lock_irqsave(&buf->rb_lock, flags);
1564 list_splice_tail(stale, &buf->rb_mws);
1565 spin_unlock_irqrestore(&buf->rb_lock, flags);
1566}
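
/* An illustrative walk-through (a reading of this file, not text from it)
 * of how a stale FRMR travels through the helpers above and the get/put
 * paths below:
 *
 *	1. A FAST_REG or LOCAL_INV work request completes in error (for
 *	   example, because it was flushed by a disconnect); the send
 *	   completion path marks the MW's frmr FRMR_IS_STALE.
 *	2. rpcrdma_buffer_get_frmrs() skips STALE MWs while filling a req,
 *	   parking them on the caller's local "stale" list instead.
 *	3. Once rb_lock has been dropped, rpcrdma_retry_flushed_linv()
 *	   re-posts a LOCAL_INV for each parked MW via
 *	   rpcrdma_retry_local_inv(), then splices them back onto rb_mws
 *	   so they can be handed out again.
 *
 * Step 1 happens in the completion handling earlier in this file; steps 2
 * and 3 are visible in rpcrdma_buffer_get() below.
 */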
1567
Chuck Leverc2922c02014-07-29 17:24:36 -04001568static struct rpcrdma_req *
Chuck Leverddb6beb2014-07-29 17:24:54 -04001569rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
1570 struct list_head *stale)
1571{
1572 struct rpcrdma_mw *r;
1573 int i;
1574
1575 i = RPCRDMA_MAX_SEGS - 1;
1576 while (!list_empty(&buf->rb_mws)) {
1577 r = list_entry(buf->rb_mws.next,
1578 struct rpcrdma_mw, mw_list);
1579 list_del(&r->mw_list);
1580 if (r->r.frmr.fr_state == FRMR_IS_STALE) {
1581 list_add(&r->mw_list, stale);
1582 continue;
1583 }
1584 req->rl_segments[i].mr_chunk.rl_mw = r;
1585 if (unlikely(i-- == 0))
1586 return req; /* Success */
1587 }
1588
1589 /* Not enough entries on rb_mws for this req */
1590 rpcrdma_buffer_put_sendbuf(req, buf);
1591 rpcrdma_buffer_put_mrs(req, buf);
1592 return NULL;
1593}
1594
1595static struct rpcrdma_req *
1596rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
Chuck Leverc2922c02014-07-29 17:24:36 -04001597{
1598 struct rpcrdma_mw *r;
1599 int i;
1600
1601 i = RPCRDMA_MAX_SEGS - 1;
1602 while (!list_empty(&buf->rb_mws)) {
1603 r = list_entry(buf->rb_mws.next,
1604 struct rpcrdma_mw, mw_list);
1605 list_del(&r->mw_list);
1606 req->rl_segments[i].mr_chunk.rl_mw = r;
1607 if (unlikely(i-- == 0))
1608 return req; /* Success */
1609 }
1610
1611 /* Not enough entries on rb_mws for this req */
1612 rpcrdma_buffer_put_sendbuf(req, buf);
1613 rpcrdma_buffer_put_mrs(req, buf);
1614 return NULL;
1615}
1616
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001617/*
1618 * Get a set of request/reply buffers.
1619 *
1620 * Reply buffer (if needed) is attached to send buffer upon return.
1621 * Rule:
1622 * rb_send_index and rb_recv_index MUST always point to the
1623 * *next* available buffer (non-NULL). They are incremented after
1624 * removing buffers, and decremented *before* returning them.
1625 */
1626struct rpcrdma_req *
1627rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1628{
Chuck Leverc2922c02014-07-29 17:24:36 -04001629 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
Chuck Leverddb6beb2014-07-29 17:24:54 -04001630 struct list_head stale;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001631 struct rpcrdma_req *req;
1632 unsigned long flags;
1633
1634 spin_lock_irqsave(&buffers->rb_lock, flags);
1635 if (buffers->rb_send_index == buffers->rb_max_requests) {
1636 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1637 dprintk("RPC: %s: out of request buffers\n", __func__);
1638	return NULL;
1639 }
1640
1641 req = buffers->rb_send_bufs[buffers->rb_send_index];
1642 if (buffers->rb_send_index < buffers->rb_recv_index) {
1643 dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
1644 __func__,
1645 buffers->rb_recv_index - buffers->rb_send_index);
1646 req->rl_reply = NULL;
1647 } else {
1648 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1649 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1650 }
1651 buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001652
1653 INIT_LIST_HEAD(&stale);
Chuck Leverc2922c02014-07-29 17:24:36 -04001654 switch (ia->ri_memreg_strategy) {
1655 case RPCRDMA_FRMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001656 req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
1657 break;
Chuck Leverc2922c02014-07-29 17:24:36 -04001658 case RPCRDMA_MTHCAFMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001659 req = rpcrdma_buffer_get_fmrs(req, buffers);
Chuck Leverc2922c02014-07-29 17:24:36 -04001660 break;
1661 default:
1662 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001663 }
1664 spin_unlock_irqrestore(&buffers->rb_lock, flags);
Chuck Leverddb6beb2014-07-29 17:24:54 -04001665 if (!list_empty(&stale))
1666 rpcrdma_retry_flushed_linv(&stale, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001667 return req;
1668}
1669
1670/*
1671 * Put request/reply buffers back into pool.
1672 * Pre-decrement counter/array index.
1673 */
1674void
1675rpcrdma_buffer_put(struct rpcrdma_req *req)
1676{
1677 struct rpcrdma_buffer *buffers = req->rl_buffer;
1678 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001679 unsigned long flags;
1680
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001681 spin_lock_irqsave(&buffers->rb_lock, flags);
Chuck Leverc2922c02014-07-29 17:24:36 -04001682 rpcrdma_buffer_put_sendbuf(req, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001683 switch (ia->ri_memreg_strategy) {
Tom Talpey3197d3092008-10-09 15:00:20 -04001684 case RPCRDMA_FRMR:
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001685 case RPCRDMA_MTHCAFMR:
Chuck Leverc2922c02014-07-29 17:24:36 -04001686 rpcrdma_buffer_put_mrs(req, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001687 break;
1688 default:
1689 break;
1690 }
1691 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1692}
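
/* A minimal usage sketch (illustrative only; the real callers live in the
 * transport code outside this file) of how the pool calls pair up per RPC:
 *
 *	struct rpcrdma_req *req;
 *
 *	req = rpcrdma_buffer_get(&r_xprt->rx_buf);
 *	if (req == NULL)
 *		return NULL;
 *	... marshal the RPC into req, post it, handle the reply ...
 *	rpcrdma_buffer_put(req);
 *
 * rpcrdma_buffer_get() returns NULL when rb_send_bufs is exhausted or when
 * too few MWs remain on rb_mws for the request's segments; callers should
 * treat that as a transient shortage rather than a fatal error.
 */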
1693
1694/*
1695 * Recover reply buffers from pool.
1696 * This happens when recovering from error conditions.
1697 * Post-increment counter/array index.
1698 */
1699void
1700rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
1701{
1702 struct rpcrdma_buffer *buffers = req->rl_buffer;
1703 unsigned long flags;
1704
1705 if (req->rl_iov.length == 0) /* special case xprt_rdma_allocate() */
1706 buffers = ((struct rpcrdma_req *) buffers)->rl_buffer;
1707 spin_lock_irqsave(&buffers->rb_lock, flags);
1708 if (buffers->rb_recv_index < buffers->rb_max_requests) {
1709 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1710 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1711 }
1712 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1713}
1714
1715/*
1716 * Put reply buffers back into pool when not attached to
Chuck Leverb45ccfd2014-05-28 10:32:34 -04001717 * request. This happens in error conditions.
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001718 */
1719void
1720rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
1721{
1722 struct rpcrdma_buffer *buffers = rep->rr_buffer;
1723 unsigned long flags;
1724
1725 rep->rr_func = NULL;
1726 spin_lock_irqsave(&buffers->rb_lock, flags);
1727 buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
1728 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1729}
1730
1731/*
1732 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
1733 */
1734
1735int
1736rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
1737 struct ib_mr **mrp, struct ib_sge *iov)
1738{
1739 struct ib_phys_buf ipb;
1740 struct ib_mr *mr;
1741 int rc;
1742
1743 /*
1744 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
1745 */
1746 iov->addr = ib_dma_map_single(ia->ri_id->device,
1747 va, len, DMA_BIDIRECTIONAL);
Yan Burmanbf858ab2014-06-19 16:06:30 +03001748 if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
1749 return -ENOMEM;
1750
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001751 iov->length = len;
1752
Tom Talpeybd7ed1d2008-10-09 15:00:09 -04001753 if (ia->ri_have_dma_lkey) {
1754 *mrp = NULL;
1755 iov->lkey = ia->ri_dma_lkey;
1756 return 0;
1757 } else if (ia->ri_bind_mem != NULL) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001758 *mrp = NULL;
1759 iov->lkey = ia->ri_bind_mem->lkey;
1760 return 0;
1761 }
1762
1763 ipb.addr = iov->addr;
1764 ipb.size = iov->length;
1765 mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
1766 IB_ACCESS_LOCAL_WRITE, &iov->addr);
1767
1768 dprintk("RPC: %s: phys convert: 0x%llx "
1769 "registered 0x%llx length %d\n",
Andrew Mortona56daeb2007-10-16 01:29:57 -07001770 __func__, (unsigned long long)ipb.addr,
1771 (unsigned long long)iov->addr, len);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001772
1773 if (IS_ERR(mr)) {
1774 *mrp = NULL;
1775 rc = PTR_ERR(mr);
1776 dprintk("RPC: %s: failed with %i\n", __func__, rc);
1777 } else {
1778 *mrp = mr;
1779 iov->lkey = mr->lkey;
1780 rc = 0;
1781 }
1782
1783 return rc;
1784}
1785
1786int
1787rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
1788 struct ib_mr *mr, struct ib_sge *iov)
1789{
1790 int rc;
1791
1792 ib_dma_unmap_single(ia->ri_id->device,
1793 iov->addr, iov->length, DMA_BIDIRECTIONAL);
1794
1795	if (mr == NULL)
1796 return 0;
1797
1798 rc = ib_dereg_mr(mr);
1799 if (rc)
1800 dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
1801 return rc;
1802}
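
/* A minimal sketch (assumed usage, not copied from a caller) of pairing the
 * internal registration wrappers for a kmalloc'ed buffer that will be used
 * as a local SGE:
 *
 *	struct ib_mr *mr;
 *	struct ib_sge iov;
 *	void *p = kmalloc(len, GFP_KERNEL);
 *
 *	if (p == NULL)
 *		return -ENOMEM;
 *	rc = rpcrdma_register_internal(ia, p, len, &mr, &iov);
 *	if (rc) {
 *		kfree(p);
 *		return rc;
 *	}
 *	... use iov.addr, iov.length, iov.lkey in send/recv work requests ...
 *	rpcrdma_deregister_internal(ia, mr, &iov);
 *	kfree(p);
 *
 * When the device provides a global DMA lkey or a "bind_mem" MR already
 * exists, *mrp comes back NULL and only the DMA mapping needs undoing;
 * rpcrdma_deregister_internal() handles that by checking for a NULL mr.
 */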
1803
1804/*
1805 * Wrappers for chunk registration, shared by read/write chunk code.
1806 */
1807
1808static void
1809rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
1810{
1811 seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
1812 seg->mr_dmalen = seg->mr_len;
1813 if (seg->mr_page)
1814 seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
1815 seg->mr_page, offset_in_page(seg->mr_offset),
1816 seg->mr_dmalen, seg->mr_dir);
1817 else
1818 seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
1819 seg->mr_offset,
1820 seg->mr_dmalen, seg->mr_dir);
Tom Tucker5c635e02011-02-09 19:45:34 +00001821 if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
1822 dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
1823 __func__,
Randy Dunlap986d4ab2011-03-15 17:11:59 -07001824 (unsigned long long)seg->mr_dma,
1825 seg->mr_offset, seg->mr_dmalen);
Tom Tucker5c635e02011-02-09 19:45:34 +00001826 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001827}
1828
1829static void
1830rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
1831{
1832 if (seg->mr_page)
1833 ib_dma_unmap_page(ia->ri_id->device,
1834 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1835 else
1836 ib_dma_unmap_single(ia->ri_id->device,
1837 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1838}
1839
Tom Talpey8d4ba032008-10-09 14:59:49 -04001840static int
Tom Talpey3197d3092008-10-09 15:00:20 -04001841rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1842 int *nsegs, int writing, struct rpcrdma_ia *ia,
1843 struct rpcrdma_xprt *r_xprt)
1844{
1845 struct rpcrdma_mr_seg *seg1 = seg;
Chuck Lever0dbb4102014-07-29 17:24:09 -04001846 struct rpcrdma_mw *mw = seg1->mr_chunk.rl_mw;
1847 struct rpcrdma_frmr *frmr = &mw->r.frmr;
1848 struct ib_mr *mr = frmr->fr_mr;
Chuck Leverf590e872014-07-29 17:25:29 -04001849 struct ib_send_wr fastreg_wr, *bad_wr;
Tom Talpey3197d3092008-10-09 15:00:20 -04001850 u8 key;
1851 int len, pageoff;
1852 int i, rc;
Tom Tucker9b781452012-02-20 13:07:57 -06001853 int seg_len;
1854 u64 pa;
1855 int page_no;
Tom Talpey3197d3092008-10-09 15:00:20 -04001856
1857 pageoff = offset_in_page(seg1->mr_offset);
1858 seg1->mr_offset -= pageoff; /* start of page */
1859 seg1->mr_len += pageoff;
1860 len = -pageoff;
Steve Wise0fc6c4e2014-05-28 10:32:00 -04001861 if (*nsegs > ia->ri_max_frmr_depth)
1862 *nsegs = ia->ri_max_frmr_depth;
Tom Tucker9b781452012-02-20 13:07:57 -06001863 for (page_no = i = 0; i < *nsegs;) {
Tom Talpey3197d3092008-10-09 15:00:20 -04001864 rpcrdma_map_one(ia, seg, writing);
Tom Tucker9b781452012-02-20 13:07:57 -06001865 pa = seg->mr_dma;
1866 for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
Chuck Lever0dbb4102014-07-29 17:24:09 -04001867 frmr->fr_pgl->page_list[page_no++] = pa;
Tom Tucker9b781452012-02-20 13:07:57 -06001868 pa += PAGE_SIZE;
1869 }
Tom Talpey3197d3092008-10-09 15:00:20 -04001870 len += seg->mr_len;
1871 ++seg;
1872 ++i;
1873 /* Check for holes */
1874 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1875 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1876 break;
1877 }
1878 dprintk("RPC: %s: Using frmr %p to map %d segments\n",
Chuck Lever0dbb4102014-07-29 17:24:09 -04001879 __func__, mw, i);
Tom Talpey3197d3092008-10-09 15:00:20 -04001880
Chuck Lever05055722014-07-29 17:25:12 -04001881 frmr->fr_state = FRMR_IS_VALID;
1882
Chuck Leverf590e872014-07-29 17:25:29 -04001883 memset(&fastreg_wr, 0, sizeof(fastreg_wr));
1884 fastreg_wr.wr_id = (unsigned long)(void *)mw;
1885 fastreg_wr.opcode = IB_WR_FAST_REG_MR;
1886 fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma;
1887 fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
1888 fastreg_wr.wr.fast_reg.page_list_len = page_no;
1889 fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
1890 fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
1891 if (fastreg_wr.wr.fast_reg.length < len) {
Chuck Lever5fc83f42014-07-29 17:23:17 -04001892 rc = -EIO;
1893 goto out_err;
Chuck Leverc977dea2014-05-28 10:35:06 -04001894 }
1895
1896 /* Bump the key */
Chuck Lever0dbb4102014-07-29 17:24:09 -04001897 key = (u8)(mr->rkey & 0x000000FF);
1898 ib_update_fast_reg_key(mr, ++key);
Chuck Leverc977dea2014-05-28 10:35:06 -04001899
Chuck Leverf590e872014-07-29 17:25:29 -04001900 fastreg_wr.wr.fast_reg.access_flags = (writing ?
Vu Pham68743082009-05-26 14:51:00 -04001901 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
1902 IB_ACCESS_REMOTE_READ);
Chuck Leverf590e872014-07-29 17:25:29 -04001903 fastreg_wr.wr.fast_reg.rkey = mr->rkey;
Tom Talpey3197d3092008-10-09 15:00:20 -04001904 DECR_CQCOUNT(&r_xprt->rx_ep);
1905
Chuck Leverf590e872014-07-29 17:25:29 -04001906 rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
Tom Talpey3197d3092008-10-09 15:00:20 -04001907 if (rc) {
1908 dprintk("RPC: %s: failed ib_post_send for register,"
1909 " status %i\n", __func__, rc);
Chuck Leverc93e9862014-07-29 17:24:19 -04001910 ib_update_fast_reg_key(mr, --key);
Chuck Lever5fc83f42014-07-29 17:23:17 -04001911 goto out_err;
Tom Talpey3197d3092008-10-09 15:00:20 -04001912 } else {
Chuck Lever0dbb4102014-07-29 17:24:09 -04001913 seg1->mr_rkey = mr->rkey;
Tom Talpey3197d3092008-10-09 15:00:20 -04001914 seg1->mr_base = seg1->mr_dma + pageoff;
1915 seg1->mr_nsegs = i;
1916 seg1->mr_len = len;
1917 }
1918 *nsegs = i;
Chuck Lever5fc83f42014-07-29 17:23:17 -04001919 return 0;
1920out_err:
Chuck Lever05055722014-07-29 17:25:12 -04001921 frmr->fr_state = FRMR_IS_INVALID;
Chuck Lever5fc83f42014-07-29 17:23:17 -04001922 while (i--)
1923 rpcrdma_unmap_one(ia, --seg);
Tom Talpey3197d3092008-10-09 15:00:20 -04001924 return rc;
1925}
1926
1927static int
1928rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
1929 struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
1930{
1931 struct rpcrdma_mr_seg *seg1 = seg;
1932 struct ib_send_wr invalidate_wr, *bad_wr;
1933 int rc;
1934
Chuck Leverdab7e3b2014-07-29 17:25:20 -04001935 seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
1936
Tom Talpey3197d3092008-10-09 15:00:20 -04001937	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
Tom Tucker5c635e02011-02-09 19:45:34 +00001938 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
Tom Talpey3197d3092008-10-09 15:00:20 -04001939 invalidate_wr.opcode = IB_WR_LOCAL_INV;
Tom Talpey3197d3092008-10-09 15:00:20 -04001940 invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1941 DECR_CQCOUNT(&r_xprt->rx_ep);
1942
Chuck Lever73806c82014-07-29 17:23:25 -04001943 read_lock(&ia->ri_qplock);
1944 while (seg1->mr_nsegs--)
1945 rpcrdma_unmap_one(ia, seg++);
Tom Talpey3197d3092008-10-09 15:00:20 -04001946 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
Chuck Lever73806c82014-07-29 17:23:25 -04001947 read_unlock(&ia->ri_qplock);
Chuck Leverdab7e3b2014-07-29 17:25:20 -04001948 if (rc) {
1949 /* Force rpcrdma_buffer_get() to retry */
1950 seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
Tom Talpey3197d3092008-10-09 15:00:20 -04001951 dprintk("RPC: %s: failed ib_post_send for invalidate,"
1952 " status %i\n", __func__, rc);
Chuck Leverdab7e3b2014-07-29 17:25:20 -04001953 }
Tom Talpey3197d3092008-10-09 15:00:20 -04001954 return rc;
1955}
1956
1957static int
Tom Talpey8d4ba032008-10-09 14:59:49 -04001958rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
1959 int *nsegs, int writing, struct rpcrdma_ia *ia)
1960{
1961 struct rpcrdma_mr_seg *seg1 = seg;
1962 u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
1963 int len, pageoff, i, rc;
1964
1965 pageoff = offset_in_page(seg1->mr_offset);
1966 seg1->mr_offset -= pageoff; /* start of page */
1967 seg1->mr_len += pageoff;
1968 len = -pageoff;
1969 if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
1970 *nsegs = RPCRDMA_MAX_DATA_SEGS;
1971 for (i = 0; i < *nsegs;) {
1972 rpcrdma_map_one(ia, seg, writing);
1973 physaddrs[i] = seg->mr_dma;
1974 len += seg->mr_len;
1975 ++seg;
1976 ++i;
1977 /* Check for holes */
1978 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1979 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1980 break;
1981 }
1982 rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
1983 physaddrs, i, seg1->mr_dma);
1984 if (rc) {
1985 dprintk("RPC: %s: failed ib_map_phys_fmr "
1986 "%u@0x%llx+%i (%d)... status %i\n", __func__,
1987 len, (unsigned long long)seg1->mr_dma,
1988 pageoff, i, rc);
1989 while (i--)
1990 rpcrdma_unmap_one(ia, --seg);
1991 } else {
1992 seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
1993 seg1->mr_base = seg1->mr_dma + pageoff;
1994 seg1->mr_nsegs = i;
1995 seg1->mr_len = len;
1996 }
1997 *nsegs = i;
1998 return rc;
1999}
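
/* The "Check for holes" test in both registration loops above coalesces
 * adjacent segments only while they describe one physically contiguous,
 * page-aligned byte range. An illustrative case (assuming 4 KB pages):
 *
 *	seg[0]: page offset 0x000, len 0x1000	full page
 *	seg[1]: page offset 0x000, len 0x1000	full page
 *	seg[2]: page offset 0x200, len 0x0600	starts mid-page: a "hole"
 *
 * The loop registers seg[0] and seg[1] together and stops before seg[2],
 * leaving it for a subsequent call; *nsegs reports how many segments were
 * folded into this registration.
 */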
2000
2001static int
2002rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
2003 struct rpcrdma_ia *ia)
2004{
2005 struct rpcrdma_mr_seg *seg1 = seg;
2006 LIST_HEAD(l);
2007 int rc;
2008
2009 list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l);
2010 rc = ib_unmap_fmr(&l);
Chuck Lever73806c82014-07-29 17:23:25 -04002011 read_lock(&ia->ri_qplock);
Tom Talpey8d4ba032008-10-09 14:59:49 -04002012 while (seg1->mr_nsegs--)
2013 rpcrdma_unmap_one(ia, seg++);
Chuck Lever73806c82014-07-29 17:23:25 -04002014 read_unlock(&ia->ri_qplock);
Tom Talpey8d4ba032008-10-09 14:59:49 -04002015 if (rc)
2016 dprintk("RPC: %s: failed ib_unmap_fmr,"
2017 " status %i\n", __func__, rc);
2018 return rc;
2019}
2020
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002021int
2022rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
2023 int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
2024{
2025 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002026 int rc = 0;
2027
2028 switch (ia->ri_memreg_strategy) {
2029
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002030 case RPCRDMA_ALLPHYSICAL:
2031 rpcrdma_map_one(ia, seg, writing);
2032 seg->mr_rkey = ia->ri_bind_mem->rkey;
2033 seg->mr_base = seg->mr_dma;
2034 seg->mr_nsegs = 1;
2035 nsegs = 1;
2036 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002037
Tom Talpey3197d3092008-10-09 15:00:20 -04002038	/* Registration using fast registration memory regions (FRMR) */
2039 case RPCRDMA_FRMR:
2040 rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
2041 break;
2042
Tom Talpey8d4ba032008-10-09 14:59:49 -04002043	/* Registration using fast memory regions (FMR) */
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002044 case RPCRDMA_MTHCAFMR:
Tom Talpey8d4ba032008-10-09 14:59:49 -04002045 rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002046 break;
2047
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002048 default:
Chuck Lever92b98362014-11-08 20:14:12 -05002049 return -EIO;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002050 }
2051 if (rc)
Chuck Lever92b98362014-11-08 20:14:12 -05002052 return rc;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002053
2054 return nsegs;
2055}
2056
2057int
2058rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
Chuck Lever13c9ff82014-05-28 10:33:08 -04002059 struct rpcrdma_xprt *r_xprt)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002060{
2061 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002062 int nsegs = seg->mr_nsegs, rc;
2063
2064 switch (ia->ri_memreg_strategy) {
2065
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002066 case RPCRDMA_ALLPHYSICAL:
Chuck Lever73806c82014-07-29 17:23:25 -04002067 read_lock(&ia->ri_qplock);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002068 rpcrdma_unmap_one(ia, seg);
Chuck Lever73806c82014-07-29 17:23:25 -04002069 read_unlock(&ia->ri_qplock);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002070 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002071
Tom Talpey3197d3092008-10-09 15:00:20 -04002072 case RPCRDMA_FRMR:
2073 rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
2074 break;
2075
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002076 case RPCRDMA_MTHCAFMR:
Tom Talpey8d4ba032008-10-09 14:59:49 -04002077 rc = rpcrdma_deregister_fmr_external(seg, ia);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002078 break;
2079
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002080 default:
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002081 break;
2082 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002083 return nsegs;
2084}
2085
2086/*
2087 * Prepost any receive buffer, then post send.
2088 *
2089 * Receive buffer is donated to hardware, reclaimed upon recv completion.
2090 */
2091int
2092rpcrdma_ep_post(struct rpcrdma_ia *ia,
2093 struct rpcrdma_ep *ep,
2094 struct rpcrdma_req *req)
2095{
2096 struct ib_send_wr send_wr, *send_wr_fail;
2097 struct rpcrdma_rep *rep = req->rl_reply;
2098 int rc;
2099
2100 if (rep) {
2101 rc = rpcrdma_ep_post_recv(ia, ep, rep);
2102 if (rc)
2103 goto out;
2104 req->rl_reply = NULL;
2105 }
2106
2107 send_wr.next = NULL;
2108 send_wr.wr_id = 0ULL; /* no send cookie */
2109 send_wr.sg_list = req->rl_send_iov;
2110 send_wr.num_sge = req->rl_niovs;
2111 send_wr.opcode = IB_WR_SEND;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002112 if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */
2113 ib_dma_sync_single_for_device(ia->ri_id->device,
2114 req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
2115 DMA_TO_DEVICE);
2116 ib_dma_sync_single_for_device(ia->ri_id->device,
2117 req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
2118 DMA_TO_DEVICE);
2119 ib_dma_sync_single_for_device(ia->ri_id->device,
2120 req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
2121 DMA_TO_DEVICE);
2122
2123 if (DECR_CQCOUNT(ep) > 0)
2124 send_wr.send_flags = 0;
2125 else { /* Provider must take a send completion every now and then */
2126 INIT_CQCOUNT(ep);
2127 send_wr.send_flags = IB_SEND_SIGNALED;
2128 }
2129
2130 rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
2131 if (rc)
2132 dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
2133 rc);
2134out:
2135 return rc;
2136}
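
/* A note on the send-signaling policy above (illustrative numbers, since
 * the CQ budget is defined elsewhere): DECR_CQCOUNT() counts down a
 * per-endpoint budget that INIT_CQCOUNT() refills. Most sends are posted
 * unsignaled; only when the budget runs out is IB_SEND_SIGNALED set, so the
 * provider generates a send completion often enough to keep the send CQ
 * from overflowing. If the budget were 64, for instance, every 64th send WR
 * would request a completion and the other 63 would complete silently.
 */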
2137
2138/*
2139 * (Re)post a receive buffer.
2140 */
2141int
2142rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
2143 struct rpcrdma_ep *ep,
2144 struct rpcrdma_rep *rep)
2145{
2146 struct ib_recv_wr recv_wr, *recv_wr_fail;
2147 int rc;
2148
2149 recv_wr.next = NULL;
2150 recv_wr.wr_id = (u64) (unsigned long) rep;
2151 recv_wr.sg_list = &rep->rr_iov;
2152 recv_wr.num_sge = 1;
2153
2154 ib_dma_sync_single_for_cpu(ia->ri_id->device,
2155 rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);
2156
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002157 rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
2158
2159 if (rc)
2160 dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
2161 rc);
2162 return rc;
2163}
Chuck Lever43e95982014-07-29 17:23:34 -04002164
2165/* Physical mapping means one Read/Write list entry per page.
2166 * All list entries must fit within an inline buffer.
2167 *
2168 * NB: The server must return a Write list for NFS READ,
2169 * which has the same constraint. Factor in the inline
2170 * rsize as well.
2171 */
2172static size_t
2173rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt)
2174{
2175 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
2176 unsigned int inline_size, pages;
2177
2178 inline_size = min_t(unsigned int,
2179 cdata->inline_wsize, cdata->inline_rsize);
2180 inline_size -= RPCRDMA_HDRLEN_MIN;
2181 pages = inline_size / sizeof(struct rpcrdma_segment);
2182 return pages << PAGE_SHIFT;
2183}
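
/* A worked example of the calculation above, using assumed values (they are
 * defined in headers, not in this file): with 4 KB pages, 1024-byte inline
 * send and receive sizes, a 28-byte minimum RPC-over-RDMA header, and
 * 16-byte struct rpcrdma_segment entries:
 *
 *	inline_size = 1024 - 28 = 996
 *	pages       = 996 / 16  = 62
 *	max payload = 62 * 4096 = 253952 bytes (248 KB)
 *
 * so ALLPHYSICAL registration caps the payload well below what the MR-based
 * strategies below allow.
 */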
2184
2185static size_t
2186rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt)
2187{
2188 return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
2189}
2190
2191size_t
2192rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt)
2193{
2194 size_t result;
2195
2196 switch (r_xprt->rx_ia.ri_memreg_strategy) {
2197 case RPCRDMA_ALLPHYSICAL:
2198 result = rpcrdma_physical_max_payload(r_xprt);
2199 break;
2200 default:
2201 result = rpcrdma_mr_max_payload(r_xprt);
2202 }
2203 return result;
2204}
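
/* For the MR-based strategies, rpcrdma_mr_max_payload() bounds the payload
 * at RPCRDMA_MAX_DATA_SEGS pages. Assuming the constant is 64 and pages are
 * 4 KB (both defined in headers, not here), that works out to
 * 64 * 4096 = 262144 bytes, or 256 KB per RPC.
 */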