/*
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following
 * disclaimer in the documentation and/or other materials provided
 * with the distribution.
 *
 * Neither the name of the Network Appliance, Inc. nor the names of
 * its contributors may be used to endorse or promote products
 * derived from this software without specific prior written
 * permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * verbs.c
 *
 * Encapsulates the major functions managing:
 *  o adapters
 *  o endpoints
 *  o connections
 *  o buffer memory
 */

#include <linux/interrupt.h>
#include <linux/slab.h>
#include <asm/bitops.h>

#include "xprt_rdma.h"

/*
 * Globals/Macros
 */

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);
static void rpcrdma_reset_fmrs(struct rpcrdma_ia *);

/*
 * internal functions
 */

/*
 * handle replies in tasklet context, using a single, global list
 * rdma tasklet function -- just turn around and call the func
 * for all replies on the list
 */

static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
static LIST_HEAD(rpcrdma_tasklets_g);

static void
rpcrdma_run_tasklet(unsigned long data)
{
	struct rpcrdma_rep *rep;
	void (*func)(struct rpcrdma_rep *);
	unsigned long flags;

	data = data;
	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	while (!list_empty(&rpcrdma_tasklets_g)) {
		rep = list_entry(rpcrdma_tasklets_g.next,
				 struct rpcrdma_rep, rr_list);
		list_del(&rep->rr_list);
		func = rep->rr_func;
		rep->rr_func = NULL;
		spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);

		if (func)
			func(rep);
		else
			rpcrdma_recv_buffer_put(rep);

		spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	}
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
}

static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);

static const char * const async_event[] = {
	"CQ error",
	"QP fatal error",
	"QP request error",
	"QP access error",
	"communication established",
	"send queue drained",
	"path migration successful",
	"path mig error",
	"device fatal error",
	"port active",
	"port error",
	"LID change",
	"P_key change",
	"SM change",
	"SRQ error",
	"SRQ limit reached",
	"last WQE reached",
	"client reregister",
	"GID change",
};

#define ASYNC_MSG(status)					\
	((status) < ARRAY_SIZE(async_event) ?			\
	 async_event[(status)] : "unknown async error")

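/* Hand a list of completed replies to the reply tasklet: the list is
 * spliced onto the global rpcrdma_tasklets_g under rpcrdma_tk_lock_g,
 * then the tasklet is scheduled to process it in softirq context.
 */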
static void
rpcrdma_schedule_tasklet(struct list_head *sched_list)
{
	unsigned long flags;

	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	list_splice_tail(sched_list, &rpcrdma_tasklets_g);
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
	tasklet_schedule(&rpcrdma_tasklet_g);
}

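/* Asynchronous QP and CQ error events. Both handlers log the event and,
 * if the endpoint is currently connected, mark it failed (-EIO) and wake
 * up anyone waiting on the connection state.
 */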
static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	pr_err("RPC: %s: %s on device %s ep %p\n",
	       __func__, ASYNC_MSG(event->event),
	       event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

static void
rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	pr_err("RPC: %s: %s on device %s ep %p\n",
	       __func__, ASYNC_MSG(event->event),
	       event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

static const char * const wc_status[] = {
	"success",
	"local length error",
	"local QP operation error",
	"local EE context operation error",
	"local protection error",
	"WR flushed",
	"memory management operation error",
	"bad response error",
	"local access error",
	"remote invalid request error",
	"remote access error",
	"remote operation error",
	"transport retry counter exceeded",
	"RNR retry counter exceeded",
	"local RDD violation error",
	"remote invalid RD request",
	"operation aborted",
	"invalid EE context number",
	"invalid EE context state",
	"fatal error",
	"response timeout error",
	"general error",
};

#define COMPLETION_MSG(status)					\
	((status) < ARRAY_SIZE(wc_status) ?			\
	 wc_status[(status)] : "unexpected completion error")

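/* Handle one send-queue completion. Successful completions need no work.
 * On error, a zero wr_id identifies a plain SEND; a non-zero wr_id points
 * to the rpcrdma_mw whose FAST_REG_MR or LOCAL_INV was flushed, and that
 * FRMR is marked stale.
 */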
static void
rpcrdma_sendcq_process_wc(struct ib_wc *wc)
{
	if (likely(wc->status == IB_WC_SUCCESS))
		return;

	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->wr_id == 0ULL) {
		if (wc->status != IB_WC_WR_FLUSH_ERR)
			pr_err("RPC: %s: SEND: %s\n",
			       __func__, COMPLETION_MSG(wc->status));
	} else {
		struct rpcrdma_mw *r;

		r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
		r->r.frmr.fr_state = FRMR_IS_STALE;
		pr_err("RPC: %s: frmr %p (stale): %s\n",
		       __func__, r, COMPLETION_MSG(wc->status));
	}
}

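/* Drain the send CQ in batches of RPCRDMA_POLLSIZE work completions,
 * bounded by RPCRDMA_WC_BUDGET, so a single upcall cannot poll forever.
 */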
static int
rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct ib_wc *wcs;
	int budget, count, rc;

	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_send_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			return rc;

		count = rc;
		while (count-- > 0)
			rpcrdma_sendcq_process_wc(wcs++);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	return 0;
}

/*
 * Handle send, fast_reg_mr, and local_inv completions.
 *
 * Send events are typically suppressed and thus do not result
 * in an upcall. Occasionally one is signaled, however. This
 * prevents the provider's completion queue from wrapping and
 * losing a completion.
 */
static void
rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_sendcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC: %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_sendcq_poll(cq, ep);
}

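/* Handle one receive completion: sync the reply buffer for the CPU,
 * pick up the credit count advertised by the server, and queue the reply
 * on sched_list for the reply tasklet. Failed receives (other than
 * flushes) are logged and queued with rr_len set to ~0U.
 */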
static void
rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
{
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long)wc->wr_id;

	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS)
		goto out_fail;

	/* status == SUCCESS means all fields in wc are trustworthy */
	if (wc->opcode != IB_WC_RECV)
		return;

	dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n",
		__func__, rep, wc->byte_len);

	rep->rr_len = wc->byte_len;
	ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
			rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);

	if (rep->rr_len >= 16) {
		struct rpcrdma_msg *p = (struct rpcrdma_msg *)rep->rr_base;
		unsigned int credits = ntohl(p->rm_credit);

		if (credits == 0)
			credits = 1;	/* don't deadlock */
		else if (credits > rep->rr_buffer->rb_max_requests)
			credits = rep->rr_buffer->rb_max_requests;
		atomic_set(&rep->rr_buffer->rb_credits, credits);
	}

out_schedule:
	list_add_tail(&rep->rr_list, sched_list);
	return;
out_fail:
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("RPC: %s: rep %p: %s\n",
		       __func__, rep, COMPLETION_MSG(wc->status));
	rep->rr_len = ~0U;
	goto out_schedule;
}

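/* Poll the receive CQ under the same batch/budget scheme as the send CQ,
 * then hand the accumulated replies to the tasklet in one operation.
 */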
static int
rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct list_head sched_list;
	struct ib_wc *wcs;
	int budget, count, rc;

	INIT_LIST_HEAD(&sched_list);
	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_recv_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			goto out_schedule;

		count = rc;
		while (count-- > 0)
			rpcrdma_recvcq_process_wc(wcs++, &sched_list);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	rc = 0;

out_schedule:
	rpcrdma_schedule_tasklet(&sched_list);
	return rc;
}

/*
 * Handle receive completions.
 *
 * It is reentrant but processes single events in order to maintain
 * ordering of receives to keep server credits.
 *
 * It is the responsibility of the scheduled tasklet to return
 * recv buffers to the pool. NOTE: this affects synchronization of
 * connection shutdown. That is, the structures required for
 * the completion of the reply handler must remain intact until
 * all memory has been reclaimed.
 */
static void
rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_recvcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC: %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_recvcq_poll(cq, ep);
}

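/* Drain both completion queues during connect and disconnect processing,
 * so that flushed receives still reach the reply tasklet and flushed
 * sends can mark their FRMRs stale.
 */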
static void
rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
{
	struct ib_wc wc;
	LIST_HEAD(sched_list);

	while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
		rpcrdma_recvcq_process_wc(&wc, &sched_list);
	if (!list_empty(&sched_list))
		rpcrdma_schedule_tasklet(&sched_list);
	while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
		rpcrdma_sendcq_process_wc(&wc);
}

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
static const char * const conn[] = {
	"address resolved",
	"address error",
	"route resolved",
	"route error",
	"connect request",
	"connect response",
	"connect error",
	"unreachable",
	"rejected",
	"established",
	"disconnected",
	"device removal",
	"multicast join",
	"multicast error",
	"address change",
	"timewait exit",
};

#define CONNECTION_MSG(status)					\
	((status) < ARRAY_SIZE(conn) ?				\
	 conn[(status)] : "unrecognized connection error")
#endif

static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
#endif
	struct ib_qp_attr attr;
	struct ib_qp_init_attr iattr;
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ia->ri_async_rc = 0;
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		ib_query_qp(ia->ri_id->qp, &attr,
			IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
			&iattr);
		dprintk("RPC: %s: %d responder resources"
			" (%d initiator)\n",
			__func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		connstate = -ECONNREFUSED;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
		goto connected;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		connstate = -ENODEV;
connected:
		atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
		dprintk("RPC: %s: %sconnected\n",
			__func__, connstate > 0 ? "" : "dis");
		ep->rep_connected = connstate;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		/*FALLTHROUGH*/
	default:
		dprintk("RPC: %s: %pI4:%u (ep 0x%p): %s\n",
			__func__, &addr->sin_addr.s_addr,
			ntohs(addr->sin_port), ep,
			CONNECTION_MSG(event->event));
		break;
	}

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	if (connstate == 1) {
		int ird = attr.max_dest_rd_atomic;
		int tird = ep->rep_remote_cma.responder_resources;
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
			"on %s, memreg %d slots %d ird %d%s\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			ia->ri_id->device->name,
			ia->ri_memreg_strategy,
			xprt->rx_buf.rb_max_requests,
			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
	} else if (connstate < 0) {
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			connstate);
	}
#endif

	return 0;
}

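/* Create an rdma_cm_id and resolve the server's address and route.
 * Resolution is asynchronous: the CM event handler records the result
 * in ia->ri_async_rc and completes ia->ri_done, which is waited on
 * here with a timeout.
 */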
static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
			struct rpcrdma_ia *ia, struct sockaddr *addr)
{
	struct rdma_cm_id *id;
	int rc;

	init_completion(&ia->ri_done);

	id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		dprintk("RPC: %s: rdma_create_id() failed %i\n",
			__func__, rc);
		return id;
	}

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}

/*
 * Drain any cq, prior to teardown.
 */
static void
rpcrdma_clean_cq(struct ib_cq *cq)
{
	struct ib_wc wc;
	int count = 0;

	while (1 == ib_poll_cq(cq, 1, &wc))
		++count;

	if (count)
		dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
			__func__, count, wc.opcode);
}

/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc, mem_priv;
	struct ib_device_attr devattr;
	struct rpcrdma_ia *ia = &xprt->rx_ia;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	/*
	 * Query the device to determine if the requested memory
	 * registration strategy is supported. If it isn't, set the
	 * strategy to a globally supported model.
	 */
	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out2;
	}

	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
	}

	if (memreg == RPCRDMA_FRMR) {
		/* Requires both frmr reg and local dma lkey */
		if ((devattr.device_cap_flags &
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
			dprintk("RPC: %s: FRMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_MTHCAFMR;
		} else {
			/* Mind the ia limit on FRMR page list depth */
			ia->ri_max_frmr_depth = min_t(unsigned int,
				RPCRDMA_MAX_DATA_SEGS,
				devattr.max_fast_reg_page_list_len);
		}
	}
	if (memreg == RPCRDMA_MTHCAFMR) {
		if (!ia->ri_id->device->alloc_fmr) {
			dprintk("RPC: %s: MTHCAFMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_ALLPHYSICAL;
		}
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_FRMR:
		break;
	case RPCRDMA_ALLPHYSICAL:
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
	case RPCRDMA_MTHCAFMR:
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
	register_setup:
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			rc = -ENOMEM;
			goto out2;
		}
		break;
	default:
		printk(KERN_ERR "RPC: Unsupported memory "
				"registration mode: %d\n", memreg);
		rc = -ENOMEM;
		goto out2;
	}
	dprintk("RPC: %s: memory registration strategy is %d\n",
		__func__, memreg);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	rwlock_init(&ia->ri_qplock);
	return 0;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}

/*
 * Clean up/close an IA.
 *   o if event handles and PD have been initialized, free them.
 *   o close the IA
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering\n", __func__);
	if (ia->ri_bind_mem != NULL) {
		rc = ib_dereg_mr(ia->ri_bind_mem);
		dprintk("RPC: %s: ib_dereg_mr returned %i\n",
			__func__, rc);
	}
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
		if (ia->ri_id->qp)
			rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = NULL;
	}
	if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
		rc = ib_dealloc_pd(ia->ri_pd);
		dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
			__func__, rc);
	}
}

/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
				struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr devattr;
	struct ib_cq *sendcq, *recvcq;
	int rc, err;

	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		return rc;
	}

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > devattr.max_qp_wr)
		cdata->max_requests = devattr.max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	/* send_cq and recv_cq initialized below */
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR: {
		int depth = 7;

		/* Add room for frmr register and invalidate WRs.
		 * 1. FRMR reg WR for head
		 * 2. FRMR invalidate WR for head
		 * 3. N FRMR reg WRs for pagelist
		 * 4. N FRMR invalidate WRs for pagelist
		 * 5. FRMR reg WR for tail
		 * 6. FRMR invalidate WR for tail
		 * 7. The RDMA_SEND WR
		 */

		/* Calculate N if the device max FRMR depth is smaller than
		 * RPCRDMA_MAX_DATA_SEGS.
		 */
		if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
			int delta = RPCRDMA_MAX_DATA_SEGS -
				    ia->ri_max_frmr_depth;

			do {
				depth += 2; /* FRMR reg + invalidate */
				delta -= ia->ri_max_frmr_depth;
			} while (delta > 0);

		}
		ep->rep_attr.cap.max_send_wr *= depth;
		if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
			cdata->max_requests = devattr.max_qp_wr / depth;
			if (!cdata->max_requests)
				return -EINVAL;
			ep->rep_attr.cap.max_send_wr = cdata->max_requests *
						       depth;
		}
		break;
	}
	default:
		break;
	}
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

	/* set trigger for requesting send completion */
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
	if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
		ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
	else if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;
	INIT_CQCOUNT(ep);
	init_waitqueue_head(&ep->rep_connect_wait);
	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

	sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
				  rpcrdma_cq_async_error_upcall, ep,
				  ep->rep_attr.cap.max_send_wr + 1, 0);
	if (IS_ERR(sendcq)) {
		rc = PTR_ERR(sendcq);
		dprintk("RPC: %s: failed to create send CQ: %i\n",
			__func__, rc);
		goto out1;
	}

	rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		goto out2;
	}

	recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
				  rpcrdma_cq_async_error_upcall, ep,
				  ep->rep_attr.cap.max_recv_wr + 1, 0);
	if (IS_ERR(recvcq)) {
		rc = PTR_ERR(recvcq);
		dprintk("RPC: %s: failed to create recv CQ: %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		ib_destroy_cq(recvcq);
		goto out2;
	}

	ep->rep_attr.send_cq = sendcq;
	ep->rep_attr.recv_cq = recvcq;

	/* Initialize cma parameters */

	/* RPC/RDMA does not use private data */
	ep->rep_remote_cma.private_data = NULL;
	ep->rep_remote_cma.private_data_len = 0;

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	if (devattr.max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
		ep->rep_remote_cma.responder_resources = 32;
	else
		ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;

	ep->rep_remote_cma.retry_count = 7;
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	err = ib_destroy_cq(sendcq);
	if (err)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, err);
out1:
	return rc;
}

/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering, connected is %d\n",
		__func__, ep->rep_connected);

	cancel_delayed_work_sync(&ep->rep_connect_worker);

	if (ia->ri_id->qp) {
		rpcrdma_ep_disconnect(ep, ia);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	/* padding - could be done in rpcrdma_buffer_destroy... */
	if (ep->rep_pad_mr) {
		rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
		ep->rep_pad_mr = NULL;
	}

	rpcrdma_clean_cq(ep->rep_attr.recv_cq);
	rc = ib_destroy_cq(ep->rep_attr.recv_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);

	rpcrdma_clean_cq(ep->rep_attr.send_cq);
	rc = ib_destroy_cq(ep->rep_attr.send_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);
}

/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id, *old;
	int rc = 0;
	int retry_count = 0;

	if (ep->rep_connected != 0) {
		struct rpcrdma_xprt *xprt;
retry:
		dprintk("RPC: %s: reconnecting...\n", __func__);

		rpcrdma_ep_disconnect(ep, ia);
		rpcrdma_flush_cqs(ep);

		switch (ia->ri_memreg_strategy) {
		case RPCRDMA_FRMR:
			rpcrdma_reset_frmrs(ia);
			break;
		case RPCRDMA_MTHCAFMR:
			rpcrdma_reset_fmrs(ia);
			break;
		case RPCRDMA_ALLPHYSICAL:
			break;
		default:
			rc = -EIO;
			goto out;
		}

		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		id = rpcrdma_create_id(xprt, ia,
				(struct sockaddr *)&xprt->rx_data.addr);
		if (IS_ERR(id)) {
			rc = -EHOSTUNREACH;
			goto out;
		}
		/* TEMP TEMP TEMP - fail if new device:
		 * Deregister/remarshal *all* requests!
		 * Close and recreate adapter, pd, etc!
		 * Re-determine all attributes still sane!
		 * More stuff I haven't thought of!
		 * Rrrgh!
		 */
		if (ia->ri_id->device != id->device) {
			printk("RPC: %s: can't reconnect on "
				"different device!\n", __func__);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}
		/* END TEMP */
		rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
				__func__, rc);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}

		write_lock(&ia->ri_qplock);
		old = ia->ri_id;
		ia->ri_id = id;
		write_unlock(&ia->ri_qplock);

		rdma_destroy_qp(old);
		rdma_destroy_id(old);
	} else {
		dprintk("RPC: %s: connecting...\n", __func__);
		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
				__func__, rc);
			/* do not update ep->rep_connected */
			return -ENETUNREACH;
		}
	}

	ep->rep_connected = 0;

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC: %s: rdma_connect() failed with %i\n",
				__func__, rc);
		goto out;
	}

	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

	/*
	 * Check state. A non-peer reject indicates no listener
	 * (ECONNREFUSED), which may be a transient state. All
	 * others indicate a transport condition which has already
	 * undergone a best-effort.
	 */
	if (ep->rep_connected == -ECONNREFUSED &&
	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
		dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
		goto retry;
	}
	if (ep->rep_connected <= 0) {
		/* Sometimes, the only way to reliably connect to remote
		 * CMs is to use same nonzero values for ORD and IRD. */
		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
		    (ep->rep_remote_cma.responder_resources == 0 ||
		     ep->rep_remote_cma.initiator_depth !=
				ep->rep_remote_cma.responder_resources)) {
			if (ep->rep_remote_cma.responder_resources == 0)
				ep->rep_remote_cma.responder_resources = 1;
			ep->rep_remote_cma.initiator_depth =
				ep->rep_remote_cma.responder_resources;
			goto retry;
		}
		rc = ep->rep_connected;
	} else {
		dprintk("RPC: %s: connected\n", __func__);
	}

out:
	if (rc)
		ep->rep_connected = rc;
	return rc;
}

/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
void
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	rpcrdma_flush_cqs(ep);
	rc = rdma_disconnect(ia->ri_id);
	if (!rc) {
		/* returns without wait if not connected */
		wait_event_interruptible(ep->rep_connect_wait,
							ep->rep_connected != 1);
		dprintk("RPC: %s: after wait, %sconnected\n", __func__,
			(ep->rep_connected == 1) ? "still " : "dis");
	} else {
		dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
		ep->rep_connected = rc;
	}
}

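/* Allocate the FMR pool at buffer-creation time: one MW per segment for
 * each credit's worth of requests, linked on both rb_mws (available) and
 * rb_all (for teardown and reset).
 */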
static int
rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
{
	int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
	struct ib_fmr_attr fmr_attr = {
		.max_pages	= RPCRDMA_MAX_DATA_SEGS,
		.max_maps	= 1,
		.page_shift	= PAGE_SHIFT
	};
	struct rpcrdma_mw *r;
	int i, rc;

	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
	dprintk("RPC: %s: initializing %d FMRs\n", __func__, i);

	while (i--) {
		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (r == NULL)
			return -ENOMEM;

		r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr);
		if (IS_ERR(r->r.fmr)) {
			rc = PTR_ERR(r->r.fmr);
			dprintk("RPC: %s: ib_alloc_fmr failed %i\n",
				__func__, rc);
			goto out_free;
		}

		list_add(&r->mw_list, &buf->rb_mws);
		list_add(&r->mw_all, &buf->rb_all);
	}
	return 0;

out_free:
	kfree(r);
	return rc;
}

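/* Allocate the FRMR pool the same way, each entry carrying a fast_reg MR
 * and a page list sized to ia->ri_max_frmr_depth.
 */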
static int
rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_frmr *f;
	struct rpcrdma_mw *r;
	int i, rc;

	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
	dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i);

	while (i--) {
		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (r == NULL)
			return -ENOMEM;
		f = &r->r.frmr;

		f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
						ia->ri_max_frmr_depth);
		if (IS_ERR(f->fr_mr)) {
			rc = PTR_ERR(f->fr_mr);
			dprintk("RPC: %s: ib_alloc_fast_reg_mr "
				"failed %i\n", __func__, rc);
			goto out_free;
		}

		f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device,
							ia->ri_max_frmr_depth);
		if (IS_ERR(f->fr_pgl)) {
			rc = PTR_ERR(f->fr_pgl);
			dprintk("RPC: %s: ib_alloc_fast_reg_page_list "
				"failed %i\n", __func__, rc);

			ib_dereg_mr(f->fr_mr);
			goto out_free;
		}

		list_add(&r->mw_list, &buf->rb_mws);
		list_add(&r->mw_all, &buf->rb_all);
	}

	return 0;

out_free:
	kfree(r);
	return rc;
}

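/*
 * Create the per-transport buffer pool: the send/recv pointer arrays,
 * the optional zeroed pad buffer, the MW pool for the configured memory
 * registration strategy, and a registered req/rep pair per credit.
 */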
int
rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
	struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
{
	char *p;
	size_t len, rlen, wlen;
	int i, rc;

	buf->rb_max_requests = cdata->max_requests;
	spin_lock_init(&buf->rb_lock);
	atomic_set(&buf->rb_credits, 1);

	/* Need to allocate:
	 *   1.  arrays for send and recv pointers
	 *   2.  arrays of struct rpcrdma_req to fill in pointers
	 *   3.  array of struct rpcrdma_rep for replies
	 *   4.  padding, if any
	 * Send/recv buffers in req/rep need to be registered
	 */
	len = buf->rb_max_requests *
		(sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
	len += cdata->padding;

	p = kzalloc(len, GFP_KERNEL);
	if (p == NULL) {
		dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
			__func__, len);
		rc = -ENOMEM;
		goto out;
	}
	buf->rb_pool = p;	/* for freeing it later */

	buf->rb_send_bufs = (struct rpcrdma_req **) p;
	p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
	buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
	p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];

	/*
	 * Register the zeroed pad buffer, if any.
	 */
	if (cdata->padding) {
		rc = rpcrdma_register_internal(ia, p, cdata->padding,
					    &ep->rep_pad_mr, &ep->rep_pad);
		if (rc)
			goto out;
	}
	p += cdata->padding;

	INIT_LIST_HEAD(&buf->rb_mws);
	INIT_LIST_HEAD(&buf->rb_all);
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		rc = rpcrdma_init_frmrs(ia, buf);
		if (rc)
			goto out;
		break;
	case RPCRDMA_MTHCAFMR:
		rc = rpcrdma_init_fmrs(ia, buf);
		if (rc)
			goto out;
		break;
	default:
		break;
	}

	/*
	 * Allocate/init the request/reply buffers. Doing this
	 * using kmalloc for now -- one for each buf.
	 */
	wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req));
	rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep));
	dprintk("RPC: %s: wlen = %zu, rlen = %zu\n",
		__func__, wlen, rlen);

	for (i = 0; i < buf->rb_max_requests; i++) {
		struct rpcrdma_req *req;
		struct rpcrdma_rep *rep;

		req = kmalloc(wlen, GFP_KERNEL);
		if (req == NULL) {
			dprintk("RPC: %s: request buffer %d alloc"
				" failed\n", __func__, i);
			rc = -ENOMEM;
			goto out;
		}
		memset(req, 0, sizeof(struct rpcrdma_req));
		buf->rb_send_bufs[i] = req;
		buf->rb_send_bufs[i]->rl_buffer = buf;

		rc = rpcrdma_register_internal(ia, req->rl_base,
				wlen - offsetof(struct rpcrdma_req, rl_base),
				&buf->rb_send_bufs[i]->rl_handle,
				&buf->rb_send_bufs[i]->rl_iov);
		if (rc)
			goto out;

		buf->rb_send_bufs[i]->rl_size = wlen -
						sizeof(struct rpcrdma_req);

		rep = kmalloc(rlen, GFP_KERNEL);
		if (rep == NULL) {
			dprintk("RPC: %s: reply buffer %d alloc failed\n",
				__func__, i);
			rc = -ENOMEM;
			goto out;
		}
		memset(rep, 0, sizeof(struct rpcrdma_rep));
		buf->rb_recv_bufs[i] = rep;
		buf->rb_recv_bufs[i]->rr_buffer = buf;

		rc = rpcrdma_register_internal(ia, rep->rr_base,
				rlen - offsetof(struct rpcrdma_rep, rr_base),
				&buf->rb_recv_bufs[i]->rr_handle,
				&buf->rb_recv_bufs[i]->rr_iov);
		if (rc)
			goto out;

	}
	dprintk("RPC: %s: max_requests %d\n",
		__func__, buf->rb_max_requests);
	/* done */
	return 0;
out:
	rpcrdma_buffer_destroy(buf);
	return rc;
}

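/* Teardown helpers for the MW pool: walk rb_all, unlink each MW from
 * both lists, release its FMR or FRMR resources, and free it.
 */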
static void
rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;
	int rc;

	while (!list_empty(&buf->rb_all)) {
		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
		list_del(&r->mw_all);
		list_del(&r->mw_list);

		rc = ib_dealloc_fmr(r->r.fmr);
		if (rc)
			dprintk("RPC: %s: ib_dealloc_fmr failed %i\n",
				__func__, rc);

		kfree(r);
	}
}

static void
rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;
	int rc;

	while (!list_empty(&buf->rb_all)) {
		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
		list_del(&r->mw_all);
		list_del(&r->mw_list);

		rc = ib_dereg_mr(r->r.frmr.fr_mr);
		if (rc)
			dprintk("RPC: %s: ib_dereg_mr failed %i\n",
				__func__, rc);
		ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);

		kfree(r);
	}
}

void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	int i;

	/* clean up in reverse order from create
	 *   1.  recv mr memory (mr free, then kfree)
	 *   2.  send mr memory (mr free, then kfree)
	 *   3.  MWs
	 */
	dprintk("RPC: %s: entering\n", __func__);

	for (i = 0; i < buf->rb_max_requests; i++) {
		if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) {
			rpcrdma_deregister_internal(ia,
					buf->rb_recv_bufs[i]->rr_handle,
					&buf->rb_recv_bufs[i]->rr_iov);
			kfree(buf->rb_recv_bufs[i]);
		}
		if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
			rpcrdma_deregister_internal(ia,
					buf->rb_send_bufs[i]->rl_handle,
					&buf->rb_send_bufs[i]->rl_iov);
			kfree(buf->rb_send_bufs[i]);
		}
	}

	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		rpcrdma_destroy_frmrs(buf);
		break;
	case RPCRDMA_MTHCAFMR:
		rpcrdma_destroy_fmrs(buf);
		break;
	default:
		break;
	}

	kfree(buf->rb_pool);
}

/* After a disconnect, unmap all FMRs.
 *
 * This is invoked only in the transport connect worker in order
 * to serialize with rpcrdma_register_fmr_external().
 */
static void
rpcrdma_reset_fmrs(struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct list_head *pos;
	struct rpcrdma_mw *r;
	LIST_HEAD(l);
	int rc;

	list_for_each(pos, &buf->rb_all) {
		r = list_entry(pos, struct rpcrdma_mw, mw_all);

		INIT_LIST_HEAD(&l);
		list_add(&r->r.fmr->list, &l);
		rc = ib_unmap_fmr(&l);
		if (rc)
			dprintk("RPC: %s: ib_unmap_fmr failed %i\n",
				__func__, rc);
	}
}

/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
 * an unusable state. Find FRMRs in this state and dereg / reg
 * each.  FRMRs that are VALID and attached to an rpcrdma_req are
 * also torn down.
 *
 * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
 *
 * This is invoked only in the transport connect worker in order
 * to serialize with rpcrdma_register_frmr_external().
 */
static void
rpcrdma_reset_frmrs(struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct list_head *pos;
	struct rpcrdma_mw *r;
	int rc;

	list_for_each(pos, &buf->rb_all) {
		r = list_entry(pos, struct rpcrdma_mw, mw_all);

		if (r->r.frmr.fr_state == FRMR_IS_INVALID)
			continue;

		rc = ib_dereg_mr(r->r.frmr.fr_mr);
		if (rc)
			dprintk("RPC: %s: ib_dereg_mr failed %i\n",
				__func__, rc);
		ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);

		r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
					ia->ri_max_frmr_depth);
		if (IS_ERR(r->r.frmr.fr_mr)) {
			rc = PTR_ERR(r->r.frmr.fr_mr);
			dprintk("RPC: %s: ib_alloc_fast_reg_mr"
				" failed %i\n", __func__, rc);
			continue;
		}
		r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
					ia->ri_id->device,
					ia->ri_max_frmr_depth);
		if (IS_ERR(r->r.frmr.fr_pgl)) {
			rc = PTR_ERR(r->r.frmr.fr_pgl);
			dprintk("RPC: %s: "
				"ib_alloc_fast_reg_page_list "
				"failed %i\n", __func__, rc);

			ib_dereg_mr(r->r.frmr.fr_mr);
			continue;
		}
		r->r.frmr.fr_state = FRMR_IS_INVALID;
	}
}

/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
 * some req segments uninitialized.
 */
static void
rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
{
	if (*mw) {
		list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
		*mw = NULL;
	}
}

/* Cycle mw's back in reverse order, and "spin" them.
 * This delays and scrambles reuse as much as possible.
 */
static void
rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mr_seg *seg = req->rl_segments;
	struct rpcrdma_mr_seg *seg1 = seg;
	int i;

	for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
		rpcrdma_buffer_put_mr(&seg->rl_mw, buf);
	rpcrdma_buffer_put_mr(&seg1->rl_mw, buf);
}

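/* Return a request to the send buffer pool, and return its attached
 * reply (if any) to the receive buffer pool.
 */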
static void
rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	buf->rb_send_bufs[--buf->rb_send_index] = req;
	req->rl_niovs = 0;
	if (req->rl_reply) {
		buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
		req->rl_reply->rr_func = NULL;
		req->rl_reply = NULL;
	}
}

/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
 * Redo only the ib_post_send().
 */
static void
rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct ib_send_wr invalidate_wr, *bad_wr;
	int rc;

	dprintk("RPC: %s: FRMR %p is stale\n", __func__, r);

	/* When this FRMR is re-inserted into rb_mws, it is no longer stale */
	r->r.frmr.fr_state = FRMR_IS_INVALID;

	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
	invalidate_wr.wr_id = (unsigned long)(void *)r;
	invalidate_wr.opcode = IB_WR_LOCAL_INV;
	invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	dprintk("RPC: %s: frmr %p invalidating rkey %08x\n",
		__func__, r, r->r.frmr.fr_mr->rkey);

	read_lock(&ia->ri_qplock);
	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
	read_unlock(&ia->ri_qplock);
	if (rc) {
		/* Force rpcrdma_buffer_get() to retry */
		r->r.frmr.fr_state = FRMR_IS_STALE;
		dprintk("RPC: %s: ib_post_send failed, %i\n",
			__func__, rc);
	}
}

1548static void
1549rpcrdma_retry_flushed_linv(struct list_head *stale,
1550 struct rpcrdma_buffer *buf)
1551{
1552 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
1553 struct list_head *pos;
1554 struct rpcrdma_mw *r;
1555 unsigned long flags;
1556
1557 list_for_each(pos, stale) {
1558 r = list_entry(pos, struct rpcrdma_mw, mw_list);
1559 rpcrdma_retry_local_inv(r, ia);
1560 }
1561
1562 spin_lock_irqsave(&buf->rb_lock, flags);
1563 list_splice_tail(stale, &buf->rb_mws);
1564 spin_unlock_irqrestore(&buf->rb_lock, flags);
1565}
1566
Chuck Leverc2922c02014-07-29 17:24:36 -04001567static struct rpcrdma_req *
Chuck Leverddb6beb2014-07-29 17:24:54 -04001568rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
1569 struct list_head *stale)
1570{
1571 struct rpcrdma_mw *r;
1572 int i;
1573
1574 i = RPCRDMA_MAX_SEGS - 1;
1575 while (!list_empty(&buf->rb_mws)) {
1576 r = list_entry(buf->rb_mws.next,
1577 struct rpcrdma_mw, mw_list);
1578 list_del(&r->mw_list);
1579 if (r->r.frmr.fr_state == FRMR_IS_STALE) {
1580 list_add(&r->mw_list, stale);
1581 continue;
1582 }
Chuck Lever3eb35812015-01-21 11:02:54 -05001583 req->rl_segments[i].rl_mw = r;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001584 if (unlikely(i-- == 0))
1585 return req; /* Success */
1586 }
1587
1588 /* Not enough entries on rb_mws for this req */
1589 rpcrdma_buffer_put_sendbuf(req, buf);
1590 rpcrdma_buffer_put_mrs(req, buf);
1591 return NULL;
1592}
1593
1594static struct rpcrdma_req *
1595rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
Chuck Leverc2922c02014-07-29 17:24:36 -04001596{
1597 struct rpcrdma_mw *r;
1598 int i;
1599
1600 i = RPCRDMA_MAX_SEGS - 1;
1601 while (!list_empty(&buf->rb_mws)) {
1602 r = list_entry(buf->rb_mws.next,
1603 struct rpcrdma_mw, mw_list);
1604 list_del(&r->mw_list);
Chuck Lever3eb35812015-01-21 11:02:54 -05001605 req->rl_segments[i].rl_mw = r;
Chuck Leverc2922c02014-07-29 17:24:36 -04001606 if (unlikely(i-- == 0))
1607 return req; /* Success */
1608 }
1609
1610 /* Not enough entries on rb_mws for this req */
1611 rpcrdma_buffer_put_sendbuf(req, buf);
1612 rpcrdma_buffer_put_mrs(req, buf);
1613 return NULL;
1614}
1615
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001616/*
1617 * Get a set of request/reply buffers.
1618 *
1619 * Reply buffer (if needed) is attached to send buffer upon return.
1620 * Rule:
1621 * rb_send_index and rb_recv_index MUST always be pointing to the
1622 * *next* available buffer (non-NULL). They are incremented after
1623 * removing buffers, and decremented *before* returning them.
1624 */
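/* For example, with rb_max_requests == 2 and an idle transport: two
 * calls to rpcrdma_buffer_get() advance rb_send_index from 0 to 2, a
 * third call fails with "out of request buffers", and each
 * rpcrdma_buffer_put() pre-decrements the index before restoring the
 * freed request into its slot.
 */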
1625struct rpcrdma_req *
1626rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1627{
Chuck Leverc2922c02014-07-29 17:24:36 -04001628 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
Chuck Leverddb6beb2014-07-29 17:24:54 -04001629 struct list_head stale;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001630 struct rpcrdma_req *req;
1631 unsigned long flags;
1632
1633 spin_lock_irqsave(&buffers->rb_lock, flags);
1634 if (buffers->rb_send_index == buffers->rb_max_requests) {
1635 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1636 dprintk("RPC: %s: out of request buffers\n", __func__);
 1637		return NULL;
1638 }
1639
1640 req = buffers->rb_send_bufs[buffers->rb_send_index];
1641 if (buffers->rb_send_index < buffers->rb_recv_index) {
1642 dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
1643 __func__,
1644 buffers->rb_recv_index - buffers->rb_send_index);
1645 req->rl_reply = NULL;
1646 } else {
1647 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1648 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1649 }
1650 buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001651
1652 INIT_LIST_HEAD(&stale);
Chuck Leverc2922c02014-07-29 17:24:36 -04001653 switch (ia->ri_memreg_strategy) {
1654 case RPCRDMA_FRMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001655 req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
1656 break;
Chuck Leverc2922c02014-07-29 17:24:36 -04001657 case RPCRDMA_MTHCAFMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001658 req = rpcrdma_buffer_get_fmrs(req, buffers);
Chuck Leverc2922c02014-07-29 17:24:36 -04001659 break;
1660 default:
1661 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001662 }
1663 spin_unlock_irqrestore(&buffers->rb_lock, flags);
Chuck Leverddb6beb2014-07-29 17:24:54 -04001664 if (!list_empty(&stale))
1665 rpcrdma_retry_flushed_linv(&stale, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001666 return req;
1667}
1668
1669/*
1670 * Put request/reply buffers back into pool.
1671 * Pre-decrement counter/array index.
1672 */
1673void
1674rpcrdma_buffer_put(struct rpcrdma_req *req)
1675{
1676 struct rpcrdma_buffer *buffers = req->rl_buffer;
1677 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001678 unsigned long flags;
1679
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001680 spin_lock_irqsave(&buffers->rb_lock, flags);
Chuck Leverc2922c02014-07-29 17:24:36 -04001681 rpcrdma_buffer_put_sendbuf(req, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001682 switch (ia->ri_memreg_strategy) {
Tom Talpey3197d3092008-10-09 15:00:20 -04001683 case RPCRDMA_FRMR:
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001684 case RPCRDMA_MTHCAFMR:
Chuck Leverc2922c02014-07-29 17:24:36 -04001685 rpcrdma_buffer_put_mrs(req, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001686 break;
1687 default:
1688 break;
1689 }
1690 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1691}
1692
1693/*
1694 * Recover reply buffers from pool.
1695 * This happens when recovering from error conditions.
1696 * Post-increment counter/array index.
1697 */
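/* Note the indirection below: when rl_iov.length is zero, rl_buffer
 * does not point at the buffer pool itself but at another rpcrdma_req
 * (the xprt_rdma_allocate() special case), whose rl_buffer is used
 * instead.
 */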
1698void
1699rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
1700{
1701 struct rpcrdma_buffer *buffers = req->rl_buffer;
1702 unsigned long flags;
1703
1704 if (req->rl_iov.length == 0) /* special case xprt_rdma_allocate() */
1705 buffers = ((struct rpcrdma_req *) buffers)->rl_buffer;
1706 spin_lock_irqsave(&buffers->rb_lock, flags);
1707 if (buffers->rb_recv_index < buffers->rb_max_requests) {
1708 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1709 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1710 }
1711 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1712}
1713
1714/*
1715 * Put reply buffers back into pool when not attached to
Chuck Leverb45ccfd2014-05-28 10:32:34 -04001716 * request. This happens in error conditions.
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001717 */
1718void
1719rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
1720{
1721 struct rpcrdma_buffer *buffers = rep->rr_buffer;
1722 unsigned long flags;
1723
1724 rep->rr_func = NULL;
1725 spin_lock_irqsave(&buffers->rb_lock, flags);
1726 buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
1727 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1728}
1729
1730/*
1731 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
1732 */
1733
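/* rpcrdma_register_internal() DMA-maps a kmalloc'd buffer and then
 * selects an lkey for it: the device's DMA lkey when available, else
 * the lkey of the pre-registered "bind" MR, falling back to a
 * per-buffer ib_reg_phys_mr() otherwise. rpcrdma_deregister_internal()
 * reverses whichever choice was made.
 */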
1734int
1735rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
1736 struct ib_mr **mrp, struct ib_sge *iov)
1737{
1738 struct ib_phys_buf ipb;
1739 struct ib_mr *mr;
1740 int rc;
1741
1742 /*
1743 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
1744 */
1745 iov->addr = ib_dma_map_single(ia->ri_id->device,
1746 va, len, DMA_BIDIRECTIONAL);
Yan Burmanbf858ab2014-06-19 16:06:30 +03001747 if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
1748 return -ENOMEM;
1749
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001750 iov->length = len;
1751
Tom Talpeybd7ed1d2008-10-09 15:00:09 -04001752 if (ia->ri_have_dma_lkey) {
1753 *mrp = NULL;
1754 iov->lkey = ia->ri_dma_lkey;
1755 return 0;
1756 } else if (ia->ri_bind_mem != NULL) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001757 *mrp = NULL;
1758 iov->lkey = ia->ri_bind_mem->lkey;
1759 return 0;
1760 }
1761
1762 ipb.addr = iov->addr;
1763 ipb.size = iov->length;
1764 mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
1765 IB_ACCESS_LOCAL_WRITE, &iov->addr);
1766
1767 dprintk("RPC: %s: phys convert: 0x%llx "
1768 "registered 0x%llx length %d\n",
Andrew Mortona56daeb2007-10-16 01:29:57 -07001769 __func__, (unsigned long long)ipb.addr,
1770 (unsigned long long)iov->addr, len);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001771
1772 if (IS_ERR(mr)) {
1773 *mrp = NULL;
1774 rc = PTR_ERR(mr);
1775 dprintk("RPC: %s: failed with %i\n", __func__, rc);
1776 } else {
1777 *mrp = mr;
1778 iov->lkey = mr->lkey;
1779 rc = 0;
1780 }
1781
1782 return rc;
1783}
1784
1785int
1786rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
1787 struct ib_mr *mr, struct ib_sge *iov)
1788{
1789 int rc;
1790
1791 ib_dma_unmap_single(ia->ri_id->device,
1792 iov->addr, iov->length, DMA_BIDIRECTIONAL);
1793
 1794	if (mr == NULL)
1795 return 0;
1796
1797 rc = ib_dereg_mr(mr);
1798 if (rc)
1799 dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
1800 return rc;
1801}
1802
1803/*
1804 * Wrappers for chunk registration, shared by read/write chunk code.
1805 */
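/* rpcrdma_map_one()/rpcrdma_unmap_one() handle one segment at a time:
 * page-based segments go through ib_dma_map_page(), kva-based ones
 * through ib_dma_map_single(), and the DMA direction follows the chunk
 * type (DMA_FROM_DEVICE for a chunk the server will fill,
 * DMA_TO_DEVICE for a chunk the server will read).
 */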
1806
1807static void
1808rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
1809{
1810 seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
1811 seg->mr_dmalen = seg->mr_len;
1812 if (seg->mr_page)
1813 seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
1814 seg->mr_page, offset_in_page(seg->mr_offset),
1815 seg->mr_dmalen, seg->mr_dir);
1816 else
1817 seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
1818 seg->mr_offset,
1819 seg->mr_dmalen, seg->mr_dir);
Tom Tucker5c635e02011-02-09 19:45:34 +00001820 if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
1821 dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
1822 __func__,
Randy Dunlap986d4ab2011-03-15 17:11:59 -07001823 (unsigned long long)seg->mr_dma,
1824 seg->mr_offset, seg->mr_dmalen);
Tom Tucker5c635e02011-02-09 19:45:34 +00001825 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001826}
1827
1828static void
1829rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
1830{
1831 if (seg->mr_page)
1832 ib_dma_unmap_page(ia->ri_id->device,
1833 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1834 else
1835 ib_dma_unmap_single(ia->ri_id->device,
1836 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1837}
1838
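/* FRMR registration, in outline: gather up to ri_max_frmr_depth
 * page-aligned segments into the MW's fast_reg page list (stopping
 * early at any hole), bump the rkey's consumer-owned low byte, and
 * post a single IB_WR_FAST_REG_MR covering the whole range; the set
 * is then described by a single rkey/base/length stored in seg1.
 */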
Tom Talpey8d4ba032008-10-09 14:59:49 -04001839static int
Tom Talpey3197d3092008-10-09 15:00:20 -04001840rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1841 int *nsegs, int writing, struct rpcrdma_ia *ia,
1842 struct rpcrdma_xprt *r_xprt)
1843{
1844 struct rpcrdma_mr_seg *seg1 = seg;
Chuck Lever3eb35812015-01-21 11:02:54 -05001845 struct rpcrdma_mw *mw = seg1->rl_mw;
Chuck Lever0dbb4102014-07-29 17:24:09 -04001846 struct rpcrdma_frmr *frmr = &mw->r.frmr;
1847 struct ib_mr *mr = frmr->fr_mr;
Chuck Leverf590e872014-07-29 17:25:29 -04001848 struct ib_send_wr fastreg_wr, *bad_wr;
Tom Talpey3197d3092008-10-09 15:00:20 -04001849 u8 key;
1850 int len, pageoff;
1851 int i, rc;
Tom Tucker9b781452012-02-20 13:07:57 -06001852 int seg_len;
1853 u64 pa;
1854 int page_no;
Tom Talpey3197d3092008-10-09 15:00:20 -04001855
1856 pageoff = offset_in_page(seg1->mr_offset);
1857 seg1->mr_offset -= pageoff; /* start of page */
1858 seg1->mr_len += pageoff;
1859 len = -pageoff;
Steve Wise0fc6c4e2014-05-28 10:32:00 -04001860 if (*nsegs > ia->ri_max_frmr_depth)
1861 *nsegs = ia->ri_max_frmr_depth;
Tom Tucker9b781452012-02-20 13:07:57 -06001862 for (page_no = i = 0; i < *nsegs;) {
Tom Talpey3197d3092008-10-09 15:00:20 -04001863 rpcrdma_map_one(ia, seg, writing);
Tom Tucker9b781452012-02-20 13:07:57 -06001864 pa = seg->mr_dma;
1865 for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
Chuck Lever0dbb4102014-07-29 17:24:09 -04001866 frmr->fr_pgl->page_list[page_no++] = pa;
Tom Tucker9b781452012-02-20 13:07:57 -06001867 pa += PAGE_SIZE;
1868 }
Tom Talpey3197d3092008-10-09 15:00:20 -04001869 len += seg->mr_len;
1870 ++seg;
1871 ++i;
1872 /* Check for holes */
1873 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1874 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1875 break;
1876 }
1877 dprintk("RPC: %s: Using frmr %p to map %d segments\n",
Chuck Lever0dbb4102014-07-29 17:24:09 -04001878 __func__, mw, i);
Tom Talpey3197d3092008-10-09 15:00:20 -04001879
Chuck Lever05055722014-07-29 17:25:12 -04001880 frmr->fr_state = FRMR_IS_VALID;
1881
Chuck Leverf590e872014-07-29 17:25:29 -04001882 memset(&fastreg_wr, 0, sizeof(fastreg_wr));
1883 fastreg_wr.wr_id = (unsigned long)(void *)mw;
1884 fastreg_wr.opcode = IB_WR_FAST_REG_MR;
1885 fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma;
1886 fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
1887 fastreg_wr.wr.fast_reg.page_list_len = page_no;
1888 fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
1889 fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
1890 if (fastreg_wr.wr.fast_reg.length < len) {
Chuck Lever5fc83f42014-07-29 17:23:17 -04001891 rc = -EIO;
1892 goto out_err;
Chuck Leverc977dea2014-05-28 10:35:06 -04001893 }
1894
1895 /* Bump the key */
Chuck Lever0dbb4102014-07-29 17:24:09 -04001896 key = (u8)(mr->rkey & 0x000000FF);
1897 ib_update_fast_reg_key(mr, ++key);
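	/* Only the low-order byte of the rkey is consumer-owned; bumping
	 * it here gives this registration an rkey distinct from the MR's
	 * previous one (the bump is undone below if the post fails).
	 */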
Chuck Leverc977dea2014-05-28 10:35:06 -04001898
Chuck Leverf590e872014-07-29 17:25:29 -04001899 fastreg_wr.wr.fast_reg.access_flags = (writing ?
Vu Pham68743082009-05-26 14:51:00 -04001900 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
1901 IB_ACCESS_REMOTE_READ);
Chuck Leverf590e872014-07-29 17:25:29 -04001902 fastreg_wr.wr.fast_reg.rkey = mr->rkey;
Tom Talpey3197d3092008-10-09 15:00:20 -04001903 DECR_CQCOUNT(&r_xprt->rx_ep);
1904
Chuck Leverf590e872014-07-29 17:25:29 -04001905 rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
Tom Talpey3197d3092008-10-09 15:00:20 -04001906 if (rc) {
1907 dprintk("RPC: %s: failed ib_post_send for register,"
1908 " status %i\n", __func__, rc);
Chuck Leverc93e9862014-07-29 17:24:19 -04001909 ib_update_fast_reg_key(mr, --key);
Chuck Lever5fc83f42014-07-29 17:23:17 -04001910 goto out_err;
Tom Talpey3197d3092008-10-09 15:00:20 -04001911 } else {
Chuck Lever0dbb4102014-07-29 17:24:09 -04001912 seg1->mr_rkey = mr->rkey;
Tom Talpey3197d3092008-10-09 15:00:20 -04001913 seg1->mr_base = seg1->mr_dma + pageoff;
1914 seg1->mr_nsegs = i;
1915 seg1->mr_len = len;
1916 }
1917 *nsegs = i;
Chuck Lever5fc83f42014-07-29 17:23:17 -04001918 return 0;
1919out_err:
Chuck Lever05055722014-07-29 17:25:12 -04001920 frmr->fr_state = FRMR_IS_INVALID;
Chuck Lever5fc83f42014-07-29 17:23:17 -04001921 while (i--)
1922 rpcrdma_unmap_one(ia, --seg);
Tom Talpey3197d3092008-10-09 15:00:20 -04001923 return rc;
1924}
1925
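/* FRMR invalidation posts a single IB_WR_LOCAL_INV for the MW's rkey
 * and unmaps every segment under the ri_qplock read lock; if the post
 * fails, the FRMR is marked STALE so that rpcrdma_buffer_get() will
 * later recycle it via rpcrdma_retry_local_inv().
 */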
1926static int
1927rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
1928 struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
1929{
1930 struct rpcrdma_mr_seg *seg1 = seg;
1931 struct ib_send_wr invalidate_wr, *bad_wr;
1932 int rc;
1933
Chuck Lever3eb35812015-01-21 11:02:54 -05001934 seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
Chuck Leverdab7e3b2014-07-29 17:25:20 -04001935
Tom Talpey3197d3092008-10-09 15:00:20 -04001936 memset(&invalidate_wr, 0, sizeof invalidate_wr);
Chuck Lever3eb35812015-01-21 11:02:54 -05001937 invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
Tom Talpey3197d3092008-10-09 15:00:20 -04001938 invalidate_wr.opcode = IB_WR_LOCAL_INV;
Chuck Lever3eb35812015-01-21 11:02:54 -05001939 invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
Tom Talpey3197d3092008-10-09 15:00:20 -04001940 DECR_CQCOUNT(&r_xprt->rx_ep);
1941
Chuck Lever73806c82014-07-29 17:23:25 -04001942 read_lock(&ia->ri_qplock);
1943 while (seg1->mr_nsegs--)
1944 rpcrdma_unmap_one(ia, seg++);
Tom Talpey3197d3092008-10-09 15:00:20 -04001945 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
Chuck Lever73806c82014-07-29 17:23:25 -04001946 read_unlock(&ia->ri_qplock);
Chuck Leverdab7e3b2014-07-29 17:25:20 -04001947 if (rc) {
1948 /* Force rpcrdma_buffer_get() to retry */
Chuck Lever3eb35812015-01-21 11:02:54 -05001949 seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
Tom Talpey3197d3092008-10-09 15:00:20 -04001950 dprintk("RPC: %s: failed ib_post_send for invalidate,"
1951 " status %i\n", __func__, rc);
Chuck Leverdab7e3b2014-07-29 17:25:20 -04001952 }
Tom Talpey3197d3092008-10-09 15:00:20 -04001953 return rc;
1954}
1955
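/* The FMR path uses the same gather-until-hole loop as the FRMR path,
 * but the segment DMA addresses are handed to ib_map_phys_fmr()
 * directly and the segment count is bounded by RPCRDMA_MAX_DATA_SEGS
 * rather than by the device's fast-register depth.
 */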
1956static int
Tom Talpey8d4ba032008-10-09 14:59:49 -04001957rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
1958 int *nsegs, int writing, struct rpcrdma_ia *ia)
1959{
1960 struct rpcrdma_mr_seg *seg1 = seg;
1961 u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
1962 int len, pageoff, i, rc;
1963
1964 pageoff = offset_in_page(seg1->mr_offset);
1965 seg1->mr_offset -= pageoff; /* start of page */
1966 seg1->mr_len += pageoff;
1967 len = -pageoff;
1968 if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
1969 *nsegs = RPCRDMA_MAX_DATA_SEGS;
1970 for (i = 0; i < *nsegs;) {
1971 rpcrdma_map_one(ia, seg, writing);
1972 physaddrs[i] = seg->mr_dma;
1973 len += seg->mr_len;
1974 ++seg;
1975 ++i;
1976 /* Check for holes */
1977 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1978 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1979 break;
1980 }
Chuck Lever3eb35812015-01-21 11:02:54 -05001981 rc = ib_map_phys_fmr(seg1->rl_mw->r.fmr, physaddrs, i, seg1->mr_dma);
Tom Talpey8d4ba032008-10-09 14:59:49 -04001982 if (rc) {
1983 dprintk("RPC: %s: failed ib_map_phys_fmr "
1984 "%u@0x%llx+%i (%d)... status %i\n", __func__,
1985 len, (unsigned long long)seg1->mr_dma,
1986 pageoff, i, rc);
1987 while (i--)
1988 rpcrdma_unmap_one(ia, --seg);
1989 } else {
Chuck Lever3eb35812015-01-21 11:02:54 -05001990 seg1->mr_rkey = seg1->rl_mw->r.fmr->rkey;
Tom Talpey8d4ba032008-10-09 14:59:49 -04001991 seg1->mr_base = seg1->mr_dma + pageoff;
1992 seg1->mr_nsegs = i;
1993 seg1->mr_len = len;
1994 }
1995 *nsegs = i;
1996 return rc;
1997}
1998
1999static int
2000rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
2001 struct rpcrdma_ia *ia)
2002{
2003 struct rpcrdma_mr_seg *seg1 = seg;
2004 LIST_HEAD(l);
2005 int rc;
2006
Chuck Lever3eb35812015-01-21 11:02:54 -05002007 list_add(&seg1->rl_mw->r.fmr->list, &l);
Tom Talpey8d4ba032008-10-09 14:59:49 -04002008 rc = ib_unmap_fmr(&l);
Chuck Lever73806c82014-07-29 17:23:25 -04002009 read_lock(&ia->ri_qplock);
Tom Talpey8d4ba032008-10-09 14:59:49 -04002010 while (seg1->mr_nsegs--)
2011 rpcrdma_unmap_one(ia, seg++);
Chuck Lever73806c82014-07-29 17:23:25 -04002012 read_unlock(&ia->ri_qplock);
Tom Talpey8d4ba032008-10-09 14:59:49 -04002013 if (rc)
2014 dprintk("RPC: %s: failed ib_unmap_fmr,"
2015 " status %i\n", __func__, rc);
2016 return rc;
2017}
2018
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002019int
2020rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
2021 int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
2022{
2023 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002024 int rc = 0;
2025
2026 switch (ia->ri_memreg_strategy) {
2027
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002028 case RPCRDMA_ALLPHYSICAL:
2029 rpcrdma_map_one(ia, seg, writing);
2030 seg->mr_rkey = ia->ri_bind_mem->rkey;
2031 seg->mr_base = seg->mr_dma;
2032 seg->mr_nsegs = 1;
2033 nsegs = 1;
2034 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002035
Tom Talpey3197d3092008-10-09 15:00:20 -04002036 /* Registration using frmr registration */
2037 case RPCRDMA_FRMR:
2038 rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
2039 break;
2040
Tom Talpey8d4ba032008-10-09 14:59:49 -04002041 /* Registration using fmr memory registration */
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002042 case RPCRDMA_MTHCAFMR:
Tom Talpey8d4ba032008-10-09 14:59:49 -04002043 rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002044 break;
2045
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002046 default:
Chuck Lever92b98362014-11-08 20:14:12 -05002047 return -EIO;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002048 }
2049 if (rc)
Chuck Lever92b98362014-11-08 20:14:12 -05002050 return rc;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002051
2052 return nsegs;
2053}
2054
2055int
2056rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
Chuck Lever13c9ff82014-05-28 10:33:08 -04002057 struct rpcrdma_xprt *r_xprt)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002058{
2059 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002060 int nsegs = seg->mr_nsegs, rc;
2061
2062 switch (ia->ri_memreg_strategy) {
2063
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002064 case RPCRDMA_ALLPHYSICAL:
Chuck Lever73806c82014-07-29 17:23:25 -04002065 read_lock(&ia->ri_qplock);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002066 rpcrdma_unmap_one(ia, seg);
Chuck Lever73806c82014-07-29 17:23:25 -04002067 read_unlock(&ia->ri_qplock);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002068 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002069
Tom Talpey3197d3092008-10-09 15:00:20 -04002070 case RPCRDMA_FRMR:
2071 rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
2072 break;
2073
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002074 case RPCRDMA_MTHCAFMR:
Tom Talpey8d4ba032008-10-09 14:59:49 -04002075 rc = rpcrdma_deregister_fmr_external(seg, ia);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002076 break;
2077
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002078 default:
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002079 break;
2080 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002081 return nsegs;
2082}
2083
2084/*
2085 * Prepost any receive buffer, then post send.
2086 *
2087 * Receive buffer is donated to hardware, reclaimed upon recv completion.
2088 */
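/* A note on the DMA syncs and signalling below: the header iov [0],
 * the marshalled RPC iov [1], and (when present) the pad iov [3] are
 * synced for the device before posting, and IB_SEND_SIGNALED is
 * requested only when the CQCOUNT budget runs out, so most sends
 * complete unsignalled.
 */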
2089int
2090rpcrdma_ep_post(struct rpcrdma_ia *ia,
2091 struct rpcrdma_ep *ep,
2092 struct rpcrdma_req *req)
2093{
2094 struct ib_send_wr send_wr, *send_wr_fail;
2095 struct rpcrdma_rep *rep = req->rl_reply;
2096 int rc;
2097
2098 if (rep) {
2099 rc = rpcrdma_ep_post_recv(ia, ep, rep);
2100 if (rc)
2101 goto out;
2102 req->rl_reply = NULL;
2103 }
2104
2105 send_wr.next = NULL;
2106 send_wr.wr_id = 0ULL; /* no send cookie */
2107 send_wr.sg_list = req->rl_send_iov;
2108 send_wr.num_sge = req->rl_niovs;
2109 send_wr.opcode = IB_WR_SEND;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002110 if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */
2111 ib_dma_sync_single_for_device(ia->ri_id->device,
2112 req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
2113 DMA_TO_DEVICE);
2114 ib_dma_sync_single_for_device(ia->ri_id->device,
2115 req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
2116 DMA_TO_DEVICE);
2117 ib_dma_sync_single_for_device(ia->ri_id->device,
2118 req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
2119 DMA_TO_DEVICE);
2120
2121 if (DECR_CQCOUNT(ep) > 0)
2122 send_wr.send_flags = 0;
2123 else { /* Provider must take a send completion every now and then */
2124 INIT_CQCOUNT(ep);
2125 send_wr.send_flags = IB_SEND_SIGNALED;
2126 }
2127
2128 rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
2129 if (rc)
2130 dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
2131 rc);
2132out:
2133 return rc;
2134}
2135
2136/*
2137 * (Re)post a receive buffer.
2138 */
2139int
2140rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
2141 struct rpcrdma_ep *ep,
2142 struct rpcrdma_rep *rep)
2143{
2144 struct ib_recv_wr recv_wr, *recv_wr_fail;
2145 int rc;
2146
2147 recv_wr.next = NULL;
2148 recv_wr.wr_id = (u64) (unsigned long) rep;
2149 recv_wr.sg_list = &rep->rr_iov;
2150 recv_wr.num_sge = 1;
2151
2152 ib_dma_sync_single_for_cpu(ia->ri_id->device,
2153 rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);
2154
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002155 rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
2156
2157 if (rc)
2158 dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
2159 rc);
2160 return rc;
2161}
Chuck Lever43e95982014-07-29 17:23:34 -04002162
 2163/* Physical mapping means one Read/Write list entry per page.
 2164 * All list entries must fit within an inline buffer.
2165 *
2166 * NB: The server must return a Write list for NFS READ,
2167 * which has the same constraint. Factor in the inline
2168 * rsize as well.
2169 */
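/* Rough arithmetic (illustrative values, assuming 1024-byte inline
 * buffers, a 28-byte minimal RPC-over-RDMA header, 16-byte segments
 * and 4KB pages): (1024 - 28) / 16 = 62 list entries, giving a
 * maximum payload of 62 << PAGE_SHIFT, roughly 248KB.
 */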
2170static size_t
2171rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt)
2172{
2173 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
2174 unsigned int inline_size, pages;
2175
2176 inline_size = min_t(unsigned int,
2177 cdata->inline_wsize, cdata->inline_rsize);
2178 inline_size -= RPCRDMA_HDRLEN_MIN;
2179 pages = inline_size / sizeof(struct rpcrdma_segment);
2180 return pages << PAGE_SHIFT;
2181}
2182
2183static size_t
2184rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt)
2185{
2186 return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
2187}
2188
2189size_t
2190rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt)
2191{
2192 size_t result;
2193
2194 switch (r_xprt->rx_ia.ri_memreg_strategy) {
2195 case RPCRDMA_ALLPHYSICAL:
2196 result = rpcrdma_physical_max_payload(r_xprt);
2197 break;
2198 default:
2199 result = rpcrdma_mr_max_payload(r_xprt);
2200 }
2201 return result;
2202}