\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -04001/*
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040038 */
39
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040040/*
41 * verbs.c
42 *
43 * Encapsulates the major functions managing:
44 * o adapters
45 * o endpoints
46 * o connections
47 * o buffer memory
48 */
49
Alexey Dobriyana6b7a402011-06-06 10:43:46 +000050#include <linux/interrupt.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090051#include <linux/slab.h>
Chuck Levereba8ff62015-01-21 11:03:02 -050052#include <linux/prefetch.h>
Chuck Lever65866f82014-05-28 10:33:59 -040053#include <asm/bitops.h>
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040054
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040055#include "xprt_rdma.h"
56
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040057/*
58 * Globals/Macros
59 */
60
Jeff Laytonf895b252014-11-17 16:58:04 -050061#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040062# define RPCDBG_FACILITY RPCDBG_TRANS
63#endif
64
Chuck Lever9f9d8022014-07-29 17:24:45 -040065static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);
Chuck Lever467c9672014-11-08 20:14:29 -050066static void rpcrdma_reset_fmrs(struct rpcrdma_ia *);
Chuck Lever9f9d8022014-07-29 17:24:45 -040067
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040068/*
69 * internal functions
70 */
71
72/*
73 * handle replies in tasklet context, using a single, global list
74 * rdma tasklet function -- just turn around and call the func
75 * for all replies on the list
76 */
77
78static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
79static LIST_HEAD(rpcrdma_tasklets_g);
80
81static void
82rpcrdma_run_tasklet(unsigned long data)
83{
84 struct rpcrdma_rep *rep;
85 void (*func)(struct rpcrdma_rep *);
86 unsigned long flags;
87
88 data = data;
89 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
90 while (!list_empty(&rpcrdma_tasklets_g)) {
91 rep = list_entry(rpcrdma_tasklets_g.next,
92 struct rpcrdma_rep, rr_list);
93 list_del(&rep->rr_list);
94 func = rep->rr_func;
95 rep->rr_func = NULL;
96 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
97
98 if (func)
99 func(rep);
100 else
101 rpcrdma_recv_buffer_put(rep);
102
103 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
104 }
105 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
106}
107
108static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
109
Chuck Lever7ff11de2014-11-08 20:15:01 -0500110static const char * const async_event[] = {
111 "CQ error",
112 "QP fatal error",
113 "QP request error",
114 "QP access error",
115 "communication established",
116 "send queue drained",
117 "path migration successful",
118 "path mig error",
119 "device fatal error",
120 "port active",
121 "port error",
122 "LID change",
123 "P_key change",
124 "SM change",
125 "SRQ error",
126 "SRQ limit reached",
127 "last WQE reached",
128 "client reregister",
129 "GID change",
130};
131
132#define ASYNC_MSG(status) \
133 ((status) < ARRAY_SIZE(async_event) ? \
134 async_event[(status)] : "unknown async error")
135
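/* Move a list of completed receives onto the global tasklet list
 * and kick the reply-handling tasklet. Callers build up @sched_list
 * while polling a receive CQ.
 */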
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400136static void
Chuck Leverf1a03b72014-11-08 20:14:37 -0500137rpcrdma_schedule_tasklet(struct list_head *sched_list)
138{
139 unsigned long flags;
140
141 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
142 list_splice_tail(sched_list, &rpcrdma_tasklets_g);
143 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
144 tasklet_schedule(&rpcrdma_tasklet_g);
145}
146
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400147static void
148rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
149{
150 struct rpcrdma_ep *ep = context;
151
Chuck Lever7ff11de2014-11-08 20:15:01 -0500152 pr_err("RPC: %s: %s on device %s ep %p\n",
153 __func__, ASYNC_MSG(event->event),
154 event->device->name, context);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400155 if (ep->rep_connected == 1) {
156 ep->rep_connected = -EIO;
Chuck Leverafadc462015-01-21 11:03:11 -0500157 rpcrdma_conn_func(ep);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400158 wake_up_all(&ep->rep_connect_wait);
159 }
160}
161
162static void
163rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
164{
165 struct rpcrdma_ep *ep = context;
166
Chuck Lever7ff11de2014-11-08 20:15:01 -0500167 pr_err("RPC: %s: %s on device %s ep %p\n",
168 __func__, ASYNC_MSG(event->event),
169 event->device->name, context);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400170 if (ep->rep_connected == 1) {
171 ep->rep_connected = -EIO;
Chuck Leverafadc462015-01-21 11:03:11 -0500172 rpcrdma_conn_func(ep);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400173 wake_up_all(&ep->rep_connect_wait);
174 }
175}
176
static const char * const wc_status[] = {
	"success",
	"local length error",
	"local QP operation error",
	"local EE context operation error",
	"local protection error",
	"WR flushed",
	"memory management operation error",
	"bad response error",
	"local access error",
	"remote invalid request error",
	"remote access error",
	"remote operation error",
	"transport retry counter exceeded",
	"RNR retry counter exceeded",
	"local RDD violation error",
	"remote invalid RD request",
	"operation aborted",
	"invalid EE context number",
	"invalid EE context state",
	"fatal error",
	"response timeout error",
	"general error",
};

#define COMPLETION_MSG(status)					\
	((status) < ARRAY_SIZE(wc_status) ?			\
		wc_status[(status)] : "unexpected completion error")

static void
rpcrdma_sendcq_process_wc(struct ib_wc *wc)
{
	if (likely(wc->status == IB_WC_SUCCESS))
		return;

	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->wr_id == 0ULL) {
		if (wc->status != IB_WC_WR_FLUSH_ERR)
			pr_err("RPC: %s: SEND: %s\n",
			       __func__, COMPLETION_MSG(wc->status));
	} else {
		struct rpcrdma_mw *r;

		r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
		r->r.frmr.fr_state = FRMR_IS_STALE;
		pr_err("RPC: %s: frmr %p (stale): %s\n",
		       __func__, r, COMPLETION_MSG(wc->status));
	}
}

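/* Poll the send CQ in batches of RPCRDMA_POLLSIZE work completions,
 * up to a total of RPCRDMA_WC_BUDGET completions per upcall, so that
 * a busy CQ cannot monopolize this context indefinitely.
 */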
static int
rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct ib_wc *wcs;
	int budget, count, rc;

	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_send_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			return rc;

		count = rc;
		while (count-- > 0)
			rpcrdma_sendcq_process_wc(wcs++);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	return 0;
}

/*
 * Handle send, fast_reg_mr, and local_inv completions.
 *
 * Send events are typically suppressed and thus do not result
 * in an upcall. Occasionally one is signaled, however. This
 * prevents the provider's completion queue from wrapping and
 * losing a completion.
 */
static void
rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_sendcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC: %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_sendcq_poll(cq, ep);
}

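/* Queue a completed receive on @sched_list for the reply-handling
 * tasklet. A flushed or failed receive is queued with rr_len set to
 * ~0U so it can be recognized and discarded later.
 */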
static void
rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
{
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long)wc->wr_id;

	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS)
		goto out_fail;

	/* status == SUCCESS means all fields in wc are trustworthy */
	if (wc->opcode != IB_WC_RECV)
		return;

	dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n",
		__func__, rep, wc->byte_len);

	rep->rr_len = wc->byte_len;
	ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
				   rep->rr_iov.addr, rep->rr_len,
				   DMA_FROM_DEVICE);
	prefetch(rep->rr_base);

out_schedule:
	list_add_tail(&rep->rr_list, sched_list);
	return;
out_fail:
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("RPC: %s: rep %p: %s\n",
		       __func__, rep, COMPLETION_MSG(wc->status));
	rep->rr_len = ~0U;
	goto out_schedule;
}

static int
rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct list_head sched_list;
	struct ib_wc *wcs;
	int budget, count, rc;

	INIT_LIST_HEAD(&sched_list);
	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_recv_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			goto out_schedule;

		count = rc;
		while (count-- > 0)
			rpcrdma_recvcq_process_wc(wcs++, &sched_list);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	rc = 0;

out_schedule:
	rpcrdma_schedule_tasklet(&sched_list);
	return rc;
}

/*
 * Handle receive completions.
 *
 * It is reentrant but processes single events in order to maintain
 * ordering of receives to keep server credits.
 *
 * It is the responsibility of the scheduled tasklet to return
 * recv buffers to the pool. NOTE: this affects synchronization of
 * connection shutdown. That is, the structures required for
 * the completion of the reply handler must remain intact until
 * all memory has been reclaimed.
 */
static void
rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_recvcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC: %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_recvcq_poll(cq, ep);
}

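/* Drain both completion queues so that no work completions remain
 * outstanding, typically before a reconnect or a disconnect.
 */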
static void
rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
{
	struct ib_wc wc;
	LIST_HEAD(sched_list);

	while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
		rpcrdma_recvcq_process_wc(&wc, &sched_list);
	if (!list_empty(&sched_list))
		rpcrdma_schedule_tasklet(&sched_list);
	while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
		rpcrdma_sendcq_process_wc(&wc);
}

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
static const char * const conn[] = {
	"address resolved",
	"address error",
	"route resolved",
	"route error",
	"connect request",
	"connect response",
	"connect error",
	"unreachable",
	"rejected",
	"established",
	"disconnected",
	"device removal",
	"multicast join",
	"multicast error",
	"address change",
	"timewait exit",
};

#define CONNECTION_MSG(status)					\
	((status) < ARRAY_SIZE(conn) ?				\
		conn[(status)] : "unrecognized connection error")
#endif

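/* Connection manager event handler: records the result of address
 * and route resolution in ri_async_rc, translates connection events
 * into ep->rep_connected state, and wakes waiters on rep_connect_wait.
 */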
static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
#endif
	struct ib_qp_attr attr;
	struct ib_qp_init_attr iattr;
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ia->ri_async_rc = 0;
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		ib_query_qp(ia->ri_id->qp, &attr,
			    IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
			    &iattr);
		dprintk("RPC: %s: %d responder resources"
			" (%d initiator)\n",
			__func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		connstate = -ECONNREFUSED;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
		goto connected;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		connstate = -ENODEV;
connected:
		dprintk("RPC: %s: %sconnected\n",
			__func__, connstate > 0 ? "" : "dis");
		ep->rep_connected = connstate;
		rpcrdma_conn_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		/*FALLTHROUGH*/
	default:
		dprintk("RPC: %s: %pI4:%u (ep 0x%p): %s\n",
			__func__, &addr->sin_addr.s_addr,
			ntohs(addr->sin_port), ep,
			CONNECTION_MSG(event->event));
		break;
	}

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	if (connstate == 1) {
		int ird = attr.max_dest_rd_atomic;
		int tird = ep->rep_remote_cma.responder_resources;
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
			"on %s, memreg %d slots %d ird %d%s\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			ia->ri_id->device->name,
			ia->ri_memreg_strategy,
			xprt->rx_buf.rb_max_requests,
			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
	} else if (connstate < 0) {
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			connstate);
	}
#endif

	return 0;
}

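/* Create an rdma_cm_id and synchronously resolve the server's address
 * and route. Each step waits on ri_done with a timeout; the CM event
 * handler reports its result back through ia->ri_async_rc.
 */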
static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
			struct rpcrdma_ia *ia, struct sockaddr *addr)
{
	struct rdma_cm_id *id;
	int rc;

	init_completion(&ia->ri_done);

	id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		dprintk("RPC: %s: rdma_create_id() failed %i\n",
			__func__, rc);
		return id;
	}

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}

/*
 * Drain any cq, prior to teardown.
 */
static void
rpcrdma_clean_cq(struct ib_cq *cq)
{
	struct ib_wc wc;
	int count = 0;

	while (1 == ib_poll_cq(cq, 1, &wc))
		++count;

	if (count)
		dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
			__func__, count, wc.opcode);
}

/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc, mem_priv;
	struct ib_device_attr devattr;
	struct rpcrdma_ia *ia = &xprt->rx_ia;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	/*
	 * Query the device to determine if the requested memory
	 * registration strategy is supported. If it isn't, set the
	 * strategy to a globally supported model.
	 */
	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out3;
	}

	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
	}

	if (memreg == RPCRDMA_FRMR) {
		/* Requires both frmr reg and local dma lkey */
		if ((devattr.device_cap_flags &
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
			dprintk("RPC: %s: FRMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_MTHCAFMR;
		} else {
			/* Mind the ia limit on FRMR page list depth */
			ia->ri_max_frmr_depth = min_t(unsigned int,
				RPCRDMA_MAX_DATA_SEGS,
				devattr.max_fast_reg_page_list_len);
		}
	}
	if (memreg == RPCRDMA_MTHCAFMR) {
		if (!ia->ri_id->device->alloc_fmr) {
			dprintk("RPC: %s: MTHCAFMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_ALLPHYSICAL;
		}
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_FRMR:
		break;
	case RPCRDMA_ALLPHYSICAL:
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
	case RPCRDMA_MTHCAFMR:
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
	register_setup:
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			rc = -ENOMEM;
			goto out3;
		}
		break;
	default:
		printk(KERN_ERR "RPC: Unsupported memory "
				"registration mode: %d\n", memreg);
		rc = -ENOMEM;
		goto out3;
	}
	dprintk("RPC: %s: memory registration strategy is %d\n",
		__func__, memreg);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	rwlock_init(&ia->ri_qplock);
	return 0;

out3:
	ib_dealloc_pd(ia->ri_pd);
	ia->ri_pd = NULL;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}

/*
 * Clean up/close an IA.
 *   o if event handles and PD have been initialized, free them.
 *   o close the IA
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering\n", __func__);
	if (ia->ri_bind_mem != NULL) {
		rc = ib_dereg_mr(ia->ri_bind_mem);
		dprintk("RPC: %s: ib_dereg_mr returned %i\n",
			__func__, rc);
	}
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
		if (ia->ri_id->qp)
			rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = NULL;
	}
	if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
		rc = ib_dealloc_pd(ia->ri_pd);
		dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
			__func__, rc);
	}
}
/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
				struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr devattr;
	struct ib_cq *sendcq, *recvcq;
	int rc, err;

	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		return rc;
	}

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > devattr.max_qp_wr)
		cdata->max_requests = devattr.max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	/* send_cq and recv_cq initialized below */
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR: {
		int depth = 7;

		/* Add room for frmr register and invalidate WRs.
		 * 1. FRMR reg WR for head
		 * 2. FRMR invalidate WR for head
		 * 3. N FRMR reg WRs for pagelist
		 * 4. N FRMR invalidate WRs for pagelist
		 * 5. FRMR reg WR for tail
		 * 6. FRMR invalidate WR for tail
		 * 7. The RDMA_SEND WR
		 */

		/* Calculate N if the device max FRMR depth is smaller than
		 * RPCRDMA_MAX_DATA_SEGS.
		 */
		if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
			int delta = RPCRDMA_MAX_DATA_SEGS -
				    ia->ri_max_frmr_depth;

			do {
				depth += 2; /* FRMR reg + invalidate */
				delta -= ia->ri_max_frmr_depth;
			} while (delta > 0);

		}
		ep->rep_attr.cap.max_send_wr *= depth;
		if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
			cdata->max_requests = devattr.max_qp_wr / depth;
			if (!cdata->max_requests)
				return -EINVAL;
			ep->rep_attr.cap.max_send_wr = cdata->max_requests *
						       depth;
		}
		break;
	}
	default:
		break;
	}
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

	/* set trigger for requesting send completion */
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
	if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
		ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
	else if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;
	INIT_CQCOUNT(ep);
	init_waitqueue_head(&ep->rep_connect_wait);
	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

	sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
				  rpcrdma_cq_async_error_upcall, ep,
				  ep->rep_attr.cap.max_send_wr + 1, 0);
	if (IS_ERR(sendcq)) {
		rc = PTR_ERR(sendcq);
		dprintk("RPC: %s: failed to create send CQ: %i\n",
			__func__, rc);
		goto out1;
	}

	rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		goto out2;
	}

	recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
				  rpcrdma_cq_async_error_upcall, ep,
				  ep->rep_attr.cap.max_recv_wr + 1, 0);
	if (IS_ERR(recvcq)) {
		rc = PTR_ERR(recvcq);
		dprintk("RPC: %s: failed to create recv CQ: %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		ib_destroy_cq(recvcq);
		goto out2;
	}

	ep->rep_attr.send_cq = sendcq;
	ep->rep_attr.recv_cq = recvcq;

	/* Initialize cma parameters */

	/* RPC/RDMA does not use private data */
	ep->rep_remote_cma.private_data = NULL;
	ep->rep_remote_cma.private_data_len = 0;

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	if (devattr.max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
		ep->rep_remote_cma.responder_resources = 32;
	else
		ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;

	ep->rep_remote_cma.retry_count = 7;
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	err = ib_destroy_cq(sendcq);
	if (err)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, err);
out1:
	return rc;
}

/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering, connected is %d\n",
		__func__, ep->rep_connected);

	cancel_delayed_work_sync(&ep->rep_connect_worker);

	if (ia->ri_id->qp) {
		rpcrdma_ep_disconnect(ep, ia);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	/* padding - could be done in rpcrdma_buffer_destroy... */
	if (ep->rep_pad_mr) {
		rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
		ep->rep_pad_mr = NULL;
	}

	rpcrdma_clean_cq(ep->rep_attr.recv_cq);
	rc = ib_destroy_cq(ep->rep_attr.recv_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);

	rpcrdma_clean_cq(ep->rep_attr.send_cq);
	rc = ib_destroy_cq(ep->rep_attr.send_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);
}

/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id, *old;
	int rc = 0;
	int retry_count = 0;

	if (ep->rep_connected != 0) {
		struct rpcrdma_xprt *xprt;
retry:
		dprintk("RPC: %s: reconnecting...\n", __func__);

		rpcrdma_ep_disconnect(ep, ia);
		rpcrdma_flush_cqs(ep);

		switch (ia->ri_memreg_strategy) {
		case RPCRDMA_FRMR:
			rpcrdma_reset_frmrs(ia);
			break;
		case RPCRDMA_MTHCAFMR:
			rpcrdma_reset_fmrs(ia);
			break;
		case RPCRDMA_ALLPHYSICAL:
			break;
		default:
			rc = -EIO;
			goto out;
		}

		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		id = rpcrdma_create_id(xprt, ia,
				(struct sockaddr *)&xprt->rx_data.addr);
		if (IS_ERR(id)) {
			rc = -EHOSTUNREACH;
			goto out;
		}
		/* TEMP TEMP TEMP - fail if new device:
		 * Deregister/remarshal *all* requests!
		 * Close and recreate adapter, pd, etc!
		 * Re-determine all attributes still sane!
		 * More stuff I haven't thought of!
		 * Rrrgh!
		 */
		if (ia->ri_id->device != id->device) {
			printk("RPC: %s: can't reconnect on "
				"different device!\n", __func__);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}
		/* END TEMP */
		rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
				__func__, rc);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}

		write_lock(&ia->ri_qplock);
		old = ia->ri_id;
		ia->ri_id = id;
		write_unlock(&ia->ri_qplock);

		rdma_destroy_qp(old);
		rdma_destroy_id(old);
	} else {
		dprintk("RPC: %s: connecting...\n", __func__);
		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
				__func__, rc);
			/* do not update ep->rep_connected */
			return -ENETUNREACH;
		}
	}

	ep->rep_connected = 0;

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC: %s: rdma_connect() failed with %i\n",
			__func__, rc);
		goto out;
	}

	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

	/*
	 * Check state. A non-peer reject indicates no listener
	 * (ECONNREFUSED), which may be a transient state. All
	 * others indicate a transport condition which has already
	 * undergone a best-effort.
	 */
	if (ep->rep_connected == -ECONNREFUSED &&
	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
		dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
		goto retry;
	}
	if (ep->rep_connected <= 0) {
		/* Sometimes, the only way to reliably connect to remote
		 * CMs is to use same nonzero values for ORD and IRD. */
		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
		    (ep->rep_remote_cma.responder_resources == 0 ||
		     ep->rep_remote_cma.initiator_depth !=
		     ep->rep_remote_cma.responder_resources)) {
			if (ep->rep_remote_cma.responder_resources == 0)
				ep->rep_remote_cma.responder_resources = 1;
			ep->rep_remote_cma.initiator_depth =
				ep->rep_remote_cma.responder_resources;
			goto retry;
		}
		rc = ep->rep_connected;
	} else {
		dprintk("RPC: %s: connected\n", __func__);
	}

out:
	if (rc)
		ep->rep_connected = rc;
	return rc;
}

/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
void
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	rpcrdma_flush_cqs(ep);
	rc = rdma_disconnect(ia->ri_id);
	if (!rc) {
		/* returns without wait if not connected */
		wait_event_interruptible(ep->rep_connect_wait,
					 ep->rep_connected != 1);
		dprintk("RPC: %s: after wait, %sconnected\n", __func__,
			(ep->rep_connected == 1) ? "still " : "dis");
	} else {
		dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
		ep->rep_connected = rc;
	}
}

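/* Allocate the pool of FMRs: (max_requests + 1) * RPCRDMA_MAX_SEGS
 * memory windows, each linked onto rb_mws (the available list) and
 * rb_all (the list of all MWs, used for teardown and reset).
 */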
static int
rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
{
	int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
	struct ib_fmr_attr fmr_attr = {
		.max_pages	= RPCRDMA_MAX_DATA_SEGS,
		.max_maps	= 1,
		.page_shift	= PAGE_SHIFT
	};
	struct rpcrdma_mw *r;
	int i, rc;

	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
	dprintk("RPC: %s: initializing %d FMRs\n", __func__, i);

	while (i--) {
		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (r == NULL)
			return -ENOMEM;

		r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr);
		if (IS_ERR(r->r.fmr)) {
			rc = PTR_ERR(r->r.fmr);
			dprintk("RPC: %s: ib_alloc_fmr failed %i\n",
				__func__, rc);
			goto out_free;
		}

		list_add(&r->mw_list, &buf->rb_mws);
		list_add(&r->mw_all, &buf->rb_all);
	}
	return 0;

out_free:
	kfree(r);
	return rc;
}

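/* Allocate the pool of FRMRs: each memory window gets a fast
 * registration MR and a page list sized to ia->ri_max_frmr_depth,
 * and is linked onto rb_mws and rb_all.
 */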
static int
rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_frmr *f;
	struct rpcrdma_mw *r;
	int i, rc;

	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
	dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i);

	while (i--) {
		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (r == NULL)
			return -ENOMEM;
		f = &r->r.frmr;

		f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
						ia->ri_max_frmr_depth);
		if (IS_ERR(f->fr_mr)) {
			rc = PTR_ERR(f->fr_mr);
			dprintk("RPC: %s: ib_alloc_fast_reg_mr "
				"failed %i\n", __func__, rc);
			goto out_free;
		}

		f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device,
							ia->ri_max_frmr_depth);
		if (IS_ERR(f->fr_pgl)) {
			rc = PTR_ERR(f->fr_pgl);
			dprintk("RPC: %s: ib_alloc_fast_reg_page_list "
				"failed %i\n", __func__, rc);

			ib_dereg_mr(f->fr_mr);
			goto out_free;
		}

		list_add(&r->mw_list, &buf->rb_mws);
		list_add(&r->mw_all, &buf->rb_all);
	}

	return 0;

out_free:
	kfree(r);
	return rc;
}

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001173int
1174rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
1175 struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
1176{
1177 char *p;
Chuck Lever65866f82014-05-28 10:33:59 -04001178 size_t len, rlen, wlen;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001179 int i, rc;
1180
1181 buf->rb_max_requests = cdata->max_requests;
1182 spin_lock_init(&buf->rb_lock);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001183
1184 /* Need to allocate:
1185 * 1. arrays for send and recv pointers
1186 * 2. arrays of struct rpcrdma_req to fill in pointers
1187 * 3. array of struct rpcrdma_rep for replies
1188 * 4. padding, if any
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001189 * Send/recv buffers in req/rep need to be registered
1190 */
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001191 len = buf->rb_max_requests *
1192 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
1193 len += cdata->padding;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001194
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001195 p = kzalloc(len, GFP_KERNEL);
1196 if (p == NULL) {
1197 dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
1198 __func__, len);
1199 rc = -ENOMEM;
1200 goto out;
1201 }
1202 buf->rb_pool = p; /* for freeing it later */
1203
1204 buf->rb_send_bufs = (struct rpcrdma_req **) p;
1205 p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
1206 buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
1207 p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];
1208
1209 /*
1210 * Register the zeroed pad buffer, if any.
1211 */
1212 if (cdata->padding) {
1213 rc = rpcrdma_register_internal(ia, p, cdata->padding,
1214 &ep->rep_pad_mr, &ep->rep_pad);
1215 if (rc)
1216 goto out;
1217 }
1218 p += cdata->padding;
1219
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001220 INIT_LIST_HEAD(&buf->rb_mws);
Chuck Lever3111d722014-07-29 17:24:28 -04001221 INIT_LIST_HEAD(&buf->rb_all);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001222 switch (ia->ri_memreg_strategy) {
Tom Talpey3197d3092008-10-09 15:00:20 -04001223 case RPCRDMA_FRMR:
Chuck Lever2e845222014-07-29 17:25:38 -04001224 rc = rpcrdma_init_frmrs(ia, buf);
1225 if (rc)
1226 goto out;
Tom Talpey3197d3092008-10-09 15:00:20 -04001227 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001228 case RPCRDMA_MTHCAFMR:
Chuck Lever2e845222014-07-29 17:25:38 -04001229 rc = rpcrdma_init_fmrs(ia, buf);
1230 if (rc)
1231 goto out;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001232 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001233 default:
1234 break;
1235 }
1236
1237 /*
1238 * Allocate/init the request/reply buffers. Doing this
1239 * using kmalloc for now -- one for each buf.
1240 */
Chuck Lever65866f82014-05-28 10:33:59 -04001241 wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req));
1242 rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep));
1243 dprintk("RPC: %s: wlen = %zu, rlen = %zu\n",
1244 __func__, wlen, rlen);
1245
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001246 for (i = 0; i < buf->rb_max_requests; i++) {
1247 struct rpcrdma_req *req;
1248 struct rpcrdma_rep *rep;
1249
Chuck Lever65866f82014-05-28 10:33:59 -04001250 req = kmalloc(wlen, GFP_KERNEL);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001251 if (req == NULL) {
1252 dprintk("RPC: %s: request buffer %d alloc"
1253 " failed\n", __func__, i);
1254 rc = -ENOMEM;
1255 goto out;
1256 }
1257 memset(req, 0, sizeof(struct rpcrdma_req));
1258 buf->rb_send_bufs[i] = req;
1259 buf->rb_send_bufs[i]->rl_buffer = buf;
1260
1261 rc = rpcrdma_register_internal(ia, req->rl_base,
Chuck Lever65866f82014-05-28 10:33:59 -04001262 wlen - offsetof(struct rpcrdma_req, rl_base),
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001263 &buf->rb_send_bufs[i]->rl_handle,
1264 &buf->rb_send_bufs[i]->rl_iov);
1265 if (rc)
1266 goto out;
1267
Chuck Lever65866f82014-05-28 10:33:59 -04001268 buf->rb_send_bufs[i]->rl_size = wlen -
1269 sizeof(struct rpcrdma_req);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001270
Chuck Lever65866f82014-05-28 10:33:59 -04001271 rep = kmalloc(rlen, GFP_KERNEL);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001272 if (rep == NULL) {
1273 dprintk("RPC: %s: reply buffer %d alloc failed\n",
1274 __func__, i);
1275 rc = -ENOMEM;
1276 goto out;
1277 }
1278 memset(rep, 0, sizeof(struct rpcrdma_rep));
1279 buf->rb_recv_bufs[i] = rep;
1280 buf->rb_recv_bufs[i]->rr_buffer = buf;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001281
1282 rc = rpcrdma_register_internal(ia, rep->rr_base,
Chuck Lever65866f82014-05-28 10:33:59 -04001283 rlen - offsetof(struct rpcrdma_rep, rr_base),
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001284 &buf->rb_recv_bufs[i]->rr_handle,
1285 &buf->rb_recv_bufs[i]->rr_iov);
1286 if (rc)
1287 goto out;
1288
1289 }
1290 dprintk("RPC: %s: max_requests %d\n",
1291 __func__, buf->rb_max_requests);
1292 /* done */
1293 return 0;
1294out:
1295 rpcrdma_buffer_destroy(buf);
1296 return rc;
1297}
1298
static void
rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;
	int rc;

	while (!list_empty(&buf->rb_all)) {
		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
		list_del(&r->mw_all);
		list_del(&r->mw_list);

		rc = ib_dealloc_fmr(r->r.fmr);
		if (rc)
			dprintk("RPC: %s: ib_dealloc_fmr failed %i\n",
				__func__, rc);

		kfree(r);
	}
}

static void
rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;
	int rc;

	while (!list_empty(&buf->rb_all)) {
		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
		list_del(&r->mw_all);
		list_del(&r->mw_list);

		rc = ib_dereg_mr(r->r.frmr.fr_mr);
		if (rc)
			dprintk("RPC: %s: ib_dereg_mr failed %i\n",
				__func__, rc);
		ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);

		kfree(r);
	}
}

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001340void
1341rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1342{
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001343 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
Chuck Lever2e845222014-07-29 17:25:38 -04001344 int i;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001345
1346 /* clean up in reverse order from create
1347 * 1. recv mr memory (mr free, then kfree)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001348 * 2. send mr memory (mr free, then kfree)
Chuck Lever2e845222014-07-29 17:25:38 -04001349 * 3. MWs
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001350 */
1351 dprintk("RPC: %s: entering\n", __func__);
1352
1353 for (i = 0; i < buf->rb_max_requests; i++) {
1354 if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) {
1355 rpcrdma_deregister_internal(ia,
1356 buf->rb_recv_bufs[i]->rr_handle,
1357 &buf->rb_recv_bufs[i]->rr_iov);
1358 kfree(buf->rb_recv_bufs[i]);
1359 }
1360 if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001361 rpcrdma_deregister_internal(ia,
1362 buf->rb_send_bufs[i]->rl_handle,
1363 &buf->rb_send_bufs[i]->rl_iov);
1364 kfree(buf->rb_send_bufs[i]);
1365 }
1366 }
1367
Chuck Lever2e845222014-07-29 17:25:38 -04001368 switch (ia->ri_memreg_strategy) {
1369 case RPCRDMA_FRMR:
1370 rpcrdma_destroy_frmrs(buf);
1371 break;
1372 case RPCRDMA_MTHCAFMR:
1373 rpcrdma_destroy_fmrs(buf);
1374 break;
1375 default:
1376 break;
Allen Andrews4034ba02014-05-28 10:32:09 -04001377 }
1378
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001379 kfree(buf->rb_pool);
1380}
1381
/* After a disconnect, unmap all FMRs.
 *
 * This is invoked only in the transport connect worker in order
 * to serialize with rpcrdma_register_fmr_external().
 */
static void
rpcrdma_reset_fmrs(struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct list_head *pos;
	struct rpcrdma_mw *r;
	LIST_HEAD(l);
	int rc;

	list_for_each(pos, &buf->rb_all) {
		r = list_entry(pos, struct rpcrdma_mw, mw_all);

		INIT_LIST_HEAD(&l);
		list_add(&r->r.fmr->list, &l);
		rc = ib_unmap_fmr(&l);
		if (rc)
			dprintk("RPC: %s: ib_unmap_fmr failed %i\n",
				__func__, rc);
	}
}

/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
 * an unusable state. Find FRMRs in this state and dereg / reg
 * each. FRMRs that are VALID and attached to an rpcrdma_req are
 * also torn down.
 *
 * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
 *
 * This is invoked only in the transport connect worker in order
 * to serialize with rpcrdma_register_frmr_external().
 */
static void
rpcrdma_reset_frmrs(struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct list_head *pos;
	struct rpcrdma_mw *r;
	int rc;

	list_for_each(pos, &buf->rb_all) {
		r = list_entry(pos, struct rpcrdma_mw, mw_all);

		if (r->r.frmr.fr_state == FRMR_IS_INVALID)
			continue;

		rc = ib_dereg_mr(r->r.frmr.fr_mr);
		if (rc)
			dprintk("RPC: %s: ib_dereg_mr failed %i\n",
				__func__, rc);
		ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);

		r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
					ia->ri_max_frmr_depth);
		if (IS_ERR(r->r.frmr.fr_mr)) {
			rc = PTR_ERR(r->r.frmr.fr_mr);
			dprintk("RPC: %s: ib_alloc_fast_reg_mr"
				" failed %i\n", __func__, rc);
			continue;
		}
		r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
					ia->ri_id->device,
					ia->ri_max_frmr_depth);
		if (IS_ERR(r->r.frmr.fr_pgl)) {
			rc = PTR_ERR(r->r.frmr.fr_pgl);
			dprintk("RPC: %s: "
				"ib_alloc_fast_reg_page_list "
				"failed %i\n", __func__, rc);

			ib_dereg_mr(r->r.frmr.fr_mr);
			continue;
		}
		r->r.frmr.fr_state = FRMR_IS_INVALID;
	}
}

/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
 * some req segments uninitialized.
 */
static void
rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
{
	if (*mw) {
		list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
		*mw = NULL;
	}
}

/* Cycle mw's back in reverse order, and "spin" them.
 * This delays and scrambles reuse as much as possible.
 */
static void
rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mr_seg *seg = req->rl_segments;
	struct rpcrdma_mr_seg *seg1 = seg;
	int i;

	for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
		rpcrdma_buffer_put_mr(&seg->rl_mw, buf);
	rpcrdma_buffer_put_mr(&seg1->rl_mw, buf);
}

static void
rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	buf->rb_send_bufs[--buf->rb_send_index] = req;
	req->rl_niovs = 0;
	if (req->rl_reply) {
		buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
		req->rl_reply->rr_func = NULL;
		req->rl_reply = NULL;
	}
}

/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
 * Redo only the ib_post_send().
 */
static void
rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct ib_send_wr invalidate_wr, *bad_wr;
	int rc;

	dprintk("RPC: %s: FRMR %p is stale\n", __func__, r);

	/* When this FRMR is re-inserted into rb_mws, it is no longer stale */
	r->r.frmr.fr_state = FRMR_IS_INVALID;

	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
	invalidate_wr.wr_id = (unsigned long)(void *)r;
	invalidate_wr.opcode = IB_WR_LOCAL_INV;
	invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	dprintk("RPC: %s: frmr %p invalidating rkey %08x\n",
		__func__, r, r->r.frmr.fr_mr->rkey);

	read_lock(&ia->ri_qplock);
	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
	read_unlock(&ia->ri_qplock);
	if (rc) {
		/* Force rpcrdma_buffer_get() to retry */
		r->r.frmr.fr_state = FRMR_IS_STALE;
		dprintk("RPC: %s: ib_post_send failed, %i\n",
			__func__, rc);
	}
}

static void
rpcrdma_retry_flushed_linv(struct list_head *stale,
			   struct rpcrdma_buffer *buf)
{
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	struct list_head *pos;
	struct rpcrdma_mw *r;
	unsigned long flags;

	list_for_each(pos, stale) {
		r = list_entry(pos, struct rpcrdma_mw, mw_list);
		rpcrdma_retry_local_inv(r, ia);
	}

	spin_lock_irqsave(&buf->rb_lock, flags);
	list_splice_tail(stale, &buf->rb_mws);
	spin_unlock_irqrestore(&buf->rb_lock, flags);
}

Chuck Leverc2922c02014-07-29 17:24:36 -04001560static struct rpcrdma_req *
Chuck Leverddb6beb2014-07-29 17:24:54 -04001561rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
1562 struct list_head *stale)
1563{
1564 struct rpcrdma_mw *r;
1565 int i;
1566
1567 i = RPCRDMA_MAX_SEGS - 1;
1568 while (!list_empty(&buf->rb_mws)) {
1569 r = list_entry(buf->rb_mws.next,
1570 struct rpcrdma_mw, mw_list);
1571 list_del(&r->mw_list);
1572 if (r->r.frmr.fr_state == FRMR_IS_STALE) {
1573 list_add(&r->mw_list, stale);
1574 continue;
1575 }
Chuck Lever3eb35812015-01-21 11:02:54 -05001576 req->rl_segments[i].rl_mw = r;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001577 if (unlikely(i-- == 0))
1578 return req; /* Success */
1579 }
1580
1581 /* Not enough entries on rb_mws for this req */
1582 rpcrdma_buffer_put_sendbuf(req, buf);
1583 rpcrdma_buffer_put_mrs(req, buf);
1584 return NULL;
1585}
1586
1587static struct rpcrdma_req *
1588rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
Chuck Leverc2922c02014-07-29 17:24:36 -04001589{
1590 struct rpcrdma_mw *r;
1591 int i;
1592
1593 i = RPCRDMA_MAX_SEGS - 1;
1594 while (!list_empty(&buf->rb_mws)) {
1595 r = list_entry(buf->rb_mws.next,
1596 struct rpcrdma_mw, mw_list);
1597 list_del(&r->mw_list);
Chuck Lever3eb35812015-01-21 11:02:54 -05001598 req->rl_segments[i].rl_mw = r;
Chuck Leverc2922c02014-07-29 17:24:36 -04001599 if (unlikely(i-- == 0))
1600 return req; /* Success */
1601 }
1602
1603 /* Not enough entries on rb_mws for this req */
1604 rpcrdma_buffer_put_sendbuf(req, buf);
1605 rpcrdma_buffer_put_mrs(req, buf);
1606 return NULL;
1607}
1608
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001609/*
1610 * Get a set of request/reply buffers.
1611 *
1612 * Reply buffer (if needed) is attached to send buffer upon return.
1613 * Rule:
1614 * rb_send_index and rb_recv_index MUST always be pointing to the
1615 * *next* available buffer (non-NULL). They are incremented after
1616 * removing buffers, and decremented *before* returning them.
1617 */
1618struct rpcrdma_req *
1619rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1620{
Chuck Leverc2922c02014-07-29 17:24:36 -04001621 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
Chuck Leverddb6beb2014-07-29 17:24:54 -04001622 struct list_head stale;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001623 struct rpcrdma_req *req;
1624 unsigned long flags;
1625
1626 spin_lock_irqsave(&buffers->rb_lock, flags);
1627 if (buffers->rb_send_index == buffers->rb_max_requests) {
1628 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1629 dprintk("RPC: %s: out of request buffers\n", __func__);
 1630		return NULL;
1631 }
1632
1633 req = buffers->rb_send_bufs[buffers->rb_send_index];
1634 if (buffers->rb_send_index < buffers->rb_recv_index) {
1635 dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
1636 __func__,
1637 buffers->rb_recv_index - buffers->rb_send_index);
1638 req->rl_reply = NULL;
1639 } else {
1640 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1641 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1642 }
1643 buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001644
1645 INIT_LIST_HEAD(&stale);
Chuck Leverc2922c02014-07-29 17:24:36 -04001646 switch (ia->ri_memreg_strategy) {
1647 case RPCRDMA_FRMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001648 req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
1649 break;
Chuck Leverc2922c02014-07-29 17:24:36 -04001650 case RPCRDMA_MTHCAFMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001651 req = rpcrdma_buffer_get_fmrs(req, buffers);
Chuck Leverc2922c02014-07-29 17:24:36 -04001652 break;
1653 default:
1654 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001655 }
1656 spin_unlock_irqrestore(&buffers->rb_lock, flags);
Chuck Leverddb6beb2014-07-29 17:24:54 -04001657 if (!list_empty(&stale))
1658 rpcrdma_retry_flushed_linv(&stale, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001659 return req;
1660}
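/* Illustrative only: how a caller might pair rpcrdma_buffer_get() with
 * rpcrdma_buffer_put(). The helper below and its error handling are
 * hypothetical; the real callers live in the transport send path, and a
 * NULL return simply means the pool is exhausted for the moment.
 */
static int
rpcrdma_sketch_with_buffers(struct rpcrdma_buffer *buffers)
{
	struct rpcrdma_req *req;

	req = rpcrdma_buffer_get(buffers);
	if (req == NULL)
		return -EAGAIN;		/* no request buffers available */

	/* ... marshal and post the RPC using "req" here ... */

	rpcrdma_buffer_put(req);	/* recycle req and any reply buffer */
	return 0;
}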
1661
1662/*
1663 * Put request/reply buffers back into pool.
1664 * Pre-decrement counter/array index.
1665 */
1666void
1667rpcrdma_buffer_put(struct rpcrdma_req *req)
1668{
1669 struct rpcrdma_buffer *buffers = req->rl_buffer;
1670 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001671 unsigned long flags;
1672
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001673 spin_lock_irqsave(&buffers->rb_lock, flags);
Chuck Leverc2922c02014-07-29 17:24:36 -04001674 rpcrdma_buffer_put_sendbuf(req, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001675 switch (ia->ri_memreg_strategy) {
Tom Talpey3197d3092008-10-09 15:00:20 -04001676 case RPCRDMA_FRMR:
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001677 case RPCRDMA_MTHCAFMR:
Chuck Leverc2922c02014-07-29 17:24:36 -04001678 rpcrdma_buffer_put_mrs(req, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001679 break;
1680 default:
1681 break;
1682 }
1683 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1684}
1685
1686/*
1687 * Recover reply buffers from pool.
1688 * This happens when recovering from error conditions.
1689 * Post-increment counter/array index.
1690 */
1691void
1692rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
1693{
1694 struct rpcrdma_buffer *buffers = req->rl_buffer;
1695 unsigned long flags;
1696
1697 if (req->rl_iov.length == 0) /* special case xprt_rdma_allocate() */
1698 buffers = ((struct rpcrdma_req *) buffers)->rl_buffer;
1699 spin_lock_irqsave(&buffers->rb_lock, flags);
1700 if (buffers->rb_recv_index < buffers->rb_max_requests) {
1701 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1702 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1703 }
1704 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1705}
1706
1707/*
1708 * Put reply buffers back into pool when not attached to
Chuck Leverb45ccfd2014-05-28 10:32:34 -04001709 * request. This happens in error conditions.
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001710 */
1711void
1712rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
1713{
1714 struct rpcrdma_buffer *buffers = rep->rr_buffer;
1715 unsigned long flags;
1716
1717 rep->rr_func = NULL;
1718 spin_lock_irqsave(&buffers->rb_lock, flags);
1719 buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
1720 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1721}
1722
1723/*
1724 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
1725 */
1726
1727int
1728rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
1729 struct ib_mr **mrp, struct ib_sge *iov)
1730{
1731 struct ib_phys_buf ipb;
1732 struct ib_mr *mr;
1733 int rc;
1734
1735 /*
1736 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
1737 */
1738 iov->addr = ib_dma_map_single(ia->ri_id->device,
1739 va, len, DMA_BIDIRECTIONAL);
Yan Burmanbf858ab2014-06-19 16:06:30 +03001740 if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
1741 return -ENOMEM;
1742
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001743 iov->length = len;
1744
Tom Talpeybd7ed1d2008-10-09 15:00:09 -04001745 if (ia->ri_have_dma_lkey) {
1746 *mrp = NULL;
1747 iov->lkey = ia->ri_dma_lkey;
1748 return 0;
1749 } else if (ia->ri_bind_mem != NULL) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001750 *mrp = NULL;
1751 iov->lkey = ia->ri_bind_mem->lkey;
1752 return 0;
1753 }
1754
1755 ipb.addr = iov->addr;
1756 ipb.size = iov->length;
1757 mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
1758 IB_ACCESS_LOCAL_WRITE, &iov->addr);
1759
1760 dprintk("RPC: %s: phys convert: 0x%llx "
1761 "registered 0x%llx length %d\n",
Andrew Mortona56daeb2007-10-16 01:29:57 -07001762 __func__, (unsigned long long)ipb.addr,
1763 (unsigned long long)iov->addr, len);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001764
1765 if (IS_ERR(mr)) {
1766 *mrp = NULL;
1767 rc = PTR_ERR(mr);
1768 dprintk("RPC: %s: failed with %i\n", __func__, rc);
1769 } else {
1770 *mrp = mr;
1771 iov->lkey = mr->lkey;
1772 rc = 0;
1773 }
1774
1775 return rc;
1776}
1777
1778int
1779rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
1780 struct ib_mr *mr, struct ib_sge *iov)
1781{
1782 int rc;
1783
1784 ib_dma_unmap_single(ia->ri_id->device,
1785 iov->addr, iov->length, DMA_BIDIRECTIONAL);
1786
1787 if (NULL == mr)
 1788	if (mr == NULL)
1789
1790 rc = ib_dereg_mr(mr);
1791 if (rc)
1792 dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
1793 return rc;
1794}
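/* Illustrative only: the intended pairing of the two wrappers above for a
 * kmalloc'ed (hence physically contiguous) scratch buffer. The helper name
 * and flow are hypothetical; the real users are the internal send and
 * receive buffer setup paths.
 */
static int
rpcrdma_sketch_internal_reg(struct rpcrdma_ia *ia, void *va, int len)
{
	struct ib_mr *mr;
	struct ib_sge sge;
	int rc;

	rc = rpcrdma_register_internal(ia, va, len, &mr, &sge);
	if (rc)
		return rc;

	/* sge.addr, sge.length and sge.lkey are now usable in a local WR */

	return rpcrdma_deregister_internal(ia, mr, &sge);
}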
1795
1796/*
1797 * Wrappers for chunk registration, shared by read/write chunk code.
1798 */
1799
1800static void
1801rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
1802{
1803 seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
1804 seg->mr_dmalen = seg->mr_len;
1805 if (seg->mr_page)
1806 seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
1807 seg->mr_page, offset_in_page(seg->mr_offset),
1808 seg->mr_dmalen, seg->mr_dir);
1809 else
1810 seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
1811 seg->mr_offset,
1812 seg->mr_dmalen, seg->mr_dir);
Tom Tucker5c635e02011-02-09 19:45:34 +00001813 if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
1814 dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
1815 __func__,
Randy Dunlap986d4ab2011-03-15 17:11:59 -07001816 (unsigned long long)seg->mr_dma,
1817 seg->mr_offset, seg->mr_dmalen);
Tom Tucker5c635e02011-02-09 19:45:34 +00001818 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001819}
1820
1821static void
1822rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
1823{
1824 if (seg->mr_page)
1825 ib_dma_unmap_page(ia->ri_id->device,
1826 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1827 else
1828 ib_dma_unmap_single(ia->ri_id->device,
1829 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1830}
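/* Illustrative only: the DMA direction chosen in rpcrdma_map_one() depends
 * on whether the remote end will write into the segment (data arriving for
 * the client, e.g. an NFS READ) or only read from it. A hypothetical round
 * trip for a single segment:
 */
static void
rpcrdma_sketch_map_roundtrip(struct rpcrdma_ia *ia,
			     struct rpcrdma_mr_seg *seg, int writing)
{
	rpcrdma_map_one(ia, seg, writing);	/* seg->mr_dma now valid */

	/* ... seg->mr_dma is handed to a registration path here ... */

	rpcrdma_unmap_one(ia, seg);		/* always unmap when done */
}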
1831
Tom Talpey8d4ba032008-10-09 14:59:49 -04001832static int
Tom Talpey3197d3092008-10-09 15:00:20 -04001833rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1834 int *nsegs, int writing, struct rpcrdma_ia *ia,
1835 struct rpcrdma_xprt *r_xprt)
1836{
1837 struct rpcrdma_mr_seg *seg1 = seg;
Chuck Lever3eb35812015-01-21 11:02:54 -05001838 struct rpcrdma_mw *mw = seg1->rl_mw;
Chuck Lever0dbb4102014-07-29 17:24:09 -04001839 struct rpcrdma_frmr *frmr = &mw->r.frmr;
1840 struct ib_mr *mr = frmr->fr_mr;
Chuck Leverf590e872014-07-29 17:25:29 -04001841 struct ib_send_wr fastreg_wr, *bad_wr;
Tom Talpey3197d3092008-10-09 15:00:20 -04001842 u8 key;
1843 int len, pageoff;
1844 int i, rc;
Tom Tucker9b781452012-02-20 13:07:57 -06001845 int seg_len;
1846 u64 pa;
1847 int page_no;
Tom Talpey3197d3092008-10-09 15:00:20 -04001848
1849 pageoff = offset_in_page(seg1->mr_offset);
1850 seg1->mr_offset -= pageoff; /* start of page */
1851 seg1->mr_len += pageoff;
1852 len = -pageoff;
Steve Wise0fc6c4e2014-05-28 10:32:00 -04001853 if (*nsegs > ia->ri_max_frmr_depth)
1854 *nsegs = ia->ri_max_frmr_depth;
Tom Tucker9b781452012-02-20 13:07:57 -06001855 for (page_no = i = 0; i < *nsegs;) {
Tom Talpey3197d3092008-10-09 15:00:20 -04001856 rpcrdma_map_one(ia, seg, writing);
Tom Tucker9b781452012-02-20 13:07:57 -06001857 pa = seg->mr_dma;
1858 for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
Chuck Lever0dbb4102014-07-29 17:24:09 -04001859 frmr->fr_pgl->page_list[page_no++] = pa;
Tom Tucker9b781452012-02-20 13:07:57 -06001860 pa += PAGE_SIZE;
1861 }
Tom Talpey3197d3092008-10-09 15:00:20 -04001862 len += seg->mr_len;
1863 ++seg;
1864 ++i;
1865 /* Check for holes */
1866 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1867 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1868 break;
1869 }
1870 dprintk("RPC: %s: Using frmr %p to map %d segments\n",
Chuck Lever0dbb4102014-07-29 17:24:09 -04001871 __func__, mw, i);
Tom Talpey3197d3092008-10-09 15:00:20 -04001872
Chuck Lever05055722014-07-29 17:25:12 -04001873 frmr->fr_state = FRMR_IS_VALID;
1874
Chuck Leverf590e872014-07-29 17:25:29 -04001875 memset(&fastreg_wr, 0, sizeof(fastreg_wr));
1876 fastreg_wr.wr_id = (unsigned long)(void *)mw;
1877 fastreg_wr.opcode = IB_WR_FAST_REG_MR;
1878 fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma;
1879 fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
1880 fastreg_wr.wr.fast_reg.page_list_len = page_no;
1881 fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
1882 fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
1883 if (fastreg_wr.wr.fast_reg.length < len) {
Chuck Lever5fc83f42014-07-29 17:23:17 -04001884 rc = -EIO;
1885 goto out_err;
Chuck Leverc977dea2014-05-28 10:35:06 -04001886 }
1887
1888 /* Bump the key */
Chuck Lever0dbb4102014-07-29 17:24:09 -04001889 key = (u8)(mr->rkey & 0x000000FF);
1890 ib_update_fast_reg_key(mr, ++key);
Chuck Leverc977dea2014-05-28 10:35:06 -04001891
Chuck Leverf590e872014-07-29 17:25:29 -04001892 fastreg_wr.wr.fast_reg.access_flags = (writing ?
Vu Pham68743082009-05-26 14:51:00 -04001893 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
1894 IB_ACCESS_REMOTE_READ);
Chuck Leverf590e872014-07-29 17:25:29 -04001895 fastreg_wr.wr.fast_reg.rkey = mr->rkey;
Tom Talpey3197d3092008-10-09 15:00:20 -04001896 DECR_CQCOUNT(&r_xprt->rx_ep);
1897
Chuck Leverf590e872014-07-29 17:25:29 -04001898 rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
Tom Talpey3197d3092008-10-09 15:00:20 -04001899 if (rc) {
1900 dprintk("RPC: %s: failed ib_post_send for register,"
1901 " status %i\n", __func__, rc);
Chuck Leverc93e9862014-07-29 17:24:19 -04001902 ib_update_fast_reg_key(mr, --key);
Chuck Lever5fc83f42014-07-29 17:23:17 -04001903 goto out_err;
Tom Talpey3197d3092008-10-09 15:00:20 -04001904 } else {
Chuck Lever0dbb4102014-07-29 17:24:09 -04001905 seg1->mr_rkey = mr->rkey;
Tom Talpey3197d3092008-10-09 15:00:20 -04001906 seg1->mr_base = seg1->mr_dma + pageoff;
1907 seg1->mr_nsegs = i;
1908 seg1->mr_len = len;
1909 }
1910 *nsegs = i;
Chuck Lever5fc83f42014-07-29 17:23:17 -04001911 return 0;
1912out_err:
Chuck Lever05055722014-07-29 17:25:12 -04001913 frmr->fr_state = FRMR_IS_INVALID;
Chuck Lever5fc83f42014-07-29 17:23:17 -04001914 while (i--)
1915 rpcrdma_unmap_one(ia, --seg);
Tom Talpey3197d3092008-10-09 15:00:20 -04001916 return rc;
1917}
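/* Illustrative only: the "Bump the key" step above replaces just the
 * low-order byte of the FRMR's rkey, so a peer still holding the old rkey
 * can no longer reach the re-registered memory. A self-contained
 * restatement of that computation (the helper name is hypothetical):
 */
static inline u32
rpcrdma_sketch_bump_rkey(u32 rkey)
{
	u8 key = (u8)(rkey & 0x000000FF);

	/* keep the upper 24 bits, advance the low 8 bits */
	return (rkey & 0xFFFFFF00) | (u8)(key + 1);
}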
1918
1919static int
1920rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
1921 struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
1922{
1923 struct rpcrdma_mr_seg *seg1 = seg;
1924 struct ib_send_wr invalidate_wr, *bad_wr;
1925 int rc;
1926
Chuck Lever3eb35812015-01-21 11:02:54 -05001927 seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
Chuck Leverdab7e3b2014-07-29 17:25:20 -04001928
Tom Talpey3197d3092008-10-09 15:00:20 -04001929	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
Chuck Lever3eb35812015-01-21 11:02:54 -05001930 invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
Tom Talpey3197d3092008-10-09 15:00:20 -04001931 invalidate_wr.opcode = IB_WR_LOCAL_INV;
Chuck Lever3eb35812015-01-21 11:02:54 -05001932 invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
Tom Talpey3197d3092008-10-09 15:00:20 -04001933 DECR_CQCOUNT(&r_xprt->rx_ep);
1934
Chuck Lever73806c82014-07-29 17:23:25 -04001935 read_lock(&ia->ri_qplock);
1936 while (seg1->mr_nsegs--)
1937 rpcrdma_unmap_one(ia, seg++);
Tom Talpey3197d3092008-10-09 15:00:20 -04001938 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
Chuck Lever73806c82014-07-29 17:23:25 -04001939 read_unlock(&ia->ri_qplock);
Chuck Leverdab7e3b2014-07-29 17:25:20 -04001940 if (rc) {
1941 /* Force rpcrdma_buffer_get() to retry */
Chuck Lever3eb35812015-01-21 11:02:54 -05001942 seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
Tom Talpey3197d3092008-10-09 15:00:20 -04001943 dprintk("RPC: %s: failed ib_post_send for invalidate,"
1944 " status %i\n", __func__, rc);
Chuck Leverdab7e3b2014-07-29 17:25:20 -04001945 }
Tom Talpey3197d3092008-10-09 15:00:20 -04001946 return rc;
1947}
1948
1949static int
Tom Talpey8d4ba032008-10-09 14:59:49 -04001950rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
1951 int *nsegs, int writing, struct rpcrdma_ia *ia)
1952{
1953 struct rpcrdma_mr_seg *seg1 = seg;
1954 u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
1955 int len, pageoff, i, rc;
1956
1957 pageoff = offset_in_page(seg1->mr_offset);
1958 seg1->mr_offset -= pageoff; /* start of page */
1959 seg1->mr_len += pageoff;
1960 len = -pageoff;
1961 if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
1962 *nsegs = RPCRDMA_MAX_DATA_SEGS;
1963 for (i = 0; i < *nsegs;) {
1964 rpcrdma_map_one(ia, seg, writing);
1965 physaddrs[i] = seg->mr_dma;
1966 len += seg->mr_len;
1967 ++seg;
1968 ++i;
1969 /* Check for holes */
1970 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1971 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1972 break;
1973 }
Chuck Lever3eb35812015-01-21 11:02:54 -05001974 rc = ib_map_phys_fmr(seg1->rl_mw->r.fmr, physaddrs, i, seg1->mr_dma);
Tom Talpey8d4ba032008-10-09 14:59:49 -04001975 if (rc) {
1976 dprintk("RPC: %s: failed ib_map_phys_fmr "
1977 "%u@0x%llx+%i (%d)... status %i\n", __func__,
1978 len, (unsigned long long)seg1->mr_dma,
1979 pageoff, i, rc);
1980 while (i--)
1981 rpcrdma_unmap_one(ia, --seg);
1982 } else {
Chuck Lever3eb35812015-01-21 11:02:54 -05001983 seg1->mr_rkey = seg1->rl_mw->r.fmr->rkey;
Tom Talpey8d4ba032008-10-09 14:59:49 -04001984 seg1->mr_base = seg1->mr_dma + pageoff;
1985 seg1->mr_nsegs = i;
1986 seg1->mr_len = len;
1987 }
1988 *nsegs = i;
1989 return rc;
1990}
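/* Illustrative only: both registration paths above stop coalescing
 * segments at a "hole", i.e. when the next segment does not begin on a
 * page boundary or the previous segment does not end on one. The predicate
 * below is a hypothetical restatement of that test.
 */
static inline bool
rpcrdma_sketch_segs_contiguous(struct rpcrdma_mr_seg *prev,
			       struct rpcrdma_mr_seg *next)
{
	return offset_in_page(next->mr_offset) == 0 &&
	       offset_in_page(prev->mr_offset + prev->mr_len) == 0;
}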
1991
1992static int
1993rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
1994 struct rpcrdma_ia *ia)
1995{
1996 struct rpcrdma_mr_seg *seg1 = seg;
1997 LIST_HEAD(l);
1998 int rc;
1999
Chuck Lever3eb35812015-01-21 11:02:54 -05002000 list_add(&seg1->rl_mw->r.fmr->list, &l);
Tom Talpey8d4ba032008-10-09 14:59:49 -04002001 rc = ib_unmap_fmr(&l);
Chuck Lever73806c82014-07-29 17:23:25 -04002002 read_lock(&ia->ri_qplock);
Tom Talpey8d4ba032008-10-09 14:59:49 -04002003 while (seg1->mr_nsegs--)
2004 rpcrdma_unmap_one(ia, seg++);
Chuck Lever73806c82014-07-29 17:23:25 -04002005 read_unlock(&ia->ri_qplock);
Tom Talpey8d4ba032008-10-09 14:59:49 -04002006 if (rc)
2007 dprintk("RPC: %s: failed ib_unmap_fmr,"
2008 " status %i\n", __func__, rc);
2009 return rc;
2010}
2011
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002012int
2013rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
2014 int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
2015{
2016 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002017 int rc = 0;
2018
2019 switch (ia->ri_memreg_strategy) {
2020
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002021 case RPCRDMA_ALLPHYSICAL:
2022 rpcrdma_map_one(ia, seg, writing);
2023 seg->mr_rkey = ia->ri_bind_mem->rkey;
2024 seg->mr_base = seg->mr_dma;
2025 seg->mr_nsegs = 1;
2026 nsegs = 1;
2027 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002028
Tom Talpey3197d3092008-10-09 15:00:20 -04002029 /* Registration using frmr registration */
2030 case RPCRDMA_FRMR:
2031 rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
2032 break;
2033
Tom Talpey8d4ba032008-10-09 14:59:49 -04002034 /* Registration using fmr memory registration */
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002035 case RPCRDMA_MTHCAFMR:
Tom Talpey8d4ba032008-10-09 14:59:49 -04002036 rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002037 break;
2038
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002039 default:
Chuck Lever92b98362014-11-08 20:14:12 -05002040 return -EIO;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002041 }
2042 if (rc)
Chuck Lever92b98362014-11-08 20:14:12 -05002043 return rc;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002044
2045 return nsegs;
2046}
2047
2048int
2049rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
Chuck Lever13c9ff82014-05-28 10:33:08 -04002050 struct rpcrdma_xprt *r_xprt)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002051{
2052 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002053 int nsegs = seg->mr_nsegs, rc;
2054
2055 switch (ia->ri_memreg_strategy) {
2056
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002057 case RPCRDMA_ALLPHYSICAL:
Chuck Lever73806c82014-07-29 17:23:25 -04002058 read_lock(&ia->ri_qplock);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002059 rpcrdma_unmap_one(ia, seg);
Chuck Lever73806c82014-07-29 17:23:25 -04002060 read_unlock(&ia->ri_qplock);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002061 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002062
Tom Talpey3197d3092008-10-09 15:00:20 -04002063 case RPCRDMA_FRMR:
2064 rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
2065 break;
2066
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002067 case RPCRDMA_MTHCAFMR:
Tom Talpey8d4ba032008-10-09 14:59:49 -04002068 rc = rpcrdma_deregister_fmr_external(seg, ia);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002069 break;
2070
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002071 default:
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002072 break;
2073 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002074 return nsegs;
2075}
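/* Illustrative only: a hypothetical chunk-registration round trip using
 * the two entry points above. Real callers are in the marshaling code, and
 * the segment setup is elided here.
 */
static int
rpcrdma_sketch_chunk_roundtrip(struct rpcrdma_xprt *r_xprt,
			       struct rpcrdma_mr_seg *seg, int nsegs,
			       int writing)
{
	int registered;

	registered = rpcrdma_register_external(seg, nsegs, writing, r_xprt);
	if (registered < 0)
		return registered;	/* -EIO or registration failure */

	/* ... advertise seg->mr_rkey/mr_base/mr_len in the RPC header ... */

	return rpcrdma_deregister_external(seg, r_xprt);
}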
2076
2077/*
2078 * Prepost any receive buffer, then post send.
2079 *
2080 * Receive buffer is donated to hardware, reclaimed upon recv completion.
2081 */
2082int
2083rpcrdma_ep_post(struct rpcrdma_ia *ia,
2084 struct rpcrdma_ep *ep,
2085 struct rpcrdma_req *req)
2086{
2087 struct ib_send_wr send_wr, *send_wr_fail;
2088 struct rpcrdma_rep *rep = req->rl_reply;
2089 int rc;
2090
2091 if (rep) {
2092 rc = rpcrdma_ep_post_recv(ia, ep, rep);
2093 if (rc)
2094 goto out;
2095 req->rl_reply = NULL;
2096 }
2097
2098 send_wr.next = NULL;
2099 send_wr.wr_id = 0ULL; /* no send cookie */
2100 send_wr.sg_list = req->rl_send_iov;
2101 send_wr.num_sge = req->rl_niovs;
2102 send_wr.opcode = IB_WR_SEND;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002103 if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */
2104 ib_dma_sync_single_for_device(ia->ri_id->device,
2105 req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
2106 DMA_TO_DEVICE);
2107 ib_dma_sync_single_for_device(ia->ri_id->device,
2108 req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
2109 DMA_TO_DEVICE);
2110 ib_dma_sync_single_for_device(ia->ri_id->device,
2111 req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
2112 DMA_TO_DEVICE);
2113
2114 if (DECR_CQCOUNT(ep) > 0)
2115 send_wr.send_flags = 0;
 2116	else {	/* Provider must take a signaled send completion periodically */
2117 INIT_CQCOUNT(ep);
2118 send_wr.send_flags = IB_SEND_SIGNALED;
2119 }
2120
2121 rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
2122 if (rc)
2123 dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
2124 rc);
2125out:
2126 return rc;
2127}
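/* Illustrative only: the DECR_CQCOUNT/INIT_CQCOUNT logic above requests a
 * signaled send completion only once per "window" of sends, so the send
 * queue can be reaped without taking an interrupt per WR. A hypothetical
 * restatement of that policy as a simple countdown:
 */
static inline int
rpcrdma_sketch_send_flags(int *countdown, int window)
{
	if (--(*countdown) > 0)
		return 0;			/* unsignaled send */

	*countdown = window;			/* time for a completion */
	return IB_SEND_SIGNALED;
}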
2128
2129/*
2130 * (Re)post a receive buffer.
2131 */
2132int
2133rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
2134 struct rpcrdma_ep *ep,
2135 struct rpcrdma_rep *rep)
2136{
2137 struct ib_recv_wr recv_wr, *recv_wr_fail;
2138 int rc;
2139
2140 recv_wr.next = NULL;
2141 recv_wr.wr_id = (u64) (unsigned long) rep;
2142 recv_wr.sg_list = &rep->rr_iov;
2143 recv_wr.num_sge = 1;
2144
2145 ib_dma_sync_single_for_cpu(ia->ri_id->device,
2146 rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);
2147
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002148 rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
2149
2150 if (rc)
2151 dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
2152 rc);
2153 return rc;
2154}
Chuck Lever43e95982014-07-29 17:23:34 -04002155
 2156/* Physical mapping means one Read/Write list entry per page.
 2157 * All list entries must fit within an inline buffer.
2158 *
2159 * NB: The server must return a Write list for NFS READ,
2160 * which has the same constraint. Factor in the inline
2161 * rsize as well.
2162 */
2163static size_t
2164rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt)
2165{
2166 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
2167 unsigned int inline_size, pages;
2168
2169 inline_size = min_t(unsigned int,
2170 cdata->inline_wsize, cdata->inline_rsize);
2171 inline_size -= RPCRDMA_HDRLEN_MIN;
2172 pages = inline_size / sizeof(struct rpcrdma_segment);
2173 return pages << PAGE_SHIFT;
2174}
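/* Illustrative only: assuming, say, a 1024-byte inline buffer, a 28-byte
 * minimal RPC-over-RDMA header, and a 16-byte struct rpcrdma_segment, the
 * computation above yields (1024 - 28) / 16 = 62 list entries, i.e. 62
 * pages of maximum payload.
 */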
2175
2176static size_t
2177rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt)
2178{
2179 return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
2180}
2181
2182size_t
2183rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt)
2184{
2185 size_t result;
2186
2187 switch (r_xprt->rx_ia.ri_memreg_strategy) {
2188 case RPCRDMA_ALLPHYSICAL:
2189 result = rpcrdma_physical_max_payload(r_xprt);
2190 break;
2191 default:
2192 result = rpcrdma_mr_max_payload(r_xprt);
2193 }
2194 return result;
2195}
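/* Illustrative only: a hypothetical caller clamping a transfer to what the
 * selected registration strategy can map in a single request.
 */
static size_t
rpcrdma_sketch_clamp_payload(struct rpcrdma_xprt *r_xprt, size_t wanted)
{
	return min_t(size_t, wanted, rpcrdma_max_payload(r_xprt));
}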