\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -04001/*
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040038 */
39
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040040/*
41 * verbs.c
42 *
43 * Encapsulates the major functions managing:
44 * o adapters
45 * o endpoints
46 * o connections
47 * o buffer memory
48 */

#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/prefetch.h>
#include <asm/bitops.h>

#include "xprt_rdma.h"

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040057/*
58 * Globals/Macros
59 */
60
Jeff Laytonf895b252014-11-17 16:58:04 -050061#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040062# define RPCDBG_FACILITY RPCDBG_TRANS
63#endif
64
Chuck Lever9f9d8022014-07-29 17:24:45 -040065static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);
Chuck Lever467c9672014-11-08 20:14:29 -050066static void rpcrdma_reset_fmrs(struct rpcrdma_ia *);
Chuck Lever9f9d8022014-07-29 17:24:45 -040067
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040068/*
69 * internal functions
70 */
71
72/*
73 * handle replies in tasklet context, using a single, global list
74 * rdma tasklet function -- just turn around and call the func
75 * for all replies on the list
76 */
77
78static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
79static LIST_HEAD(rpcrdma_tasklets_g);
80
81static void
82rpcrdma_run_tasklet(unsigned long data)
83{
84 struct rpcrdma_rep *rep;
85 void (*func)(struct rpcrdma_rep *);
86 unsigned long flags;
87
88 data = data;
89 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
90 while (!list_empty(&rpcrdma_tasklets_g)) {
91 rep = list_entry(rpcrdma_tasklets_g.next,
92 struct rpcrdma_rep, rr_list);
93 list_del(&rep->rr_list);
94 func = rep->rr_func;
95 rep->rr_func = NULL;
96 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
97
98 if (func)
99 func(rep);
100 else
101 rpcrdma_recv_buffer_put(rep);
102
103 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
104 }
105 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
106}
107
108static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
109
static const char * const async_event[] = {
	"CQ error",
	"QP fatal error",
	"QP request error",
	"QP access error",
	"communication established",
	"send queue drained",
	"path migration successful",
	"path mig error",
	"device fatal error",
	"port active",
	"port error",
	"LID change",
	"P_key change",
	"SM change",
	"SRQ error",
	"SRQ limit reached",
	"last WQE reached",
	"client reregister",
	"GID change",
};

#define ASYNC_MSG(status)					\
	((status) < ARRAY_SIZE(async_event) ?			\
		async_event[(status)] : "unknown async error")

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400136static void
Chuck Leverf1a03b72014-11-08 20:14:37 -0500137rpcrdma_schedule_tasklet(struct list_head *sched_list)
138{
139 unsigned long flags;
140
141 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
142 list_splice_tail(sched_list, &rpcrdma_tasklets_g);
143 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
144 tasklet_schedule(&rpcrdma_tasklet_g);
145}
146
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400147static void
148rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
149{
150 struct rpcrdma_ep *ep = context;
151
Chuck Lever7ff11de2014-11-08 20:15:01 -0500152 pr_err("RPC: %s: %s on device %s ep %p\n",
153 __func__, ASYNC_MSG(event->event),
154 event->device->name, context);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400155 if (ep->rep_connected == 1) {
156 ep->rep_connected = -EIO;
Chuck Leverafadc462015-01-21 11:03:11 -0500157 rpcrdma_conn_func(ep);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400158 wake_up_all(&ep->rep_connect_wait);
159 }
160}
161
162static void
163rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
164{
165 struct rpcrdma_ep *ep = context;
166
Chuck Lever7ff11de2014-11-08 20:15:01 -0500167 pr_err("RPC: %s: %s on device %s ep %p\n",
168 __func__, ASYNC_MSG(event->event),
169 event->device->name, context);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400170 if (ep->rep_connected == 1) {
171 ep->rep_connected = -EIO;
Chuck Leverafadc462015-01-21 11:03:11 -0500172 rpcrdma_conn_func(ep);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400173 wake_up_all(&ep->rep_connect_wait);
174 }
175}
176
static const char * const wc_status[] = {
	"success",
	"local length error",
	"local QP operation error",
	"local EE context operation error",
	"local protection error",
	"WR flushed",
	"memory management operation error",
	"bad response error",
	"local access error",
	"remote invalid request error",
	"remote access error",
	"remote operation error",
	"transport retry counter exceeded",
	"RNR retry counter exceeded",
	"local RDD violation error",
	"remote invalid RD request",
	"operation aborted",
	"invalid EE context number",
	"invalid EE context state",
	"fatal error",
	"response timeout error",
	"general error",
};

#define COMPLETION_MSG(status)					\
	((status) < ARRAY_SIZE(wc_status) ?			\
		wc_status[(status)] : "unexpected completion error")

static void
rpcrdma_sendcq_process_wc(struct ib_wc *wc)
{
	if (likely(wc->status == IB_WC_SUCCESS))
		return;

	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->wr_id == 0ULL) {
		if (wc->status != IB_WC_WR_FLUSH_ERR)
			pr_err("RPC:       %s: SEND: %s\n",
			       __func__, COMPLETION_MSG(wc->status));
	} else {
		struct rpcrdma_mw *r;

		r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
		r->r.frmr.fr_state = FRMR_IS_STALE;
		pr_err("RPC:       %s: frmr %p (stale): %s\n",
		       __func__, r, COMPLETION_MSG(wc->status));
	}
}

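/* A note on the wr_id convention in rpcrdma_sendcq_process_wc():
 * plain SEND WRs are posted with a zero wr_id, while FAST_REG_MR and
 * LOCAL_INV WRs carry a pointer to their rpcrdma_mw. A failed
 * completion thus identifies exactly which FRMR must be marked stale
 * and re-registered before it can be reused.
 */
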
static int
rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct ib_wc *wcs;
	int budget, count, rc;

	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_send_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			return rc;

		count = rc;
		while (count-- > 0)
			rpcrdma_sendcq_process_wc(wcs++);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	return 0;
}

/*
 * Handle send, fast_reg_mr, and local_inv completions.
 *
 * Send events are typically suppressed and thus do not result
 * in an upcall. Occasionally one is signaled, however. This
 * prevents the provider's completion queue from wrapping and
 * losing a completion.
 */
static void
rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_sendcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC:       %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_sendcq_poll(cq, ep);
}

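/* The poll -> re-arm -> poll-again sequence in rpcrdma_sendcq_upcall()
 * closes the race where a completion arrives between the last
 * ib_poll_cq() and re-arming the CQ: with IB_CQ_REPORT_MISSED_EVENTS,
 * a positive return from ib_req_notify_cq() signals that events were
 * missed, so the CQ is polled once more before returning.
 */
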
static void
rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
{
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long)wc->wr_id;

	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS)
		goto out_fail;

	/* status == SUCCESS means all fields in wc are trustworthy */
	if (wc->opcode != IB_WC_RECV)
		return;

	dprintk("RPC:       %s: rep %p opcode 'recv', length %u: success\n",
		__func__, rep, wc->byte_len);

	rep->rr_len = wc->byte_len;
	ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
				   rep->rr_iov.addr, rep->rr_len,
				   DMA_FROM_DEVICE);
	prefetch(rep->rr_base);

out_schedule:
	list_add_tail(&rep->rr_list, sched_list);
	return;
out_fail:
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("RPC:       %s: rep %p: %s\n",
		       __func__, rep, COMPLETION_MSG(wc->status));
	rep->rr_len = ~0U;
	goto out_schedule;
}

static int
rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct list_head sched_list;
	struct ib_wc *wcs;
	int budget, count, rc;

	INIT_LIST_HEAD(&sched_list);
	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_recv_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			goto out_schedule;

		count = rc;
		while (count-- > 0)
			rpcrdma_recvcq_process_wc(wcs++, &sched_list);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	rc = 0;

out_schedule:
	rpcrdma_schedule_tasklet(&sched_list);
	return rc;
}

/*
 * Handle receive completions.
 *
 * It is reentrant but processes single events in order to maintain
 * ordering of receives to keep server credits.
 *
 * It is the responsibility of the scheduled tasklet to return
 * recv buffers to the pool. NOTE: this affects synchronization of
 * connection shutdown. That is, the structures required for
 * the completion of the reply handler must remain intact until
 * all memory has been reclaimed.
 */
static void
rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_recvcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC:       %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_recvcq_poll(cq, ep);
}

static void
rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
{
	struct ib_wc wc;
	LIST_HEAD(sched_list);

	while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
		rpcrdma_recvcq_process_wc(&wc, &sched_list);
	if (!list_empty(&sched_list))
		rpcrdma_schedule_tasklet(&sched_list);
	while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
		rpcrdma_sendcq_process_wc(&wc);
}

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
static const char * const conn[] = {
	"address resolved",
	"address error",
	"route resolved",
	"route error",
	"connect request",
	"connect response",
	"connect error",
	"unreachable",
	"rejected",
	"established",
	"disconnected",
	"device removal",
	"multicast join",
	"multicast error",
	"address change",
	"timewait exit",
};

#define CONNECTION_MSG(status)					\
	((status) < ARRAY_SIZE(conn) ?				\
		conn[(status)] : "unrecognized connection error")
#endif

static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
#endif
	struct ib_qp_attr *attr = &ia->ri_qp_attr;
	struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr;
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ia->ri_async_rc = 0;
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC:       %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC:       %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		ib_query_qp(ia->ri_id->qp, attr,
			    IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
			    iattr);
		dprintk("RPC:       %s: %d responder resources"
			" (%d initiator)\n",
			__func__, attr->max_dest_rd_atomic,
			attr->max_rd_atomic);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		connstate = -ECONNREFUSED;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
		goto connected;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		connstate = -ENODEV;
connected:
		dprintk("RPC:       %s: %sconnected\n",
			__func__, connstate > 0 ? "" : "dis");
		ep->rep_connected = connstate;
		rpcrdma_conn_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		/*FALLTHROUGH*/
	default:
		dprintk("RPC:       %s: %pI4:%u (ep 0x%p): %s\n",
			__func__, &addr->sin_addr.s_addr,
			ntohs(addr->sin_port), ep,
			CONNECTION_MSG(event->event));
		break;
	}

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	if (connstate == 1) {
		int ird = attr->max_dest_rd_atomic;
		int tird = ep->rep_remote_cma.responder_resources;
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
			"on %s, memreg %d slots %d ird %d%s\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			ia->ri_id->device->name,
			ia->ri_memreg_strategy,
			xprt->rx_buf.rb_max_requests,
			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
	} else if (connstate < 0) {
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			connstate);
	}
#endif

	return 0;
}

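/* As used throughout this file, ep->rep_connected encodes connection
 * state: 1 means connected, 0 means a connect is in progress, and a
 * negative errno records why the connection was lost. Waiters on
 * rep_connect_wait are woken whenever the value changes.
 */
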
static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
			struct rpcrdma_ia *ia, struct sockaddr *addr)
{
	struct rdma_cm_id *id;
	int rc;

	init_completion(&ia->ri_done);

	id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		dprintk("RPC:       %s: rdma_create_id() failed %i\n",
			__func__, rc);
		return id;
	}

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC:       %s: rdma_resolve_addr() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC:       %s: rdma_resolve_route() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}

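/* rpcrdma_create_id() drives the RDMA CM synchronously: each
 * rdma_resolve_*() call completes via rpcrdma_conn_upcall(), which
 * records a result in ri_async_rc and signals ri_done. Pre-setting
 * ri_async_rc to -ETIMEDOUT lets a timed-out wait be reported as a
 * failure without extra bookkeeping.
 */
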
/*
 * Drain any cq, prior to teardown.
 */
static void
rpcrdma_clean_cq(struct ib_cq *cq)
{
	struct ib_wc wc;
	int count = 0;

	while (1 == ib_poll_cq(cq, 1, &wc))
		++count;

	if (count)
		dprintk("RPC:       %s: flushed %d events (last 0x%x)\n",
			__func__, count, wc.opcode);
}

/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc, mem_priv;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct ib_device_attr *devattr = &ia->ri_devattr;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC:       %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_query_device(ia->ri_id->device, devattr);
	if (rc) {
		dprintk("RPC:       %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out3;
	}

	if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
	}

	if (memreg == RPCRDMA_FRMR) {
		/* Requires both frmr reg and local dma lkey */
		if ((devattr->device_cap_flags &
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
			dprintk("RPC:       %s: FRMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_MTHCAFMR;
		} else {
			/* Mind the ia limit on FRMR page list depth */
			ia->ri_max_frmr_depth = min_t(unsigned int,
				RPCRDMA_MAX_DATA_SEGS,
				devattr->max_fast_reg_page_list_len);
		}
	}
	if (memreg == RPCRDMA_MTHCAFMR) {
		if (!ia->ri_id->device->alloc_fmr) {
			dprintk("RPC:       %s: MTHCAFMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_ALLPHYSICAL;
		}
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_FRMR:
		break;
	case RPCRDMA_ALLPHYSICAL:
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
	case RPCRDMA_MTHCAFMR:
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
	register_setup:
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			rc = -ENOMEM;
			goto out3;
		}
		break;
	default:
		printk(KERN_ERR "RPC: Unsupported memory "
				"registration mode: %d\n", memreg);
		rc = -ENOMEM;
		goto out3;
	}
	dprintk("RPC:       %s: memory registration strategy is %d\n",
		__func__, memreg);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	rwlock_init(&ia->ri_qplock);
	return 0;

out3:
	ib_dealloc_pd(ia->ri_pd);
	ia->ri_pd = NULL;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}

/*
 * Clean up/close an IA.
 *   o if event handles and PD have been initialized, free them.
 *   o close the IA
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC:       %s: entering\n", __func__);
	if (ia->ri_bind_mem != NULL) {
		rc = ib_dereg_mr(ia->ri_bind_mem);
		dprintk("RPC:       %s: ib_dereg_mr returned %i\n",
			__func__, rc);
	}
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
		if (ia->ri_id->qp)
			rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = NULL;
	}
	if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
		rc = ib_dealloc_pd(ia->ri_pd);
		dprintk("RPC:       %s: ib_dealloc_pd returned %i\n",
			__func__, rc);
	}
}

/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
				struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr *devattr = &ia->ri_devattr;
	struct ib_cq *sendcq, *recvcq;
	int rc, err;

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > devattr->max_qp_wr)
		cdata->max_requests = devattr->max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	/* send_cq and recv_cq initialized below */
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR: {
		int depth = 7;

		/* Add room for frmr register and invalidate WRs.
		 * 1. FRMR reg WR for head
		 * 2. FRMR invalidate WR for head
		 * 3. N FRMR reg WRs for pagelist
		 * 4. N FRMR invalidate WRs for pagelist
		 * 5. FRMR reg WR for tail
		 * 6. FRMR invalidate WR for tail
		 * 7. The RDMA_SEND WR
		 */

		/* Calculate N if the device max FRMR depth is smaller than
		 * RPCRDMA_MAX_DATA_SEGS.
		 */
		if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
			int delta = RPCRDMA_MAX_DATA_SEGS -
				    ia->ri_max_frmr_depth;

			do {
				depth += 2; /* FRMR reg + invalidate */
				delta -= ia->ri_max_frmr_depth;
			} while (delta > 0);

		}
		ep->rep_attr.cap.max_send_wr *= depth;
		if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) {
			cdata->max_requests = devattr->max_qp_wr / depth;
			if (!cdata->max_requests)
				return -EINVAL;
			ep->rep_attr.cap.max_send_wr = cdata->max_requests *
						       depth;
		}
		break;
	}
	default:
		break;
	}
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	dprintk("RPC:       %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

	/* set trigger for requesting send completion */
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
	if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
		ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
	else if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;
	INIT_CQCOUNT(ep);
	init_waitqueue_head(&ep->rep_connect_wait);
	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

	sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
				  rpcrdma_cq_async_error_upcall, ep,
				  ep->rep_attr.cap.max_send_wr + 1, 0);
	if (IS_ERR(sendcq)) {
		rc = PTR_ERR(sendcq);
		dprintk("RPC:       %s: failed to create send CQ: %i\n",
			__func__, rc);
		goto out1;
	}

	rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		goto out2;
	}

	recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
				  rpcrdma_cq_async_error_upcall, ep,
				  ep->rep_attr.cap.max_recv_wr + 1, 0);
	if (IS_ERR(recvcq)) {
		rc = PTR_ERR(recvcq);
		dprintk("RPC:       %s: failed to create recv CQ: %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		ib_destroy_cq(recvcq);
		goto out2;
	}

	ep->rep_attr.send_cq = sendcq;
	ep->rep_attr.recv_cq = recvcq;

	/* Initialize cma parameters */

	/* RPC/RDMA does not use private data */
	ep->rep_remote_cma.private_data = NULL;
	ep->rep_remote_cma.private_data_len = 0;

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	if (devattr->max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
		ep->rep_remote_cma.responder_resources = 32;
	else
		ep->rep_remote_cma.responder_resources =
						devattr->max_qp_rd_atom;

	ep->rep_remote_cma.retry_count = 7;
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	err = ib_destroy_cq(sendcq);
	if (err)
		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
			__func__, err);
out1:
	return rc;
}

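/* A worked example of the FRMR send-queue sizing above, with
 * hypothetical values: if RPCRDMA_MAX_DATA_SEGS is 64 and the device
 * limits ri_max_frmr_depth to 16, delta starts at 48 and the loop
 * adds two WRs three times, giving depth = 13. Each credit then
 * consumes up to 13 send WRs, so a device with max_qp_wr = 16384
 * would cap cdata->max_requests at 16384 / 13 = 1260.
 */
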
/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC:       %s: entering, connected is %d\n",
		__func__, ep->rep_connected);

	cancel_delayed_work_sync(&ep->rep_connect_worker);

	if (ia->ri_id->qp) {
		rpcrdma_ep_disconnect(ep, ia);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	/* padding - could be done in rpcrdma_buffer_destroy... */
	if (ep->rep_pad_mr) {
		rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
		ep->rep_pad_mr = NULL;
	}

	rpcrdma_clean_cq(ep->rep_attr.recv_cq);
	rc = ib_destroy_cq(ep->rep_attr.recv_cq);
	if (rc)
		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
			__func__, rc);

	rpcrdma_clean_cq(ep->rep_attr.send_cq);
	rc = ib_destroy_cq(ep->rep_attr.send_cq);
	if (rc)
		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
			__func__, rc);
}

/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id, *old;
	int rc = 0;
	int retry_count = 0;

	if (ep->rep_connected != 0) {
		struct rpcrdma_xprt *xprt;
retry:
		dprintk("RPC:       %s: reconnecting...\n", __func__);

		rpcrdma_ep_disconnect(ep, ia);
		rpcrdma_flush_cqs(ep);

		switch (ia->ri_memreg_strategy) {
		case RPCRDMA_FRMR:
			rpcrdma_reset_frmrs(ia);
			break;
		case RPCRDMA_MTHCAFMR:
			rpcrdma_reset_fmrs(ia);
			break;
		case RPCRDMA_ALLPHYSICAL:
			break;
		default:
			rc = -EIO;
			goto out;
		}

		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		id = rpcrdma_create_id(xprt, ia,
				(struct sockaddr *)&xprt->rx_data.addr);
		if (IS_ERR(id)) {
			rc = -EHOSTUNREACH;
			goto out;
		}
		/* TEMP TEMP TEMP - fail if new device:
		 * Deregister/remarshal *all* requests!
		 * Close and recreate adapter, pd, etc!
		 * Re-determine all attributes still sane!
		 * More stuff I haven't thought of!
		 * Rrrgh!
		 */
		if (ia->ri_id->device != id->device) {
			printk("RPC:       %s: can't reconnect on "
				"different device!\n", __func__);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}
		/* END TEMP */
		rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC:       %s: rdma_create_qp failed %i\n",
				__func__, rc);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}

		write_lock(&ia->ri_qplock);
		old = ia->ri_id;
		ia->ri_id = id;
		write_unlock(&ia->ri_qplock);

		rdma_destroy_qp(old);
		rdma_destroy_id(old);
	} else {
		dprintk("RPC:       %s: connecting...\n", __func__);
		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC:       %s: rdma_create_qp failed %i\n",
				__func__, rc);
			/* do not update ep->rep_connected */
			return -ENETUNREACH;
		}
	}

	ep->rep_connected = 0;

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC:       %s: rdma_connect() failed with %i\n",
			__func__, rc);
		goto out;
	}

	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

	/*
	 * Check state. A non-peer reject indicates no listener
	 * (ECONNREFUSED), which may be a transient state. All
	 * others indicate a transport condition which has already
	 * undergone best-effort recovery.
	 */
	if (ep->rep_connected == -ECONNREFUSED &&
	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
		dprintk("RPC:       %s: non-peer_reject, retry\n", __func__);
		goto retry;
	}
	if (ep->rep_connected <= 0) {
		/* Sometimes, the only way to reliably connect to remote
		 * CMs is to use same nonzero values for ORD and IRD. */
		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
		    (ep->rep_remote_cma.responder_resources == 0 ||
		     ep->rep_remote_cma.initiator_depth !=
				ep->rep_remote_cma.responder_resources)) {
			if (ep->rep_remote_cma.responder_resources == 0)
				ep->rep_remote_cma.responder_resources = 1;
			ep->rep_remote_cma.initiator_depth =
				ep->rep_remote_cma.responder_resources;
			goto retry;
		}
		rc = ep->rep_connected;
	} else {
		dprintk("RPC:       %s: connected\n", __func__);
	}

out:
	if (rc)
		ep->rep_connected = rc;
	return rc;
}

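/* The cm_id swap under ri_qplock in rpcrdma_ep_connect() pairs with
 * the read_lock taken around ib_post_send() (see
 * rpcrdma_retry_local_inv() below), so a sender never dereferences a
 * cm_id that a reconnect is tearing down.
 */
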
/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
void
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	rpcrdma_flush_cqs(ep);
	rc = rdma_disconnect(ia->ri_id);
	if (!rc) {
		/* returns without wait if not connected */
		wait_event_interruptible(ep->rep_connect_wait,
					 ep->rep_connected != 1);
		dprintk("RPC:       %s: after wait, %sconnected\n", __func__,
			(ep->rep_connected == 1) ? "still " : "dis");
	} else {
		dprintk("RPC:       %s: rdma_disconnect %i\n", __func__, rc);
		ep->rep_connected = rc;
	}
}

static struct rpcrdma_req *
rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	size_t wlen = 1 << fls(cdata->inline_wsize +
			       sizeof(struct rpcrdma_req));
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_req *req;
	int rc;

	rc = -ENOMEM;
	req = kmalloc(wlen, GFP_KERNEL);
	if (req == NULL)
		goto out;
	memset(req, 0, sizeof(struct rpcrdma_req));

	rc = rpcrdma_register_internal(ia, req->rl_base, wlen -
				       offsetof(struct rpcrdma_req, rl_base),
				       &req->rl_handle, &req->rl_iov);
	if (rc)
		goto out_free;

	req->rl_size = wlen - sizeof(struct rpcrdma_req);
	req->rl_buffer = &r_xprt->rx_buf;
	return req;

out_free:
	kfree(req);
out:
	return ERR_PTR(rc);
}

static struct rpcrdma_rep *
rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	size_t rlen = 1 << fls(cdata->inline_rsize +
			       sizeof(struct rpcrdma_rep));
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_rep *rep;
	int rc;

	rc = -ENOMEM;
	rep = kmalloc(rlen, GFP_KERNEL);
	if (rep == NULL)
		goto out;
	memset(rep, 0, sizeof(struct rpcrdma_rep));

	rc = rpcrdma_register_internal(ia, rep->rr_base, rlen -
				       offsetof(struct rpcrdma_rep, rr_base),
				       &rep->rr_handle, &rep->rr_iov);
	if (rc)
		goto out_free;

	rep->rr_buffer = &r_xprt->rx_buf;
	return rep;

out_free:
	kfree(rep);
out:
	return ERR_PTR(rc);
}

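/* In both allocators above, "1 << fls(len)" rounds the allocation up
 * to the power of two above len, so each req/rep structure and its
 * inline buffer share a single, power-of-two-sized registration.
 */
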
static int
rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
{
	int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
	struct ib_fmr_attr fmr_attr = {
		.max_pages	= RPCRDMA_MAX_DATA_SEGS,
		.max_maps	= 1,
		.page_shift	= PAGE_SHIFT
	};
	struct rpcrdma_mw *r;
	int i, rc;

	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
	dprintk("RPC:       %s: initializing %d FMRs\n", __func__, i);

	while (i--) {
		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (r == NULL)
			return -ENOMEM;

		r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr);
		if (IS_ERR(r->r.fmr)) {
			rc = PTR_ERR(r->r.fmr);
			dprintk("RPC:       %s: ib_alloc_fmr failed %i\n",
				__func__, rc);
			goto out_free;
		}

		list_add(&r->mw_list, &buf->rb_mws);
		list_add(&r->mw_all, &buf->rb_all);
	}
	return 0;

out_free:
	kfree(r);
	return rc;
}

static int
rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_frmr *f;
	struct rpcrdma_mw *r;
	int i, rc;

	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
	dprintk("RPC:       %s: initializing %d FRMRs\n", __func__, i);

	while (i--) {
		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (r == NULL)
			return -ENOMEM;
		f = &r->r.frmr;

		f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
						ia->ri_max_frmr_depth);
		if (IS_ERR(f->fr_mr)) {
			rc = PTR_ERR(f->fr_mr);
			dprintk("RPC:       %s: ib_alloc_fast_reg_mr "
				"failed %i\n", __func__, rc);
			goto out_free;
		}

		f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device,
							ia->ri_max_frmr_depth);
		if (IS_ERR(f->fr_pgl)) {
			rc = PTR_ERR(f->fr_pgl);
			dprintk("RPC:       %s: ib_alloc_fast_reg_page_list "
				"failed %i\n", __func__, rc);

			ib_dereg_mr(f->fr_mr);
			goto out_free;
		}

		list_add(&r->mw_list, &buf->rb_mws);
		list_add(&r->mw_all, &buf->rb_all);
	}

	return 0;

out_free:
	kfree(r);
	return rc;
}

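/* Every rpcrdma_mw created above is linked onto two lists: rb_mws is
 * the free list that marshaling code draws from, while rb_all tracks
 * all MWs ever created so the connect-time reset routines can reach
 * MWs even while they are checked out of rb_mws.
 */
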
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001226int
Chuck Leverac920d02015-01-21 11:03:44 -05001227rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001228{
Chuck Leverac920d02015-01-21 11:03:44 -05001229 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1230 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
1231 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001232 char *p;
Chuck Lever13924022015-01-21 11:03:52 -05001233 size_t len;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001234 int i, rc;
1235
1236 buf->rb_max_requests = cdata->max_requests;
1237 spin_lock_init(&buf->rb_lock);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001238
1239 /* Need to allocate:
1240 * 1. arrays for send and recv pointers
1241 * 2. arrays of struct rpcrdma_req to fill in pointers
1242 * 3. array of struct rpcrdma_rep for replies
1243 * 4. padding, if any
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001244 * Send/recv buffers in req/rep need to be registered
1245 */
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001246 len = buf->rb_max_requests *
1247 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
1248 len += cdata->padding;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001249
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001250 p = kzalloc(len, GFP_KERNEL);
1251 if (p == NULL) {
1252 dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
1253 __func__, len);
1254 rc = -ENOMEM;
1255 goto out;
1256 }
1257 buf->rb_pool = p; /* for freeing it later */
1258
1259 buf->rb_send_bufs = (struct rpcrdma_req **) p;
1260 p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
1261 buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
1262 p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];
1263
1264 /*
1265 * Register the zeroed pad buffer, if any.
1266 */
1267 if (cdata->padding) {
Chuck Leverac920d02015-01-21 11:03:44 -05001268 struct rpcrdma_ep *ep = &r_xprt->rx_ep;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001269 rc = rpcrdma_register_internal(ia, p, cdata->padding,
1270 &ep->rep_pad_mr, &ep->rep_pad);
1271 if (rc)
1272 goto out;
1273 }
1274 p += cdata->padding;
1275
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001276 INIT_LIST_HEAD(&buf->rb_mws);
Chuck Lever3111d722014-07-29 17:24:28 -04001277 INIT_LIST_HEAD(&buf->rb_all);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001278 switch (ia->ri_memreg_strategy) {
Tom Talpey3197d3092008-10-09 15:00:20 -04001279 case RPCRDMA_FRMR:
Chuck Lever2e845222014-07-29 17:25:38 -04001280 rc = rpcrdma_init_frmrs(ia, buf);
1281 if (rc)
1282 goto out;
Tom Talpey3197d3092008-10-09 15:00:20 -04001283 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001284 case RPCRDMA_MTHCAFMR:
Chuck Lever2e845222014-07-29 17:25:38 -04001285 rc = rpcrdma_init_fmrs(ia, buf);
1286 if (rc)
1287 goto out;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001288 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001289 default:
1290 break;
1291 }
1292
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001293 for (i = 0; i < buf->rb_max_requests; i++) {
1294 struct rpcrdma_req *req;
1295 struct rpcrdma_rep *rep;
1296
Chuck Lever13924022015-01-21 11:03:52 -05001297 req = rpcrdma_create_req(r_xprt);
1298 if (IS_ERR(req)) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001299 dprintk("RPC: %s: request buffer %d alloc"
1300 " failed\n", __func__, i);
Chuck Lever13924022015-01-21 11:03:52 -05001301 rc = PTR_ERR(req);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001302 goto out;
1303 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001304 buf->rb_send_bufs[i] = req;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001305
Chuck Lever13924022015-01-21 11:03:52 -05001306 rep = rpcrdma_create_rep(r_xprt);
1307 if (IS_ERR(rep)) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001308 dprintk("RPC: %s: reply buffer %d alloc failed\n",
1309 __func__, i);
Chuck Lever13924022015-01-21 11:03:52 -05001310 rc = PTR_ERR(rep);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001311 goto out;
1312 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001313 buf->rb_recv_bufs[i] = rep;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001314 }
Chuck Lever13924022015-01-21 11:03:52 -05001315
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001316 return 0;
1317out:
1318 rpcrdma_buffer_destroy(buf);
1319 return rc;
1320}
1321
static void
rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep)
{
	if (!rep)
		return;

	rpcrdma_deregister_internal(ia, rep->rr_handle, &rep->rr_iov);
	kfree(rep);
}

static void
rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
{
	if (!req)
		return;

	rpcrdma_deregister_internal(ia, req->rl_handle, &req->rl_iov);
	kfree(req);
}

static void
rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;
	int rc;

	while (!list_empty(&buf->rb_all)) {
		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
		list_del(&r->mw_all);
		list_del(&r->mw_list);

		rc = ib_dealloc_fmr(r->r.fmr);
		if (rc)
			dprintk("RPC:       %s: ib_dealloc_fmr failed %i\n",
				__func__, rc);

		kfree(r);
	}
}

static void
rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;
	int rc;

	while (!list_empty(&buf->rb_all)) {
		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
		list_del(&r->mw_all);
		list_del(&r->mw_list);

		rc = ib_dereg_mr(r->r.frmr.fr_mr);
		if (rc)
			dprintk("RPC:       %s: ib_dereg_mr failed %i\n",
				__func__, rc);
		ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);

		kfree(r);
	}
}

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001383void
1384rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1385{
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001386 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
Chuck Lever2e845222014-07-29 17:25:38 -04001387 int i;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001388
1389 /* clean up in reverse order from create
1390 * 1. recv mr memory (mr free, then kfree)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001391 * 2. send mr memory (mr free, then kfree)
Chuck Lever2e845222014-07-29 17:25:38 -04001392 * 3. MWs
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001393 */
1394 dprintk("RPC: %s: entering\n", __func__);
1395
1396 for (i = 0; i < buf->rb_max_requests; i++) {
Chuck Lever13924022015-01-21 11:03:52 -05001397 if (buf->rb_recv_bufs)
1398 rpcrdma_destroy_rep(ia, buf->rb_recv_bufs[i]);
1399 if (buf->rb_send_bufs)
1400 rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001401 }
1402
Chuck Lever2e845222014-07-29 17:25:38 -04001403 switch (ia->ri_memreg_strategy) {
1404 case RPCRDMA_FRMR:
1405 rpcrdma_destroy_frmrs(buf);
1406 break;
1407 case RPCRDMA_MTHCAFMR:
1408 rpcrdma_destroy_fmrs(buf);
1409 break;
1410 default:
1411 break;
Allen Andrews4034ba02014-05-28 10:32:09 -04001412 }
1413
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001414 kfree(buf->rb_pool);
1415}
1416
/* After a disconnect, unmap all FMRs.
 *
 * This is invoked only in the transport connect worker in order
 * to serialize with rpcrdma_register_fmr_external().
 */
static void
rpcrdma_reset_fmrs(struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct list_head *pos;
	struct rpcrdma_mw *r;
	LIST_HEAD(l);
	int rc;

	list_for_each(pos, &buf->rb_all) {
		r = list_entry(pos, struct rpcrdma_mw, mw_all);

		INIT_LIST_HEAD(&l);
		list_add(&r->r.fmr->list, &l);
		rc = ib_unmap_fmr(&l);
		if (rc)
			dprintk("RPC:       %s: ib_unmap_fmr failed %i\n",
				__func__, rc);
	}
}

/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
 * an unusable state. Find FRMRs in this state and deregister and
 * re-register each one. FRMRs that are VALID and attached to an
 * rpcrdma_req are also torn down.
 *
 * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
 *
 * This is invoked only in the transport connect worker in order
 * to serialize with rpcrdma_register_frmr_external().
 */
static void
rpcrdma_reset_frmrs(struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct list_head *pos;
	struct rpcrdma_mw *r;
	int rc;

	list_for_each(pos, &buf->rb_all) {
		r = list_entry(pos, struct rpcrdma_mw, mw_all);

		if (r->r.frmr.fr_state == FRMR_IS_INVALID)
			continue;

		rc = ib_dereg_mr(r->r.frmr.fr_mr);
		if (rc)
			dprintk("RPC:       %s: ib_dereg_mr failed %i\n",
				__func__, rc);
		ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);

		r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
					ia->ri_max_frmr_depth);
		if (IS_ERR(r->r.frmr.fr_mr)) {
			rc = PTR_ERR(r->r.frmr.fr_mr);
			dprintk("RPC:       %s: ib_alloc_fast_reg_mr"
				" failed %i\n", __func__, rc);
			continue;
		}
		r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
					ia->ri_id->device,
					ia->ri_max_frmr_depth);
		if (IS_ERR(r->r.frmr.fr_pgl)) {
			rc = PTR_ERR(r->r.frmr.fr_pgl);
			dprintk("RPC:       %s: "
				"ib_alloc_fast_reg_page_list "
				"failed %i\n", __func__, rc);

			ib_dereg_mr(r->r.frmr.fr_mr);
			continue;
		}
		r->r.frmr.fr_state = FRMR_IS_INVALID;
	}
}

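/* Illustrative sketch only (the actual call sites live elsewhere in
 * this file, in the transport connect worker): per the comments above,
 * the worker picks the reset routine that matches the memory
 * registration strategy before reconnecting, along these lines:
 *
 *	switch (ia->ri_memreg_strategy) {
 *	case RPCRDMA_FRMR:
 *		rpcrdma_reset_frmrs(ia);
 *		break;
 *	case RPCRDMA_MTHCAFMR:
 *		rpcrdma_reset_fmrs(ia);
 *		break;
 *	default:
 *		break;
 *	}
 */
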
/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
 * some req segments uninitialized.
 */
static void
rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
{
	if (*mw) {
		list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
		*mw = NULL;
	}
}

/* Cycle MWs back in reverse order, and "spin" them.
 * This delays and scrambles reuse as much as possible.
 */
static void
rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mr_seg *seg = req->rl_segments;
	struct rpcrdma_mr_seg *seg1 = seg;
	int i;

	for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
		rpcrdma_buffer_put_mr(&seg->rl_mw, buf);
	rpcrdma_buffer_put_mr(&seg1->rl_mw, buf);
}

static void
rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	buf->rb_send_bufs[--buf->rb_send_index] = req;
	req->rl_niovs = 0;
	if (req->rl_reply) {
		buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
		req->rl_reply->rr_func = NULL;
		req->rl_reply = NULL;
	}
}

/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
 * Redo only the ib_post_send().
 */
static void
rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct ib_send_wr invalidate_wr, *bad_wr;
	int rc;

	dprintk("RPC:       %s: FRMR %p is stale\n", __func__, r);

	/* When this FRMR is re-inserted into rb_mws, it is no longer stale */
	r->r.frmr.fr_state = FRMR_IS_INVALID;

	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
	invalidate_wr.wr_id = (unsigned long)(void *)r;
	invalidate_wr.opcode = IB_WR_LOCAL_INV;
	invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	dprintk("RPC:       %s: frmr %p invalidating rkey %08x\n",
		__func__, r, r->r.frmr.fr_mr->rkey);

	read_lock(&ia->ri_qplock);
	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
	read_unlock(&ia->ri_qplock);
	if (rc) {
		/* Force rpcrdma_buffer_get() to retry */
		r->r.frmr.fr_state = FRMR_IS_STALE;
		dprintk("RPC:       %s: ib_post_send failed, %i\n",
			__func__, rc);
	}
}

static void
rpcrdma_retry_flushed_linv(struct list_head *stale,
			   struct rpcrdma_buffer *buf)
{
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	struct list_head *pos;
	struct rpcrdma_mw *r;
	unsigned long flags;

	list_for_each(pos, stale) {
		r = list_entry(pos, struct rpcrdma_mw, mw_list);
		rpcrdma_retry_local_inv(r, ia);
	}

	spin_lock_irqsave(&buf->rb_lock, flags);
	list_splice_tail(stale, &buf->rb_mws);
	spin_unlock_irqrestore(&buf->rb_lock, flags);
}

static struct rpcrdma_req *
rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
			 struct list_head *stale)
{
	struct rpcrdma_mw *r;
	int i;

	i = RPCRDMA_MAX_SEGS - 1;
	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			       struct rpcrdma_mw, mw_list);
		list_del(&r->mw_list);
		if (r->r.frmr.fr_state == FRMR_IS_STALE) {
			list_add(&r->mw_list, stale);
			continue;
		}
		req->rl_segments[i].rl_mw = r;
		if (unlikely(i-- == 0))
			return req;	/* Success */
	}

	/* Not enough entries on rb_mws for this req */
	rpcrdma_buffer_put_sendbuf(req, buf);
	rpcrdma_buffer_put_mrs(req, buf);
	return NULL;
}

static struct rpcrdma_req *
rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;
	int i;

	i = RPCRDMA_MAX_SEGS - 1;
	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			       struct rpcrdma_mw, mw_list);
		list_del(&r->mw_list);
		req->rl_segments[i].rl_mw = r;
		if (unlikely(i-- == 0))
			return req;	/* Success */
	}

	/* Not enough entries on rb_mws for this req */
	rpcrdma_buffer_put_sendbuf(req, buf);
	rpcrdma_buffer_put_mrs(req, buf);
	return NULL;
}

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001644/*
1645 * Get a set of request/reply buffers.
1646 *
1647 * Reply buffer (if needed) is attached to send buffer upon return.
1648 * Rule:
1649 * rb_send_index and rb_recv_index MUST always be pointing to the
1650 * *next* available buffer (non-NULL). They are incremented after
1651 * removing buffers, and decremented *before* returning them.
1652 */
1653struct rpcrdma_req *
1654rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1655{
Chuck Leverc2922c02014-07-29 17:24:36 -04001656 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
Chuck Leverddb6beb2014-07-29 17:24:54 -04001657 struct list_head stale;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001658 struct rpcrdma_req *req;
1659 unsigned long flags;
1660
1661 spin_lock_irqsave(&buffers->rb_lock, flags);
1662 if (buffers->rb_send_index == buffers->rb_max_requests) {
1663 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1664 dprintk("RPC: %s: out of request buffers\n", __func__);
1665 return ((struct rpcrdma_req *)NULL);
1666 }
1667
1668 req = buffers->rb_send_bufs[buffers->rb_send_index];
1669 if (buffers->rb_send_index < buffers->rb_recv_index) {
1670 dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
1671 __func__,
1672 buffers->rb_recv_index - buffers->rb_send_index);
1673 req->rl_reply = NULL;
1674 } else {
1675 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1676 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1677 }
1678 buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001679
1680 INIT_LIST_HEAD(&stale);
Chuck Leverc2922c02014-07-29 17:24:36 -04001681 switch (ia->ri_memreg_strategy) {
1682 case RPCRDMA_FRMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001683 req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
1684 break;
Chuck Leverc2922c02014-07-29 17:24:36 -04001685 case RPCRDMA_MTHCAFMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001686 req = rpcrdma_buffer_get_fmrs(req, buffers);
Chuck Leverc2922c02014-07-29 17:24:36 -04001687 break;
1688 default:
1689 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001690 }
1691 spin_unlock_irqrestore(&buffers->rb_lock, flags);
Chuck Leverddb6beb2014-07-29 17:24:54 -04001692 if (!list_empty(&stale))
1693 rpcrdma_retry_flushed_linv(&stale, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001694 return req;
1695}
1696
/*
 * Put request/reply buffers back into pool.
 * Pre-decrement counter/array index.
 */
void
rpcrdma_buffer_put(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
	unsigned long flags;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	rpcrdma_buffer_put_sendbuf(req, buffers);
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
	case RPCRDMA_MTHCAFMR:
		rpcrdma_buffer_put_mrs(req, buffers);
		break;
	default:
		break;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

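/* Illustrative usage sketch (not part of this file's API surface; the
 * real caller lives in the xprtrdma transport code): a sender pairs
 * rpcrdma_buffer_get() with rpcrdma_buffer_put(), backing off when the
 * pool is exhausted:
 *
 *	struct rpcrdma_req *req;
 *
 *	req = rpcrdma_buffer_get(&r_xprt->rx_buf);
 *	if (req == NULL)
 *		return -ENOMEM;		(hypothetical error handling)
 *	... marshal the RPC and post it with rpcrdma_ep_post() ...
 *	rpcrdma_buffer_put(req);
 */
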
/*
 * Recover reply buffers from pool.
 * This happens when recovering from error conditions.
 * Post-increment counter/array index.
 */
void
rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	unsigned long flags;

	if (req->rl_iov.length == 0)	/* special case xprt_rdma_allocate() */
		buffers = ((struct rpcrdma_req *) buffers)->rl_buffer;
	spin_lock_irqsave(&buffers->rb_lock, flags);
	if (buffers->rb_recv_index < buffers->rb_max_requests) {
		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Put reply buffers back into pool when not attached to
 * a request. This happens in error conditions.
 */
void
rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
{
	struct rpcrdma_buffer *buffers = rep->rr_buffer;
	unsigned long flags;

	rep->rr_func = NULL;
	spin_lock_irqsave(&buffers->rb_lock, flags);
	buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
 */

int
rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
			  struct ib_mr **mrp, struct ib_sge *iov)
{
	struct ib_phys_buf ipb;
	struct ib_mr *mr;
	int rc;

	/*
	 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
	 */
	iov->addr = ib_dma_map_single(ia->ri_id->device,
			va, len, DMA_BIDIRECTIONAL);
	if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
		return -ENOMEM;

	iov->length = len;

	if (ia->ri_have_dma_lkey) {
		*mrp = NULL;
		iov->lkey = ia->ri_dma_lkey;
		return 0;
	} else if (ia->ri_bind_mem != NULL) {
		*mrp = NULL;
		iov->lkey = ia->ri_bind_mem->lkey;
		return 0;
	}

	ipb.addr = iov->addr;
	ipb.size = iov->length;
	mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
			IB_ACCESS_LOCAL_WRITE, &iov->addr);

	dprintk("RPC:       %s: phys convert: 0x%llx "
			"registered 0x%llx length %d\n",
			__func__, (unsigned long long)ipb.addr,
			(unsigned long long)iov->addr, len);

	if (IS_ERR(mr)) {
		*mrp = NULL;
		rc = PTR_ERR(mr);
		dprintk("RPC:       %s: failed with %i\n", __func__, rc);
	} else {
		*mrp = mr;
		iov->lkey = mr->lkey;
		rc = 0;
	}

	return rc;
}

int
rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
			    struct ib_mr *mr, struct ib_sge *iov)
{
	int rc;

	ib_dma_unmap_single(ia->ri_id->device,
			iov->addr, iov->length, DMA_BIDIRECTIONAL);

	if (mr == NULL)
		return 0;

	rc = ib_dereg_mr(mr);
	if (rc)
		dprintk("RPC:       %s: ib_dereg_mr failed %i\n", __func__, rc);
	return rc;
}

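/* A minimal usage sketch (illustrative only; buffer setup elsewhere in
 * this file follows this pattern): register a kmalloc'ed region for
 * local use, then tear it down in the opposite order:
 *
 *	struct ib_mr *mr;
 *	struct ib_sge iov;
 *	int rc;
 *
 *	rc = rpcrdma_register_internal(ia, p->buf, p->len, &mr, &iov);
 *	if (rc)
 *		return rc;
 *	... use iov (and mr, when non-NULL) in send/recv work requests ...
 *	rpcrdma_deregister_internal(ia, mr, &iov);
 *
 * Here "p->buf" and "p->len" stand in for any kmalloc'ed buffer and its
 * length; those names are hypothetical.
 */
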
/*
 * Wrappers for chunk registration, shared by read/write chunk code.
 */

static void
rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
{
	seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
	seg->mr_dmalen = seg->mr_len;
	if (seg->mr_page)
		seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
				seg->mr_page, offset_in_page(seg->mr_offset),
				seg->mr_dmalen, seg->mr_dir);
	else
		seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
				seg->mr_offset,
				seg->mr_dmalen, seg->mr_dir);
	if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
		dprintk("RPC:       %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
			__func__,
			(unsigned long long)seg->mr_dma,
			seg->mr_offset, seg->mr_dmalen);
	}
}

static void
rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
{
	if (seg->mr_page)
		ib_dma_unmap_page(ia->ri_id->device,
				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
	else
		ib_dma_unmap_single(ia->ri_id->device,
				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
}

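/* Note on locking (a sketch of the convention used below, not a new
 * API): rpcrdma_map_one() is called while a chunk is being registered,
 * and the matching rpcrdma_unmap_one() calls are issued with ri_qplock
 * held for read, so they cannot race with a QP replacement during
 * reconnect (which is assumed to take ri_qplock for write elsewhere in
 * this file):
 *
 *	read_lock(&ia->ri_qplock);
 *	while (seg1->mr_nsegs--)
 *		rpcrdma_unmap_one(ia, seg++);
 *	read_unlock(&ia->ri_qplock);
 */
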
static int
rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
			int *nsegs, int writing, struct rpcrdma_ia *ia,
			struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	struct rpcrdma_mw *mw = seg1->rl_mw;
	struct rpcrdma_frmr *frmr = &mw->r.frmr;
	struct ib_mr *mr = frmr->fr_mr;
	struct ib_send_wr fastreg_wr, *bad_wr;
	u8 key;
	int len, pageoff;
	int i, rc;
	int seg_len;
	u64 pa;
	int page_no;

	pageoff = offset_in_page(seg1->mr_offset);
	seg1->mr_offset -= pageoff;	/* start of page */
	seg1->mr_len += pageoff;
	len = -pageoff;
	if (*nsegs > ia->ri_max_frmr_depth)
		*nsegs = ia->ri_max_frmr_depth;
	for (page_no = i = 0; i < *nsegs;) {
		rpcrdma_map_one(ia, seg, writing);
		pa = seg->mr_dma;
		for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
			frmr->fr_pgl->page_list[page_no++] = pa;
			pa += PAGE_SIZE;
		}
		len += seg->mr_len;
		++seg;
		++i;
		/* Check for holes */
		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	dprintk("RPC:       %s: Using frmr %p to map %d segments\n",
		__func__, mw, i);

	frmr->fr_state = FRMR_IS_VALID;

	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
	fastreg_wr.wr_id = (unsigned long)(void *)mw;
	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
	fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma;
	fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
	fastreg_wr.wr.fast_reg.page_list_len = page_no;
	fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
	fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
	if (fastreg_wr.wr.fast_reg.length < len) {
		rc = -EIO;
		goto out_err;
	}

	/* Bump the key */
	key = (u8)(mr->rkey & 0x000000FF);
	ib_update_fast_reg_key(mr, ++key);

	fastreg_wr.wr.fast_reg.access_flags = (writing ?
				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
				IB_ACCESS_REMOTE_READ);
	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
	if (rc) {
		dprintk("RPC:       %s: failed ib_post_send for register,"
			" status %i\n", __func__, rc);
		ib_update_fast_reg_key(mr, --key);
		goto out_err;
	} else {
		seg1->mr_rkey = mr->rkey;
		seg1->mr_base = seg1->mr_dma + pageoff;
		seg1->mr_nsegs = i;
		seg1->mr_len = len;
	}
	*nsegs = i;
	return 0;
out_err:
	frmr->fr_state = FRMR_IS_INVALID;
	while (i--)
		rpcrdma_unmap_one(ia, --seg);
	return rc;
}

static int
rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
			struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	struct ib_send_wr invalidate_wr, *bad_wr;
	int rc;

	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;

	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
	invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
	invalidate_wr.opcode = IB_WR_LOCAL_INV;
	invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	read_lock(&ia->ri_qplock);
	while (seg1->mr_nsegs--)
		rpcrdma_unmap_one(ia, seg++);
	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
	read_unlock(&ia->ri_qplock);
	if (rc) {
		/* Force rpcrdma_buffer_get() to retry */
		seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
		dprintk("RPC:       %s: failed ib_post_send for invalidate,"
			" status %i\n", __func__, rc);
	}
	return rc;
}

static int
rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
			int *nsegs, int writing, struct rpcrdma_ia *ia)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
	int len, pageoff, i, rc;

	pageoff = offset_in_page(seg1->mr_offset);
	seg1->mr_offset -= pageoff;	/* start of page */
	seg1->mr_len += pageoff;
	len = -pageoff;
	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
		*nsegs = RPCRDMA_MAX_DATA_SEGS;
	for (i = 0; i < *nsegs;) {
		rpcrdma_map_one(ia, seg, writing);
		physaddrs[i] = seg->mr_dma;
		len += seg->mr_len;
		++seg;
		++i;
		/* Check for holes */
		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	rc = ib_map_phys_fmr(seg1->rl_mw->r.fmr, physaddrs, i, seg1->mr_dma);
	if (rc) {
		dprintk("RPC:       %s: failed ib_map_phys_fmr "
			"%u@0x%llx+%i (%d)... status %i\n", __func__,
			len, (unsigned long long)seg1->mr_dma,
			pageoff, i, rc);
		while (i--)
			rpcrdma_unmap_one(ia, --seg);
	} else {
		seg1->mr_rkey = seg1->rl_mw->r.fmr->rkey;
		seg1->mr_base = seg1->mr_dma + pageoff;
		seg1->mr_nsegs = i;
		seg1->mr_len = len;
	}
	*nsegs = i;
	return rc;
}

static int
rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
			struct rpcrdma_ia *ia)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	LIST_HEAD(l);
	int rc;

	list_add(&seg1->rl_mw->r.fmr->list, &l);
	rc = ib_unmap_fmr(&l);
	read_lock(&ia->ri_qplock);
	while (seg1->mr_nsegs--)
		rpcrdma_unmap_one(ia, seg++);
	read_unlock(&ia->ri_qplock);
	if (rc)
		dprintk("RPC:       %s: failed ib_unmap_fmr,"
			" status %i\n", __func__, rc);
	return rc;
}

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002047int
2048rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
2049 int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
2050{
2051 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002052 int rc = 0;
2053
2054 switch (ia->ri_memreg_strategy) {
2055
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002056 case RPCRDMA_ALLPHYSICAL:
2057 rpcrdma_map_one(ia, seg, writing);
2058 seg->mr_rkey = ia->ri_bind_mem->rkey;
2059 seg->mr_base = seg->mr_dma;
2060 seg->mr_nsegs = 1;
2061 nsegs = 1;
2062 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002063
Tom Talpey3197d3092008-10-09 15:00:20 -04002064 /* Registration using frmr registration */
2065 case RPCRDMA_FRMR:
2066 rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
2067 break;
2068
Tom Talpey8d4ba032008-10-09 14:59:49 -04002069 /* Registration using fmr memory registration */
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002070 case RPCRDMA_MTHCAFMR:
Tom Talpey8d4ba032008-10-09 14:59:49 -04002071 rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002072 break;
2073
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002074 default:
Chuck Lever92b98362014-11-08 20:14:12 -05002075 return -EIO;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002076 }
2077 if (rc)
Chuck Lever92b98362014-11-08 20:14:12 -05002078 return rc;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002079
2080 return nsegs;
2081}
2082
int
rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
			struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	int nsegs = seg->mr_nsegs, rc;

	switch (ia->ri_memreg_strategy) {

	case RPCRDMA_ALLPHYSICAL:
		read_lock(&ia->ri_qplock);
		rpcrdma_unmap_one(ia, seg);
		read_unlock(&ia->ri_qplock);
		break;

	case RPCRDMA_FRMR:
		rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
		break;

	case RPCRDMA_MTHCAFMR:
		rc = rpcrdma_deregister_fmr_external(seg, ia);
		break;

	default:
		break;
	}
	return nsegs;
}

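/* Illustrative pairing sketch (the real callers are in the RPC/RDMA
 * marshaling code, not in this file): a chunk is registered before the
 * RPC is sent and deregistered once the reply has been processed:
 *
 *	nsegs = rpcrdma_register_external(seg, nsegs, writing, r_xprt);
 *	if (nsegs < 0)
 *		return nsegs;		(registration failed)
 *	... advertise seg->mr_rkey, mr_base, mr_len in the RPC header ...
 *	rpcrdma_deregister_external(seg, r_xprt);
 */
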
/*
 * Prepost any receive buffer, then post send.
 *
 * Receive buffer is donated to hardware, reclaimed upon recv completion.
 */
int
rpcrdma_ep_post(struct rpcrdma_ia *ia,
		struct rpcrdma_ep *ep,
		struct rpcrdma_req *req)
{
	struct ib_send_wr send_wr, *send_wr_fail;
	struct rpcrdma_rep *rep = req->rl_reply;
	int rc;

	if (rep) {
		rc = rpcrdma_ep_post_recv(ia, ep, rep);
		if (rc)
			goto out;
		req->rl_reply = NULL;
	}

	send_wr.next = NULL;
	send_wr.wr_id = 0ULL;	/* no send cookie */
	send_wr.sg_list = req->rl_send_iov;
	send_wr.num_sge = req->rl_niovs;
	send_wr.opcode = IB_WR_SEND;
	if (send_wr.num_sge == 4)	/* no need to sync any pad (constant) */
		ib_dma_sync_single_for_device(ia->ri_id->device,
			req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
			DMA_TO_DEVICE);
	ib_dma_sync_single_for_device(ia->ri_id->device,
		req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
		DMA_TO_DEVICE);
	ib_dma_sync_single_for_device(ia->ri_id->device,
		req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
		DMA_TO_DEVICE);

	if (DECR_CQCOUNT(ep) > 0)
		send_wr.send_flags = 0;
	else {	/* Provider must take a send completion every now and then */
		INIT_CQCOUNT(ep);
		send_wr.send_flags = IB_SEND_SIGNALED;
	}

	rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
	if (rc)
		dprintk("RPC:       %s: ib_post_send returned %i\n", __func__,
			rc);
out:
	return rc;
}

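/* Illustrative call sketch (hedged; the real send path lives in the
 * transport code): after marshaling, a request and its pre-attached
 * reply buffer are handed to the endpoint in one call:
 *
 *	rc = rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req);
 *	if (rc)
 *		... error recovery is the caller's responsibility ...
 */
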
/*
 * (Re)post a receive buffer.
 */
int
rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
		     struct rpcrdma_ep *ep,
		     struct rpcrdma_rep *rep)
{
	struct ib_recv_wr recv_wr, *recv_wr_fail;
	int rc;

	recv_wr.next = NULL;
	recv_wr.wr_id = (u64) (unsigned long) rep;
	recv_wr.sg_list = &rep->rr_iov;
	recv_wr.num_sge = 1;

	ib_dma_sync_single_for_cpu(ia->ri_id->device,
		rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);

	rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);

	if (rc)
		dprintk("RPC:       %s: ib_post_recv returned %i\n", __func__,
			rc);
	return rc;
}

/* Physical mapping means one Read/Write list entry per page.
 * All list entries must fit within an inline buffer.
 *
 * NB: The server must return a Write list for NFS READ,
 * which has the same constraint. Factor in the inline
 * rsize as well.
 */
static size_t
rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	unsigned int inline_size, pages;

	inline_size = min_t(unsigned int,
			    cdata->inline_wsize, cdata->inline_rsize);
	inline_size -= RPCRDMA_HDRLEN_MIN;
	pages = inline_size / sizeof(struct rpcrdma_segment);
	return pages << PAGE_SHIFT;
}

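/* Worked example (assumed numbers, for illustration only): with
 * 1024-byte inline buffers, a 28-byte minimal RPC/RDMA header,
 * 16-byte struct rpcrdma_segment entries, and 4KB pages:
 *
 *	(1024 - 28) / 16 = 62 list entries
 *	62 << PAGE_SHIFT = 62 * 4096 = 253952 bytes (248 KiB)
 *
 * The actual result depends on the negotiated inline thresholds and
 * on the wire structure definitions in xprt_rdma.h.
 */
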
static size_t
rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt)
{
	return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
}

size_t
rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt)
{
	size_t result;

	switch (r_xprt->rx_ia.ri_memreg_strategy) {
	case RPCRDMA_ALLPHYSICAL:
		result = rpcrdma_physical_max_payload(r_xprt);
		break;
	default:
		result = rpcrdma_mr_max_payload(r_xprt);
	}
	return result;
}