\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -04001/*
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040038 */

/*
 * verbs.c
 *
 * Encapsulates the major functions managing:
 *  o adapters
 *  o endpoints
 *  o connections
 *  o buffer memory
 */

#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/prefetch.h>
#include <asm/bitops.h>

#include "xprt_rdma.h"

/*
 * Globals/Macros
 */

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);
static void rpcrdma_reset_fmrs(struct rpcrdma_ia *);

/*
 * internal functions
 */

/*
 * handle replies in tasklet context, using a single, global list
 * rdma tasklet function -- just turn around and call the func
 * for all replies on the list
 */

static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
static LIST_HEAD(rpcrdma_tasklets_g);

static void
rpcrdma_run_tasklet(unsigned long data)
{
	struct rpcrdma_rep *rep;
	void (*func)(struct rpcrdma_rep *);
	unsigned long flags;

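	/* Editorial comment (not in the original source): the tasklet's
	 * data argument is unused; the self-assignment below is just a
	 * conventional way to keep compilers from flagging it.
	 */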
	data = data;
	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	while (!list_empty(&rpcrdma_tasklets_g)) {
		rep = list_entry(rpcrdma_tasklets_g.next,
				 struct rpcrdma_rep, rr_list);
		list_del(&rep->rr_list);
		func = rep->rr_func;
		rep->rr_func = NULL;
		spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);

		if (func)
			func(rep);
		else
			rpcrdma_recv_buffer_put(rep);

		spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	}
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
}

static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);

static const char * const async_event[] = {
	"CQ error",
	"QP fatal error",
	"QP request error",
	"QP access error",
	"communication established",
	"send queue drained",
	"path migration successful",
	"path mig error",
	"device fatal error",
	"port active",
	"port error",
	"LID change",
	"P_key change",
	"SM change",
	"SRQ error",
	"SRQ limit reached",
	"last WQE reached",
	"client reregister",
	"GID change",
};

#define ASYNC_MSG(status)					\
	((status) < ARRAY_SIZE(async_event) ?			\
		async_event[(status)] : "unknown async error")

static void
rpcrdma_schedule_tasklet(struct list_head *sched_list)
{
	unsigned long flags;

	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	list_splice_tail(sched_list, &rpcrdma_tasklets_g);
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
	tasklet_schedule(&rpcrdma_tasklet_g);
}

static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	pr_err("RPC:       %s: %s on device %s ep %p\n",
	       __func__, ASYNC_MSG(event->event),
	       event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		rpcrdma_conn_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

static void
rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	pr_err("RPC:       %s: %s on device %s ep %p\n",
	       __func__, ASYNC_MSG(event->event),
	       event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		rpcrdma_conn_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

static const char * const wc_status[] = {
	"success",
	"local length error",
	"local QP operation error",
	"local EE context operation error",
	"local protection error",
	"WR flushed",
	"memory management operation error",
	"bad response error",
	"local access error",
	"remote invalid request error",
	"remote access error",
	"remote operation error",
	"transport retry counter exceeded",
	"RNR retry counter exceeded",
	"local RDD violation error",
	"remote invalid RD request",
	"operation aborted",
	"invalid EE context number",
	"invalid EE context state",
	"fatal error",
	"response timeout error",
	"general error",
};

#define COMPLETION_MSG(status)					\
	((status) < ARRAY_SIZE(wc_status) ?			\
		wc_status[(status)] : "unexpected completion error")

static void
rpcrdma_sendcq_process_wc(struct ib_wc *wc)
{
	if (likely(wc->status == IB_WC_SUCCESS))
		return;

	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->wr_id == 0ULL) {
		if (wc->status != IB_WC_WR_FLUSH_ERR)
			pr_err("RPC:       %s: SEND: %s\n",
			       __func__, COMPLETION_MSG(wc->status));
	} else {
		struct rpcrdma_mw *r;

		r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
		r->r.frmr.fr_state = FRMR_IS_STALE;
		pr_err("RPC:       %s: frmr %p (stale): %s\n",
		       __func__, r, COMPLETION_MSG(wc->status));
	}
}

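/* Editorial note (not in the original source): the polling strategy
 * below bounds work per upcall. RPCRDMA_WC_BUDGET caps the total
 * completions drained, and RPCRDMA_POLLSIZE is the batch size handed
 * to ib_poll_cq(), so the loop runs at most WC_BUDGET / POLLSIZE
 * iterations and exits early when a partial batch shows the CQ empty.
 */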
static int
rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct ib_wc *wcs;
	int budget, count, rc;

	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_send_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			return rc;

		count = rc;
		while (count-- > 0)
			rpcrdma_sendcq_process_wc(wcs++);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	return 0;
}

/*
 * Handle send, fast_reg_mr, and local_inv completions.
 *
 * Send events are typically suppressed and thus do not result
 * in an upcall. Occasionally one is signaled, however. This
 * prevents the provider's completion queue from wrapping and
 * losing a completion.
 */
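/* Editorial note (not in the original source): the poll / notify /
 * re-poll sequence in the upcall below closes a race. When
 * ib_req_notify_cq() is called with IB_CQ_REPORT_MISSED_EVENTS, a
 * positive return means completions may have arrived between the last
 * poll and re-arming the CQ, so the CQ is polled once more instead of
 * waiting for an interrupt that may never come.
 */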
static void
rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_sendcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC:       %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_sendcq_poll(cq, ep);
}

static void
rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
{
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long)wc->wr_id;

	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS)
		goto out_fail;

	/* status == SUCCESS means all fields in wc are trustworthy */
	if (wc->opcode != IB_WC_RECV)
		return;

	dprintk("RPC:       %s: rep %p opcode 'recv', length %u: success\n",
		__func__, rep, wc->byte_len);

	rep->rr_len = wc->byte_len;
	ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
				   rep->rr_iov.addr, rep->rr_len,
				   DMA_FROM_DEVICE);
	prefetch(rep->rr_base);

out_schedule:
	list_add_tail(&rep->rr_list, sched_list);
	return;
out_fail:
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("RPC:       %s: rep %p: %s\n",
		       __func__, rep, COMPLETION_MSG(wc->status));
	rep->rr_len = ~0U;
	goto out_schedule;
}

static int
rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct list_head sched_list;
	struct ib_wc *wcs;
	int budget, count, rc;

	INIT_LIST_HEAD(&sched_list);
	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_recv_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			goto out_schedule;

		count = rc;
		while (count-- > 0)
			rpcrdma_recvcq_process_wc(wcs++, &sched_list);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	rc = 0;

out_schedule:
	rpcrdma_schedule_tasklet(&sched_list);
	return rc;
}

/*
 * Handle receive completions.
 *
 * It is reentrant but processes single events in order to maintain
 * ordering of receives to keep server credits.
 *
 * It is the responsibility of the scheduled tasklet to return
 * recv buffers to the pool. NOTE: this affects synchronization of
 * connection shutdown. That is, the structures required for
 * the completion of the reply handler must remain intact until
 * all memory has been reclaimed.
 */
static void
rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_recvcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC:       %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_recvcq_poll(cq, ep);
}

static void
rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
{
	struct ib_wc wc;
	LIST_HEAD(sched_list);

	while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
		rpcrdma_recvcq_process_wc(&wc, &sched_list);
	if (!list_empty(&sched_list))
		rpcrdma_schedule_tasklet(&sched_list);
	while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
		rpcrdma_sendcq_process_wc(&wc);
}

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
static const char * const conn[] = {
	"address resolved",
	"address error",
	"route resolved",
	"route error",
	"connect request",
	"connect response",
	"connect error",
	"unreachable",
	"rejected",
	"established",
	"disconnected",
	"device removal",
	"multicast join",
	"multicast error",
	"address change",
	"timewait exit",
};

#define CONNECTION_MSG(status)						\
	((status) < ARRAY_SIZE(conn) ?					\
		conn[(status)] : "unrecognized connection error")
#endif

static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
#endif
	struct ib_qp_attr *attr = &ia->ri_qp_attr;
	struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr;
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ia->ri_async_rc = 0;
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC:       %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC:       %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		ib_query_qp(ia->ri_id->qp, attr,
			    IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
			    iattr);
		dprintk("RPC:       %s: %d responder resources"
			" (%d initiator)\n",
			__func__, attr->max_dest_rd_atomic,
			attr->max_rd_atomic);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		connstate = -ECONNREFUSED;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
		goto connected;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		connstate = -ENODEV;
connected:
		dprintk("RPC:       %s: %sconnected\n",
			__func__, connstate > 0 ? "" : "dis");
		ep->rep_connected = connstate;
		rpcrdma_conn_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		/*FALLTHROUGH*/
	default:
		dprintk("RPC:       %s: %pI4:%u (ep 0x%p): %s\n",
			__func__, &addr->sin_addr.s_addr,
			ntohs(addr->sin_port), ep,
			CONNECTION_MSG(event->event));
		break;
	}

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	if (connstate == 1) {
		int ird = attr->max_dest_rd_atomic;
		int tird = ep->rep_remote_cma.responder_resources;
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
			"on %s, memreg %d slots %d ird %d%s\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			ia->ri_id->device->name,
			ia->ri_memreg_strategy,
			xprt->rx_buf.rb_max_requests,
			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
	} else if (connstate < 0) {
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			connstate);
	}
#endif

	return 0;
}

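/* Editorial note (not in the original source): rpcrdma_create_id()
 * resolves the address and then the route as two separate asynchronous
 * steps. Before each step, ri_async_rc is preloaded with -ETIMEDOUT so
 * that a wait which expires without a CM upcall reports a timeout; the
 * upcall overwrites it with the real result before completing ri_done.
 */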
static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
			struct rpcrdma_ia *ia, struct sockaddr *addr)
{
	struct rdma_cm_id *id;
	int rc;

	init_completion(&ia->ri_done);

	id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		dprintk("RPC:       %s: rdma_create_id() failed %i\n",
			__func__, rc);
		return id;
	}

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC:       %s: rdma_resolve_addr() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC:       %s: rdma_resolve_route() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}

/*
 * Drain any cq, prior to teardown.
 */
static void
rpcrdma_clean_cq(struct ib_cq *cq)
{
	struct ib_wc wc;
	int count = 0;

	while (1 == ib_poll_cq(cq, 1, &wc))
		++count;

	if (count)
		dprintk("RPC:       %s: flushed %d events (last 0x%x)\n",
			__func__, count, wc.opcode);
}

/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc, mem_priv;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct ib_device_attr *devattr = &ia->ri_devattr;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC:       %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_query_device(ia->ri_id->device, devattr);
	if (rc) {
		dprintk("RPC:       %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out3;
	}

	if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
	}

	if (memreg == RPCRDMA_FRMR) {
		/* Requires both frmr reg and local dma lkey */
		if ((devattr->device_cap_flags &
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
			dprintk("RPC:       %s: FRMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_MTHCAFMR;
		} else {
			/* Mind the ia limit on FRMR page list depth */
			ia->ri_max_frmr_depth = min_t(unsigned int,
				RPCRDMA_MAX_DATA_SEGS,
				devattr->max_fast_reg_page_list_len);
		}
	}
	if (memreg == RPCRDMA_MTHCAFMR) {
		if (!ia->ri_id->device->alloc_fmr) {
			dprintk("RPC:       %s: MTHCAFMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_ALLPHYSICAL;
		}
	}

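	/* Editorial summary (not in the original source): the strategy
	 * selection above falls back in order -- FRMR when the device
	 * supports fast registration work requests, then MTHCAFMR when
	 * it provides an FMR allocator, and finally ALLPHYSICAL, which
	 * registers a DMA MR covering all of memory and is the least
	 * safe choice.
	 */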
	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_FRMR:
		break;
	case RPCRDMA_ALLPHYSICAL:
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
	case RPCRDMA_MTHCAFMR:
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
	register_setup:
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			rc = -ENOMEM;
			goto out3;
		}
		break;
	default:
		printk(KERN_ERR "RPC: Unsupported memory "
				"registration mode: %d\n", memreg);
		rc = -ENOMEM;
		goto out3;
	}
	dprintk("RPC:       %s: memory registration strategy is %d\n",
		__func__, memreg);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	rwlock_init(&ia->ri_qplock);
	return 0;

out3:
	ib_dealloc_pd(ia->ri_pd);
	ia->ri_pd = NULL;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}

/*
 * Clean up/close an IA.
 *   o if event handles and PD have been initialized, free them.
 *   o close the IA
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC:       %s: entering\n", __func__);
	if (ia->ri_bind_mem != NULL) {
		rc = ib_dereg_mr(ia->ri_bind_mem);
		dprintk("RPC:       %s: ib_dereg_mr returned %i\n",
			__func__, rc);
	}
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
		if (ia->ri_id->qp)
			rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = NULL;
	}
	if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
		rc = ib_dealloc_pd(ia->ri_pd);
		dprintk("RPC:       %s: ib_dealloc_pd returned %i\n",
			__func__, rc);
	}
}

/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
				struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr *devattr = &ia->ri_devattr;
	struct ib_cq *sendcq, *recvcq;
	int rc, err;

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > devattr->max_qp_wr)
		cdata->max_requests = devattr->max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	/* send_cq and recv_cq initialized below */
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR: {
		int depth = 7;

		/* Add room for frmr register and invalidate WRs.
		 * 1. FRMR reg WR for head
		 * 2. FRMR invalidate WR for head
		 * 3. N FRMR reg WRs for pagelist
		 * 4. N FRMR invalidate WRs for pagelist
		 * 5. FRMR reg WR for tail
		 * 6. FRMR invalidate WR for tail
		 * 7. The RDMA_SEND WR
		 */

		/* Calculate N if the device max FRMR depth is smaller than
		 * RPCRDMA_MAX_DATA_SEGS.
		 */
		if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
			int delta = RPCRDMA_MAX_DATA_SEGS -
				    ia->ri_max_frmr_depth;

			do {
				depth += 2;	/* FRMR reg + invalidate */
				delta -= ia->ri_max_frmr_depth;
			} while (delta > 0);

		}
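		/* Illustrative example (editorial, not in the original
		 * source): with RPCRDMA_MAX_DATA_SEGS of 64 and a device
		 * FRMR depth of 16, delta starts at 48 and the loop runs
		 * three times, growing depth from 7 to 13 -- two extra WRs
		 * for each additional FRMR needed to cover the page list.
		 */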
		ep->rep_attr.cap.max_send_wr *= depth;
		if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) {
			cdata->max_requests = devattr->max_qp_wr / depth;
			if (!cdata->max_requests)
				return -EINVAL;
			ep->rep_attr.cap.max_send_wr = cdata->max_requests *
						       depth;
		}
		break;
	}
	default:
		break;
	}
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	dprintk("RPC:       %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

	/* set trigger for requesting send completion */
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
	if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
		ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
	else if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;
	INIT_CQCOUNT(ep);
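	/* Editorial note (not in the original source): rep_cqinit is the
	 * number of sends that may be posted unsignaled before one WR
	 * must request a completion; the CQCOUNT macros count down and
	 * force a signaled send when the counter is exhausted, so the
	 * send CQ never wraps. For example, a 512-entry send queue would
	 * yield a rep_cqinit of 255, subject to the
	 * RPCRDMA_MAX_UNSIGNALED_SENDS cap.
	 */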
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400811 init_waitqueue_head(&ep->rep_connect_wait);
Chuck Lever254f91e2014-05-28 10:32:17 -0400812 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400813
Chuck Leverfc664482014-05-28 10:33:25 -0400814 sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
Chuck Lever1c00dd02014-05-28 10:33:42 -0400815 rpcrdma_cq_async_error_upcall, ep,
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400816 ep->rep_attr.cap.max_send_wr + 1, 0);
Chuck Leverfc664482014-05-28 10:33:25 -0400817 if (IS_ERR(sendcq)) {
818 rc = PTR_ERR(sendcq);
819 dprintk("RPC: %s: failed to create send CQ: %i\n",
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400820 __func__, rc);
821 goto out1;
822 }
823
Chuck Leverfc664482014-05-28 10:33:25 -0400824 rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400825 if (rc) {
826 dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
827 __func__, rc);
828 goto out2;
829 }
830
Chuck Leverfc664482014-05-28 10:33:25 -0400831 recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
Chuck Lever1c00dd02014-05-28 10:33:42 -0400832 rpcrdma_cq_async_error_upcall, ep,
Chuck Leverfc664482014-05-28 10:33:25 -0400833 ep->rep_attr.cap.max_recv_wr + 1, 0);
834 if (IS_ERR(recvcq)) {
835 rc = PTR_ERR(recvcq);
836 dprintk("RPC: %s: failed to create recv CQ: %i\n",
837 __func__, rc);
838 goto out2;
839 }
840
841 rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
842 if (rc) {
843 dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
844 __func__, rc);
845 ib_destroy_cq(recvcq);
846 goto out2;
847 }
848
849 ep->rep_attr.send_cq = sendcq;
850 ep->rep_attr.recv_cq = recvcq;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400851
852 /* Initialize cma parameters */
853
854 /* RPC/RDMA does not use private data */
855 ep->rep_remote_cma.private_data = NULL;
856 ep->rep_remote_cma.private_data_len = 0;
857
858 /* Client offers RDMA Read but does not initiate */
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400859 ep->rep_remote_cma.initiator_depth = 0;
Chuck Lever7bc79722015-01-21 11:03:27 -0500860 if (devattr->max_qp_rd_atom > 32) /* arbitrary but <= 255 */
Tom Tuckerb334eaa2008-10-09 15:00:30 -0400861 ep->rep_remote_cma.responder_resources = 32;
862 else
Chuck Lever7bc79722015-01-21 11:03:27 -0500863 ep->rep_remote_cma.responder_resources =
864 devattr->max_qp_rd_atom;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400865
866 ep->rep_remote_cma.retry_count = 7;
867 ep->rep_remote_cma.flow_control = 0;
868 ep->rep_remote_cma.rnr_retry_count = 0;
869
870 return 0;
871
872out2:
Chuck Leverfc664482014-05-28 10:33:25 -0400873 err = ib_destroy_cq(sendcq);
Chuck Lever5d40a8a2007-10-26 13:30:54 -0400874 if (err)
875 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
876 __func__, err);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400877out1:
878 return rc;
879}
880
881/*
882 * rpcrdma_ep_destroy
883 *
884 * Disconnect and destroy endpoint. After this, the only
885 * valid operations on the ep are to free it (if dynamically
886 * allocated) or re-create it.
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400887 */
Chuck Lever7f1d5412014-05-28 10:33:16 -0400888void
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400889rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
890{
891 int rc;
892
893 dprintk("RPC: %s: entering, connected is %d\n",
894 __func__, ep->rep_connected);
895
Chuck Lever254f91e2014-05-28 10:32:17 -0400896 cancel_delayed_work_sync(&ep->rep_connect_worker);
897
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400898 if (ia->ri_id->qp) {
Chuck Lever282191c2014-07-29 17:25:55 -0400899 rpcrdma_ep_disconnect(ep, ia);
Tom Talpeyfee08ca2008-10-09 15:01:00 -0400900 rdma_destroy_qp(ia->ri_id);
901 ia->ri_id->qp = NULL;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400902 }
903
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400904 /* padding - could be done in rpcrdma_buffer_destroy... */
905 if (ep->rep_pad_mr) {
906 rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
907 ep->rep_pad_mr = NULL;
908 }
909
Chuck Leverfc664482014-05-28 10:33:25 -0400910 rpcrdma_clean_cq(ep->rep_attr.recv_cq);
911 rc = ib_destroy_cq(ep->rep_attr.recv_cq);
912 if (rc)
913 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
914 __func__, rc);
915
916 rpcrdma_clean_cq(ep->rep_attr.send_cq);
917 rc = ib_destroy_cq(ep->rep_attr.send_cq);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400918 if (rc)
919 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
920 __func__, rc);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400921}
922
923/*
924 * Connect unconnected endpoint.
925 */
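/* Editorial note (not in the original source): on reconnect, a fresh
 * cm_id and QP are created first and only then swapped in under the
 * write side of ri_qplock, so posters holding the read lock never see
 * a torn id/qp pair; the old id and QP are destroyed after the swap.
 */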
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id, *old;
	int rc = 0;
	int retry_count = 0;

	if (ep->rep_connected != 0) {
		struct rpcrdma_xprt *xprt;
retry:
		dprintk("RPC:       %s: reconnecting...\n", __func__);

		rpcrdma_ep_disconnect(ep, ia);
		rpcrdma_flush_cqs(ep);

		switch (ia->ri_memreg_strategy) {
		case RPCRDMA_FRMR:
			rpcrdma_reset_frmrs(ia);
			break;
		case RPCRDMA_MTHCAFMR:
			rpcrdma_reset_fmrs(ia);
			break;
		case RPCRDMA_ALLPHYSICAL:
			break;
		default:
			rc = -EIO;
			goto out;
		}

		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		id = rpcrdma_create_id(xprt, ia,
				(struct sockaddr *)&xprt->rx_data.addr);
		if (IS_ERR(id)) {
			rc = -EHOSTUNREACH;
			goto out;
		}
		/* TEMP TEMP TEMP - fail if new device:
		 * Deregister/remarshal *all* requests!
		 * Close and recreate adapter, pd, etc!
		 * Re-determine all attributes still sane!
		 * More stuff I haven't thought of!
		 * Rrrgh!
		 */
		if (ia->ri_id->device != id->device) {
			printk("RPC:       %s: can't reconnect on "
				"different device!\n", __func__);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}
		/* END TEMP */
		rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC:       %s: rdma_create_qp failed %i\n",
				__func__, rc);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}

		write_lock(&ia->ri_qplock);
		old = ia->ri_id;
		ia->ri_id = id;
		write_unlock(&ia->ri_qplock);

		rdma_destroy_qp(old);
		rdma_destroy_id(old);
	} else {
		dprintk("RPC:       %s: connecting...\n", __func__);
		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC:       %s: rdma_create_qp failed %i\n",
				__func__, rc);
			/* do not update ep->rep_connected */
			return -ENETUNREACH;
		}
	}

	ep->rep_connected = 0;

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC:       %s: rdma_connect() failed with %i\n",
				__func__, rc);
		goto out;
	}

	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

	/*
	 * Check state. A non-peer reject indicates no listener
	 * (ECONNREFUSED), which may be a transient state. All
	 * others indicate a transport condition which has already
	 * undergone a best-effort connection attempt.
	 */
	if (ep->rep_connected == -ECONNREFUSED &&
	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
		dprintk("RPC:       %s: non-peer_reject, retry\n", __func__);
		goto retry;
	}
	if (ep->rep_connected <= 0) {
		/* Sometimes, the only way to reliably connect to remote
		 * CMs is to use same nonzero values for ORD and IRD. */
		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
		    (ep->rep_remote_cma.responder_resources == 0 ||
		     ep->rep_remote_cma.initiator_depth !=
				ep->rep_remote_cma.responder_resources)) {
			if (ep->rep_remote_cma.responder_resources == 0)
				ep->rep_remote_cma.responder_resources = 1;
			ep->rep_remote_cma.initiator_depth =
				ep->rep_remote_cma.responder_resources;
			goto retry;
		}
		rc = ep->rep_connected;
	} else {
		dprintk("RPC:       %s: connected\n", __func__);
	}

out:
	if (rc)
		ep->rep_connected = rc;
	return rc;
}

/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
void
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	rpcrdma_flush_cqs(ep);
	rc = rdma_disconnect(ia->ri_id);
	if (!rc) {
		/* returns without wait if not connected */
		wait_event_interruptible(ep->rep_connect_wait,
					 ep->rep_connected != 1);
		dprintk("RPC:       %s: after wait, %sconnected\n", __func__,
			(ep->rep_connected == 1) ? "still " : "dis");
	} else {
		dprintk("RPC:       %s: rdma_disconnect %i\n", __func__, rc);
		ep->rep_connected = rc;
	}
}

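/* Editorial note (not in the original source): both MW pools below are
 * sized at (rb_max_requests + 1) * RPCRDMA_MAX_SEGS, enough for every
 * credit's worth of requests to map a full set of segments, plus one
 * extra request's worth of MWs.
 */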
static int
rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
{
	int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
	struct ib_fmr_attr fmr_attr = {
		.max_pages	= RPCRDMA_MAX_DATA_SEGS,
		.max_maps	= 1,
		.page_shift	= PAGE_SHIFT
	};
	struct rpcrdma_mw *r;
	int i, rc;

	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
	dprintk("RPC:       %s: initializing %d FMRs\n", __func__, i);

	while (i--) {
		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (r == NULL)
			return -ENOMEM;

		r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr);
		if (IS_ERR(r->r.fmr)) {
			rc = PTR_ERR(r->r.fmr);
			dprintk("RPC:       %s: ib_alloc_fmr failed %i\n",
				__func__, rc);
			goto out_free;
		}

		list_add(&r->mw_list, &buf->rb_mws);
		list_add(&r->mw_all, &buf->rb_all);
	}
	return 0;

out_free:
	kfree(r);
	return rc;
}

static int
rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_frmr *f;
	struct rpcrdma_mw *r;
	int i, rc;

	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
	dprintk("RPC:       %s: initializing %d FRMRs\n", __func__, i);

	while (i--) {
		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (r == NULL)
			return -ENOMEM;
		f = &r->r.frmr;

		f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
						ia->ri_max_frmr_depth);
		if (IS_ERR(f->fr_mr)) {
			rc = PTR_ERR(f->fr_mr);
			dprintk("RPC:       %s: ib_alloc_fast_reg_mr "
				"failed %i\n", __func__, rc);
			goto out_free;
		}

		f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device,
							ia->ri_max_frmr_depth);
		if (IS_ERR(f->fr_pgl)) {
			rc = PTR_ERR(f->fr_pgl);
			dprintk("RPC:       %s: ib_alloc_fast_reg_page_list "
				"failed %i\n", __func__, rc);

			ib_dereg_mr(f->fr_mr);
			goto out_free;
		}

		list_add(&r->mw_list, &buf->rb_mws);
		list_add(&r->mw_all, &buf->rb_all);
	}

	return 0;

out_free:
	kfree(r);
	return rc;
}

int
rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
	struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
{
	char *p;
	size_t len, rlen, wlen;
	int i, rc;

	buf->rb_max_requests = cdata->max_requests;
	spin_lock_init(&buf->rb_lock);

	/* Need to allocate:
	 *   1. arrays for send and recv pointers
	 *   2. arrays of struct rpcrdma_req to fill in pointers
	 *   3. array of struct rpcrdma_rep for replies
	 *   4. padding, if any
	 * Send/recv buffers in req/rep need to be registered
	 */
	len = buf->rb_max_requests *
		(sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
	len += cdata->padding;

	p = kzalloc(len, GFP_KERNEL);
	if (p == NULL) {
		dprintk("RPC:       %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
			__func__, len);
		rc = -ENOMEM;
		goto out;
	}
	buf->rb_pool = p;	/* for freeing it later */

	buf->rb_send_bufs = (struct rpcrdma_req **) p;
	p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
	buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
	p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];

	/*
	 * Register the zeroed pad buffer, if any.
	 */
	if (cdata->padding) {
		rc = rpcrdma_register_internal(ia, p, cdata->padding,
					    &ep->rep_pad_mr, &ep->rep_pad);
		if (rc)
			goto out;
	}
	p += cdata->padding;

	INIT_LIST_HEAD(&buf->rb_mws);
	INIT_LIST_HEAD(&buf->rb_all);
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		rc = rpcrdma_init_frmrs(ia, buf);
		if (rc)
			goto out;
		break;
	case RPCRDMA_MTHCAFMR:
		rc = rpcrdma_init_fmrs(ia, buf);
		if (rc)
			goto out;
		break;
	default:
		break;
	}

	/*
	 * Allocate/init the request/reply buffers. Doing this
	 * using kmalloc for now -- one for each buf.
	 */
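	/* Editorial note (not in the original source): "1 << fls(x)"
	 * rounds x up to the next power of two, e.g. an inline size plus
	 * header totaling 1100 bytes becomes a 2048-byte allocation.
	 */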
Chuck Lever65866f82014-05-28 10:33:59 -04001231 wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req));
1232 rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep));
1233 dprintk("RPC: %s: wlen = %zu, rlen = %zu\n",
1234 __func__, wlen, rlen);
1235
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001236 for (i = 0; i < buf->rb_max_requests; i++) {
1237 struct rpcrdma_req *req;
1238 struct rpcrdma_rep *rep;
1239
Chuck Lever65866f82014-05-28 10:33:59 -04001240 req = kmalloc(wlen, GFP_KERNEL);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001241 if (req == NULL) {
1242 dprintk("RPC: %s: request buffer %d alloc"
1243 " failed\n", __func__, i);
1244 rc = -ENOMEM;
1245 goto out;
1246 }
1247 memset(req, 0, sizeof(struct rpcrdma_req));
1248 buf->rb_send_bufs[i] = req;
1249 buf->rb_send_bufs[i]->rl_buffer = buf;
1250
1251 rc = rpcrdma_register_internal(ia, req->rl_base,
Chuck Lever65866f82014-05-28 10:33:59 -04001252 wlen - offsetof(struct rpcrdma_req, rl_base),
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001253 &buf->rb_send_bufs[i]->rl_handle,
1254 &buf->rb_send_bufs[i]->rl_iov);
1255 if (rc)
1256 goto out;
1257
Chuck Lever65866f82014-05-28 10:33:59 -04001258 buf->rb_send_bufs[i]->rl_size = wlen -
1259 sizeof(struct rpcrdma_req);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001260
Chuck Lever65866f82014-05-28 10:33:59 -04001261 rep = kmalloc(rlen, GFP_KERNEL);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001262 if (rep == NULL) {
1263 dprintk("RPC: %s: reply buffer %d alloc failed\n",
1264 __func__, i);
1265 rc = -ENOMEM;
1266 goto out;
1267 }
1268 memset(rep, 0, sizeof(struct rpcrdma_rep));
1269 buf->rb_recv_bufs[i] = rep;
1270 buf->rb_recv_bufs[i]->rr_buffer = buf;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001271
1272 rc = rpcrdma_register_internal(ia, rep->rr_base,
Chuck Lever65866f82014-05-28 10:33:59 -04001273 rlen - offsetof(struct rpcrdma_rep, rr_base),
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001274 &buf->rb_recv_bufs[i]->rr_handle,
1275 &buf->rb_recv_bufs[i]->rr_iov);
1276 if (rc)
1277 goto out;
1278
1279 }
1280 dprintk("RPC: %s: max_requests %d\n",
1281 __func__, buf->rb_max_requests);
1282 /* done */
1283 return 0;
1284out:
1285 rpcrdma_buffer_destroy(buf);
1286 return rc;
1287}
1288
Chuck Lever2e845222014-07-29 17:25:38 -04001289static void
1290rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf)
1291{
1292 struct rpcrdma_mw *r;
1293 int rc;
1294
1295 while (!list_empty(&buf->rb_all)) {
1296 r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
1297 list_del(&r->mw_all);
1298 list_del(&r->mw_list);
1299
1300 rc = ib_dealloc_fmr(r->r.fmr);
1301 if (rc)
1302 dprintk("RPC: %s: ib_dealloc_fmr failed %i\n",
1303 __func__, rc);
1304
1305 kfree(r);
1306 }
1307}
1308
1309static void
1310rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf)
1311{
1312 struct rpcrdma_mw *r;
1313 int rc;
1314
1315 while (!list_empty(&buf->rb_all)) {
1316 r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
1317 list_del(&r->mw_all);
1318 list_del(&r->mw_list);
1319
1320 rc = ib_dereg_mr(r->r.frmr.fr_mr);
1321 if (rc)
1322 dprintk("RPC: %s: ib_dereg_mr failed %i\n",
1323 __func__, rc);
1324 ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
1325
1326 kfree(r);
1327 }
1328}
1329
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001330void
1331rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1332{
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001333 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
Chuck Lever2e845222014-07-29 17:25:38 -04001334 int i;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001335
1336 /* clean up in reverse order from create
1337 * 1. recv mr memory (mr free, then kfree)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001338 * 2. send mr memory (mr free, then kfree)
Chuck Lever2e845222014-07-29 17:25:38 -04001339 * 3. MWs
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001340 */
1341 dprintk("RPC: %s: entering\n", __func__);
1342
1343 for (i = 0; i < buf->rb_max_requests; i++) {
1344 if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) {
1345 rpcrdma_deregister_internal(ia,
1346 buf->rb_recv_bufs[i]->rr_handle,
1347 &buf->rb_recv_bufs[i]->rr_iov);
1348 kfree(buf->rb_recv_bufs[i]);
1349 }
1350 if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001351 rpcrdma_deregister_internal(ia,
1352 buf->rb_send_bufs[i]->rl_handle,
1353 &buf->rb_send_bufs[i]->rl_iov);
1354 kfree(buf->rb_send_bufs[i]);
1355 }
1356 }
1357
Chuck Lever2e845222014-07-29 17:25:38 -04001358 switch (ia->ri_memreg_strategy) {
1359 case RPCRDMA_FRMR:
1360 rpcrdma_destroy_frmrs(buf);
1361 break;
1362 case RPCRDMA_MTHCAFMR:
1363 rpcrdma_destroy_fmrs(buf);
1364 break;
1365 default:
1366 break;
Allen Andrews4034ba02014-05-28 10:32:09 -04001367 }
1368
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001369 kfree(buf->rb_pool);
1370}
1371
Chuck Lever467c9672014-11-08 20:14:29 -05001372/* After a disconnect, unmap all FMRs.
1373 *
1374 * This is invoked only in the transport connect worker in order
1375 * to serialize with rpcrdma_register_fmr_external().
1376 */
1377static void
1378rpcrdma_reset_fmrs(struct rpcrdma_ia *ia)
1379{
1380 struct rpcrdma_xprt *r_xprt =
1381 container_of(ia, struct rpcrdma_xprt, rx_ia);
1382 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1383 struct list_head *pos;
1384 struct rpcrdma_mw *r;
1385 LIST_HEAD(l);
1386 int rc;
1387
1388 list_for_each(pos, &buf->rb_all) {
1389 r = list_entry(pos, struct rpcrdma_mw, mw_all);
1390
1391 INIT_LIST_HEAD(&l);
1392 list_add(&r->r.fmr->list, &l);
1393 rc = ib_unmap_fmr(&l);
1394 if (rc)
1395 dprintk("RPC: %s: ib_unmap_fmr failed %i\n",
1396 __func__, rc);
1397 }
1398}
1399
Chuck Lever9f9d8022014-07-29 17:24:45 -04001400/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
1401 * an unusable state. Find FRMRs in this state and dereg / reg
1402 * each. FRMRs that are VALID and attached to an rpcrdma_req are
1403 * also torn down.
1404 *
1405 * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
1406 *
1407 * This is invoked only in the transport connect worker in order
1408 * to serialize with rpcrdma_register_frmr_external().
1409 */
1410static void
1411rpcrdma_reset_frmrs(struct rpcrdma_ia *ia)
1412{
1413 struct rpcrdma_xprt *r_xprt =
1414 container_of(ia, struct rpcrdma_xprt, rx_ia);
1415 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1416 struct list_head *pos;
1417 struct rpcrdma_mw *r;
1418 int rc;
1419
1420 list_for_each(pos, &buf->rb_all) {
1421 r = list_entry(pos, struct rpcrdma_mw, mw_all);
1422
1423 if (r->r.frmr.fr_state == FRMR_IS_INVALID)
1424 continue;
1425
1426 rc = ib_dereg_mr(r->r.frmr.fr_mr);
1427 if (rc)
1428 dprintk("RPC: %s: ib_dereg_mr failed %i\n",
1429 __func__, rc);
1430 ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
1431
1432 r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
1433 ia->ri_max_frmr_depth);
1434 if (IS_ERR(r->r.frmr.fr_mr)) {
1435 rc = PTR_ERR(r->r.frmr.fr_mr);
1436 dprintk("RPC: %s: ib_alloc_fast_reg_mr"
1437 " failed %i\n", __func__, rc);
1438 continue;
1439 }
1440 r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
1441 ia->ri_id->device,
1442 ia->ri_max_frmr_depth);
1443 if (IS_ERR(r->r.frmr.fr_pgl)) {
1444 rc = PTR_ERR(r->r.frmr.fr_pgl);
1445 dprintk("RPC: %s: "
1446 "ib_alloc_fast_reg_page_list "
1447 "failed %i\n", __func__, rc);
1448
1449 ib_dereg_mr(r->r.frmr.fr_mr);
1450 continue;
1451 }
1452 r->r.frmr.fr_state = FRMR_IS_INVALID;
1453 }
1454}
1455
Chuck Leverc2922c02014-07-29 17:24:36 -04001456/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
1457 * some req segments uninitialized.
1458 */
1459static void
1460rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
1461{
1462 if (*mw) {
1463 list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
1464 *mw = NULL;
1465 }
1466}
1467
1468/* Cycle mw's back in reverse order, and "spin" them.
1469 * This delays and scrambles reuse as much as possible.
1470 */
1471static void
1472rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
1473{
1474 struct rpcrdma_mr_seg *seg = req->rl_segments;
1475 struct rpcrdma_mr_seg *seg1 = seg;
1476 int i;
1477
1478 for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
Chuck Lever3eb35812015-01-21 11:02:54 -05001479 rpcrdma_buffer_put_mr(&seg->rl_mw, buf);
1480 rpcrdma_buffer_put_mr(&seg1->rl_mw, buf);
Chuck Leverc2922c02014-07-29 17:24:36 -04001481}
1482
1483static void
1484rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
1485{
1486 buf->rb_send_bufs[--buf->rb_send_index] = req;
1487 req->rl_niovs = 0;
1488 if (req->rl_reply) {
1489 buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
1490 req->rl_reply->rr_func = NULL;
1491 req->rl_reply = NULL;
1492 }
1493}
1494
Chuck Leverddb6beb2014-07-29 17:24:54 -04001495/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
1496 * Redo only the ib_post_send().
1497 */
1498static void
1499rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
1500{
1501 struct rpcrdma_xprt *r_xprt =
1502 container_of(ia, struct rpcrdma_xprt, rx_ia);
1503 struct ib_send_wr invalidate_wr, *bad_wr;
1504 int rc;
1505
1506 dprintk("RPC: %s: FRMR %p is stale\n", __func__, r);
1507
1508 /* When this FRMR is re-inserted into rb_mws, it is no longer stale */
Chuck Leverdab7e3b2014-07-29 17:25:20 -04001509 r->r.frmr.fr_state = FRMR_IS_INVALID;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001510
1511 memset(&invalidate_wr, 0, sizeof(invalidate_wr));
1512 invalidate_wr.wr_id = (unsigned long)(void *)r;
1513 invalidate_wr.opcode = IB_WR_LOCAL_INV;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001514 invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
1515 DECR_CQCOUNT(&r_xprt->rx_ep);
1516
1517 dprintk("RPC: %s: frmr %p invalidating rkey %08x\n",
1518 __func__, r, r->r.frmr.fr_mr->rkey);
1519
1520 read_lock(&ia->ri_qplock);
1521 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
1522 read_unlock(&ia->ri_qplock);
1523 if (rc) {
1524 /* Force rpcrdma_buffer_get() to retry */
1525 r->r.frmr.fr_state = FRMR_IS_STALE;
1526 dprintk("RPC: %s: ib_post_send failed, %i\n",
1527 __func__, rc);
1528 }
1529}
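/* Note that the LOCAL_INV WR above carries the rpcrdma_mw pointer in
 * its wr_id, so the send completion path can locate the MW when the
 * invalidation completes or flushes.
 */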
1530
1531static void
1532rpcrdma_retry_flushed_linv(struct list_head *stale,
1533 struct rpcrdma_buffer *buf)
1534{
1535 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
1536 struct list_head *pos;
1537 struct rpcrdma_mw *r;
1538 unsigned long flags;
1539
1540 list_for_each(pos, stale) {
1541 r = list_entry(pos, struct rpcrdma_mw, mw_list);
1542 rpcrdma_retry_local_inv(r, ia);
1543 }
1544
1545 spin_lock_irqsave(&buf->rb_lock, flags);
1546 list_splice_tail(stale, &buf->rb_mws);
1547 spin_unlock_irqrestore(&buf->rb_lock, flags);
1548}
1549
Chuck Leverc2922c02014-07-29 17:24:36 -04001550static struct rpcrdma_req *
Chuck Leverddb6beb2014-07-29 17:24:54 -04001551rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
1552 struct list_head *stale)
1553{
1554 struct rpcrdma_mw *r;
1555 int i;
1556
1557 i = RPCRDMA_MAX_SEGS - 1;
1558 while (!list_empty(&buf->rb_mws)) {
1559 r = list_entry(buf->rb_mws.next,
1560 struct rpcrdma_mw, mw_list);
1561 list_del(&r->mw_list);
1562 if (r->r.frmr.fr_state == FRMR_IS_STALE) {
1563 list_add(&r->mw_list, stale);
1564 continue;
1565 }
Chuck Lever3eb35812015-01-21 11:02:54 -05001566 req->rl_segments[i].rl_mw = r;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001567 if (unlikely(i-- == 0))
1568 return req; /* Success */
1569 }
1570
1571 /* Not enough entries on rb_mws for this req */
1572 rpcrdma_buffer_put_sendbuf(req, buf);
1573 rpcrdma_buffer_put_mrs(req, buf);
1574 return NULL;
1575}
1576
1577static struct rpcrdma_req *
1578rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
Chuck Leverc2922c02014-07-29 17:24:36 -04001579{
1580 struct rpcrdma_mw *r;
1581 int i;
1582
1583 i = RPCRDMA_MAX_SEGS - 1;
1584 while (!list_empty(&buf->rb_mws)) {
1585 r = list_entry(buf->rb_mws.next,
1586 struct rpcrdma_mw, mw_list);
1587 list_del(&r->mw_list);
Chuck Lever3eb35812015-01-21 11:02:54 -05001588 req->rl_segments[i].rl_mw = r;
Chuck Leverc2922c02014-07-29 17:24:36 -04001589 if (unlikely(i-- == 0))
1590 return req; /* Success */
1591 }
1592
1593 /* Not enough entries on rb_mws for this req */
1594 rpcrdma_buffer_put_sendbuf(req, buf);
1595 rpcrdma_buffer_put_mrs(req, buf);
1596 return NULL;
1597}
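/* This is the FMR twin of rpcrdma_buffer_get_frmrs() above.  FMRs
 * are not registered via posted work requests, so they cannot be
 * flushed by a disconnect and there is no stale list to maintain.
 */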
1598
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001599/*
1600 * Get a set of request/reply buffers.
1601 *
1602 * Reply buffer (if needed) is attached to send buffer upon return.
1603 * Rule:
1604 * rb_send_index and rb_recv_index MUST always point to the
1605 * *next* available buffer (non-NULL). They are incremented after
1606 * removing buffers, and decremented *before* returning them.
1607 */
1608struct rpcrdma_req *
1609rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1610{
Chuck Leverc2922c02014-07-29 17:24:36 -04001611 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
Chuck Leverddb6beb2014-07-29 17:24:54 -04001612 struct list_head stale;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001613 struct rpcrdma_req *req;
1614 unsigned long flags;
1615
1616 spin_lock_irqsave(&buffers->rb_lock, flags);
1617 if (buffers->rb_send_index == buffers->rb_max_requests) {
1618 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1619 dprintk("RPC: %s: out of request buffers\n", __func__);
1620 return NULL;
1621 }
1622
1623 req = buffers->rb_send_bufs[buffers->rb_send_index];
1624 if (buffers->rb_send_index < buffers->rb_recv_index) {
1625 dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
1626 __func__,
1627 buffers->rb_recv_index - buffers->rb_send_index);
1628 req->rl_reply = NULL;
1629 } else {
1630 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1631 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1632 }
1633 buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001634
1635 INIT_LIST_HEAD(&stale);
Chuck Leverc2922c02014-07-29 17:24:36 -04001636 switch (ia->ri_memreg_strategy) {
1637 case RPCRDMA_FRMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001638 req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
1639 break;
Chuck Leverc2922c02014-07-29 17:24:36 -04001640 case RPCRDMA_MTHCAFMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001641 req = rpcrdma_buffer_get_fmrs(req, buffers);
Chuck Leverc2922c02014-07-29 17:24:36 -04001642 break;
1643 default:
1644 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001645 }
1646 spin_unlock_irqrestore(&buffers->rb_lock, flags);
Chuck Leverddb6beb2014-07-29 17:24:54 -04001647 if (!list_empty(&stale))
1648 rpcrdma_retry_flushed_linv(&stale, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001649 return req;
1650}
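/* Typical caller pairing, sketched (the real call sites live in the
 * transport code, not in this file):
 *
 *	req = rpcrdma_buffer_get(&r_xprt->rx_buf);
 *	if (req == NULL)
 *		return NULL;		(give up or retry later)
 *	...marshal and post the RPC...
 *	rpcrdma_buffer_put(req);	(when the reply is done)
 */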
1651
1652/*
1653 * Put request/reply buffers back into pool.
1654 * Pre-decrement counter/array index.
1655 */
1656void
1657rpcrdma_buffer_put(struct rpcrdma_req *req)
1658{
1659 struct rpcrdma_buffer *buffers = req->rl_buffer;
1660 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001661 unsigned long flags;
1662
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001663 spin_lock_irqsave(&buffers->rb_lock, flags);
Chuck Leverc2922c02014-07-29 17:24:36 -04001664 rpcrdma_buffer_put_sendbuf(req, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001665 switch (ia->ri_memreg_strategy) {
Tom Talpey3197d3092008-10-09 15:00:20 -04001666 case RPCRDMA_FRMR:
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001667 case RPCRDMA_MTHCAFMR:
Chuck Leverc2922c02014-07-29 17:24:36 -04001668 rpcrdma_buffer_put_mrs(req, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001669 break;
1670 default:
1671 break;
1672 }
1673 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1674}
1675
1676/*
1677 * Recover reply buffers from pool.
1678 * This happens when recovering from error conditions.
1679 * Post-increment counter/array index.
1680 */
1681void
1682rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
1683{
1684 struct rpcrdma_buffer *buffers = req->rl_buffer;
1685 unsigned long flags;
1686
1687 if (req->rl_iov.length == 0) /* special case xprt_rdma_allocate() */
1688 buffers = ((struct rpcrdma_req *) buffers)->rl_buffer;
1689 spin_lock_irqsave(&buffers->rb_lock, flags);
1690 if (buffers->rb_recv_index < buffers->rb_max_requests) {
1691 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1692 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1693 }
1694 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1695}
1696
1697/*
1698 * Put reply buffers back into pool when not attached to
Chuck Leverb45ccfd2014-05-28 10:32:34 -04001699 * request. This happens in error conditions.
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001700 */
1701void
1702rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
1703{
1704 struct rpcrdma_buffer *buffers = rep->rr_buffer;
1705 unsigned long flags;
1706
1707 rep->rr_func = NULL;
1708 spin_lock_irqsave(&buffers->rb_lock, flags);
1709 buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
1710 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1711}
1712
1713/*
1714 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
1715 */
1716
1717int
1718rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
1719 struct ib_mr **mrp, struct ib_sge *iov)
1720{
1721 struct ib_phys_buf ipb;
1722 struct ib_mr *mr;
1723 int rc;
1724
1725 /*
1726 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
1727 */
1728 iov->addr = ib_dma_map_single(ia->ri_id->device,
1729 va, len, DMA_BIDIRECTIONAL);
Yan Burmanbf858ab2014-06-19 16:06:30 +03001730 if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
1731 return -ENOMEM;
1732
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001733 iov->length = len;
1734
Tom Talpeybd7ed1d2008-10-09 15:00:09 -04001735 if (ia->ri_have_dma_lkey) {
1736 *mrp = NULL;
1737 iov->lkey = ia->ri_dma_lkey;
1738 return 0;
1739 } else if (ia->ri_bind_mem != NULL) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001740 *mrp = NULL;
1741 iov->lkey = ia->ri_bind_mem->lkey;
1742 return 0;
1743 }
1744
1745 ipb.addr = iov->addr;
1746 ipb.size = iov->length;
1747 mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
1748 IB_ACCESS_LOCAL_WRITE, &iov->addr);
1749
1750 dprintk("RPC: %s: phys convert: 0x%llx "
1751 "registered 0x%llx length %d\n",
Andrew Mortona56daeb2007-10-16 01:29:57 -07001752 __func__, (unsigned long long)ipb.addr,
1753 (unsigned long long)iov->addr, len);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001754
1755 if (IS_ERR(mr)) {
1756 *mrp = NULL;
1757 rc = PTR_ERR(mr);
1758 dprintk("RPC: %s: failed with %i\n", __func__, rc);
1759 } else {
1760 *mrp = mr;
1761 iov->lkey = mr->lkey;
1762 rc = 0;
1763 }
1764
1765 return rc;
1766}
1767
1768int
1769rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
1770 struct ib_mr *mr, struct ib_sge *iov)
1771{
1772 int rc;
1773
1774 ib_dma_unmap_single(ia->ri_id->device,
1775 iov->addr, iov->length, DMA_BIDIRECTIONAL);
1776
1777 if (mr == NULL)
1778 return 0;
1779
1780 rc = ib_dereg_mr(mr);
1781 if (rc)
1782 dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
1783 return rc;
1784}
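/* The two wrappers above are meant to be used as a pair, e.g.
 * (sketch):
 *
 *	rc = rpcrdma_register_internal(ia, p, len, &mr, &iov);
 *	if (rc == 0) {
 *		...use iov as an ib_sge in a work request...
 *		rpcrdma_deregister_internal(ia, mr, &iov);
 *	}
 */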
1785
1786/*
1787 * Wrappers for chunk registration, shared by read/write chunk code.
1788 */
1789
1790static void
1791rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
1792{
1793 seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
1794 seg->mr_dmalen = seg->mr_len;
1795 if (seg->mr_page)
1796 seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
1797 seg->mr_page, offset_in_page(seg->mr_offset),
1798 seg->mr_dmalen, seg->mr_dir);
1799 else
1800 seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
1801 seg->mr_offset,
1802 seg->mr_dmalen, seg->mr_dir);
Tom Tucker5c635e02011-02-09 19:45:34 +00001803 if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
1804 dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
1805 __func__,
Randy Dunlap986d4ab2011-03-15 17:11:59 -07001806 (unsigned long long)seg->mr_dma,
1807 seg->mr_offset, seg->mr_dmalen);
Tom Tucker5c635e02011-02-09 19:45:34 +00001808 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001809}
1810
1811static void
1812rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
1813{
1814 if (seg->mr_page)
1815 ib_dma_unmap_page(ia->ri_id->device,
1816 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1817 else
1818 ib_dma_unmap_single(ia->ri_id->device,
1819 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1820}
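/* The DMA direction above is chosen from the client's point of view:
 * segments the server will write into (e.g. NFS READ payload) map
 * DMA_FROM_DEVICE, and segments the server will read from (e.g. NFS
 * WRITE payload) map DMA_TO_DEVICE.
 */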
1821
Tom Talpey8d4ba032008-10-09 14:59:49 -04001822static int
Tom Talpey3197d3092008-10-09 15:00:20 -04001823rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1824 int *nsegs, int writing, struct rpcrdma_ia *ia,
1825 struct rpcrdma_xprt *r_xprt)
1826{
1827 struct rpcrdma_mr_seg *seg1 = seg;
Chuck Lever3eb35812015-01-21 11:02:54 -05001828 struct rpcrdma_mw *mw = seg1->rl_mw;
Chuck Lever0dbb4102014-07-29 17:24:09 -04001829 struct rpcrdma_frmr *frmr = &mw->r.frmr;
1830 struct ib_mr *mr = frmr->fr_mr;
Chuck Leverf590e872014-07-29 17:25:29 -04001831 struct ib_send_wr fastreg_wr, *bad_wr;
Tom Talpey3197d3092008-10-09 15:00:20 -04001832 u8 key;
1833 int len, pageoff;
1834 int i, rc;
Tom Tucker9b781452012-02-20 13:07:57 -06001835 int seg_len;
1836 u64 pa;
1837 int page_no;
Tom Talpey3197d3092008-10-09 15:00:20 -04001838
1839 pageoff = offset_in_page(seg1->mr_offset);
1840 seg1->mr_offset -= pageoff; /* start of page */
1841 seg1->mr_len += pageoff;
1842 len = -pageoff;
Steve Wise0fc6c4e2014-05-28 10:32:00 -04001843 if (*nsegs > ia->ri_max_frmr_depth)
1844 *nsegs = ia->ri_max_frmr_depth;
Tom Tucker9b781452012-02-20 13:07:57 -06001845 for (page_no = i = 0; i < *nsegs;) {
Tom Talpey3197d3092008-10-09 15:00:20 -04001846 rpcrdma_map_one(ia, seg, writing);
Tom Tucker9b781452012-02-20 13:07:57 -06001847 pa = seg->mr_dma;
1848 for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
Chuck Lever0dbb4102014-07-29 17:24:09 -04001849 frmr->fr_pgl->page_list[page_no++] = pa;
Tom Tucker9b781452012-02-20 13:07:57 -06001850 pa += PAGE_SIZE;
1851 }
Tom Talpey3197d3092008-10-09 15:00:20 -04001852 len += seg->mr_len;
1853 ++seg;
1854 ++i;
1855 /* Check for holes */
1856 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1857 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1858 break;
1859 }
1860 dprintk("RPC: %s: Using frmr %p to map %d segments\n",
Chuck Lever0dbb4102014-07-29 17:24:09 -04001861 __func__, mw, i);
Tom Talpey3197d3092008-10-09 15:00:20 -04001862
Chuck Lever05055722014-07-29 17:25:12 -04001863 frmr->fr_state = FRMR_IS_VALID;
1864
Chuck Leverf590e872014-07-29 17:25:29 -04001865 memset(&fastreg_wr, 0, sizeof(fastreg_wr));
1866 fastreg_wr.wr_id = (unsigned long)(void *)mw;
1867 fastreg_wr.opcode = IB_WR_FAST_REG_MR;
1868 fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma;
1869 fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
1870 fastreg_wr.wr.fast_reg.page_list_len = page_no;
1871 fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
1872 fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
1873 if (fastreg_wr.wr.fast_reg.length < len) {
Chuck Lever5fc83f42014-07-29 17:23:17 -04001874 rc = -EIO;
1875 goto out_err;
Chuck Leverc977dea2014-05-28 10:35:06 -04001876 }
1877
1878 /* Bump the key */
Chuck Lever0dbb4102014-07-29 17:24:09 -04001879 key = (u8)(mr->rkey & 0x000000FF);
1880 ib_update_fast_reg_key(mr, ++key);
Chuck Leverc977dea2014-05-28 10:35:06 -04001881
Chuck Leverf590e872014-07-29 17:25:29 -04001882 fastreg_wr.wr.fast_reg.access_flags = (writing ?
Vu Pham68743082009-05-26 14:51:00 -04001883 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
1884 IB_ACCESS_REMOTE_READ);
Chuck Leverf590e872014-07-29 17:25:29 -04001885 fastreg_wr.wr.fast_reg.rkey = mr->rkey;
Tom Talpey3197d3092008-10-09 15:00:20 -04001886 DECR_CQCOUNT(&r_xprt->rx_ep);
1887
Chuck Leverf590e872014-07-29 17:25:29 -04001888 rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
Tom Talpey3197d3092008-10-09 15:00:20 -04001889 if (rc) {
1890 dprintk("RPC: %s: failed ib_post_send for register,"
1891 " status %i\n", __func__, rc);
Chuck Leverc93e9862014-07-29 17:24:19 -04001892 ib_update_fast_reg_key(mr, --key);
Chuck Lever5fc83f42014-07-29 17:23:17 -04001893 goto out_err;
Tom Talpey3197d3092008-10-09 15:00:20 -04001894 } else {
Chuck Lever0dbb4102014-07-29 17:24:09 -04001895 seg1->mr_rkey = mr->rkey;
Tom Talpey3197d3092008-10-09 15:00:20 -04001896 seg1->mr_base = seg1->mr_dma + pageoff;
1897 seg1->mr_nsegs = i;
1898 seg1->mr_len = len;
1899 }
1900 *nsegs = i;
Chuck Lever5fc83f42014-07-29 17:23:17 -04001901 return 0;
1902out_err:
Chuck Lever05055722014-07-29 17:25:12 -04001903 frmr->fr_state = FRMR_IS_INVALID;
Chuck Lever5fc83f42014-07-29 17:23:17 -04001904 while (i--)
1905 rpcrdma_unmap_one(ia, --seg);
Tom Talpey3197d3092008-10-09 15:00:20 -04001906 return rc;
1907}
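/* Worked example of the key bump above, with a hypothetical rkey: if
 * mr->rkey is 0x01020304, the low-order byte 0x04 is incremented and
 * ib_update_fast_reg_key() yields 0x01020305.  A peer still holding
 * the old rkey can no longer use this registration.
 */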
1908
1909static int
1910rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
1911 struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
1912{
1913 struct rpcrdma_mr_seg *seg1 = seg;
1914 struct ib_send_wr invalidate_wr, *bad_wr;
1915 int rc;
1916
Chuck Lever3eb35812015-01-21 11:02:54 -05001917 seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
Chuck Leverdab7e3b2014-07-29 17:25:20 -04001918
Tom Talpey3197d3092008-10-09 15:00:20 -04001919 memset(&invalidate_wr, 0, sizeof(invalidate_wr));
Chuck Lever3eb35812015-01-21 11:02:54 -05001920 invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
Tom Talpey3197d3092008-10-09 15:00:20 -04001921 invalidate_wr.opcode = IB_WR_LOCAL_INV;
Chuck Lever3eb35812015-01-21 11:02:54 -05001922 invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
Tom Talpey3197d3092008-10-09 15:00:20 -04001923 DECR_CQCOUNT(&r_xprt->rx_ep);
1924
Chuck Lever73806c82014-07-29 17:23:25 -04001925 read_lock(&ia->ri_qplock);
1926 while (seg1->mr_nsegs--)
1927 rpcrdma_unmap_one(ia, seg++);
Tom Talpey3197d3092008-10-09 15:00:20 -04001928 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
Chuck Lever73806c82014-07-29 17:23:25 -04001929 read_unlock(&ia->ri_qplock);
Chuck Leverdab7e3b2014-07-29 17:25:20 -04001930 if (rc) {
1931 /* Force rpcrdma_buffer_get() to retry */
Chuck Lever3eb35812015-01-21 11:02:54 -05001932 seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
Tom Talpey3197d3092008-10-09 15:00:20 -04001933 dprintk("RPC: %s: failed ib_post_send for invalidate,"
1934 " status %i\n", __func__, rc);
Chuck Leverdab7e3b2014-07-29 17:25:20 -04001935 }
Tom Talpey3197d3092008-10-09 15:00:20 -04001936 return rc;
1937}
1938
1939static int
Tom Talpey8d4ba032008-10-09 14:59:49 -04001940rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
1941 int *nsegs, int writing, struct rpcrdma_ia *ia)
1942{
1943 struct rpcrdma_mr_seg *seg1 = seg;
1944 u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
1945 int len, pageoff, i, rc;
1946
1947 pageoff = offset_in_page(seg1->mr_offset);
1948 seg1->mr_offset -= pageoff; /* start of page */
1949 seg1->mr_len += pageoff;
1950 len = -pageoff;
1951 if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
1952 *nsegs = RPCRDMA_MAX_DATA_SEGS;
1953 for (i = 0; i < *nsegs;) {
1954 rpcrdma_map_one(ia, seg, writing);
1955 physaddrs[i] = seg->mr_dma;
1956 len += seg->mr_len;
1957 ++seg;
1958 ++i;
1959 /* Check for holes */
1960 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1961 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1962 break;
1963 }
Chuck Lever3eb35812015-01-21 11:02:54 -05001964 rc = ib_map_phys_fmr(seg1->rl_mw->r.fmr, physaddrs, i, seg1->mr_dma);
Tom Talpey8d4ba032008-10-09 14:59:49 -04001965 if (rc) {
1966 dprintk("RPC: %s: failed ib_map_phys_fmr "
1967 "%u@0x%llx+%i (%d)... status %i\n", __func__,
1968 len, (unsigned long long)seg1->mr_dma,
1969 pageoff, i, rc);
1970 while (i--)
1971 rpcrdma_unmap_one(ia, --seg);
1972 } else {
Chuck Lever3eb35812015-01-21 11:02:54 -05001973 seg1->mr_rkey = seg1->rl_mw->r.fmr->rkey;
Tom Talpey8d4ba032008-10-09 14:59:49 -04001974 seg1->mr_base = seg1->mr_dma + pageoff;
1975 seg1->mr_nsegs = i;
1976 seg1->mr_len = len;
1977 }
1978 *nsegs = i;
1979 return rc;
1980}
1981
1982static int
1983rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
1984 struct rpcrdma_ia *ia)
1985{
1986 struct rpcrdma_mr_seg *seg1 = seg;
1987 LIST_HEAD(l);
1988 int rc;
1989
Chuck Lever3eb35812015-01-21 11:02:54 -05001990 list_add(&seg1->rl_mw->r.fmr->list, &l);
Tom Talpey8d4ba032008-10-09 14:59:49 -04001991 rc = ib_unmap_fmr(&l);
Chuck Lever73806c82014-07-29 17:23:25 -04001992 read_lock(&ia->ri_qplock);
Tom Talpey8d4ba032008-10-09 14:59:49 -04001993 while (seg1->mr_nsegs--)
1994 rpcrdma_unmap_one(ia, seg++);
Chuck Lever73806c82014-07-29 17:23:25 -04001995 read_unlock(&ia->ri_qplock);
Tom Talpey8d4ba032008-10-09 14:59:49 -04001996 if (rc)
1997 dprintk("RPC: %s: failed ib_unmap_fmr,"
1998 " status %i\n", __func__, rc);
1999 return rc;
2000}
2001
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002002int
2003rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
2004 int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
2005{
2006 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002007 int rc = 0;
2008
2009 switch (ia->ri_memreg_strategy) {
2010
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002011 case RPCRDMA_ALLPHYSICAL:
2012 rpcrdma_map_one(ia, seg, writing);
2013 seg->mr_rkey = ia->ri_bind_mem->rkey;
2014 seg->mr_base = seg->mr_dma;
2015 seg->mr_nsegs = 1;
2016 nsegs = 1;
2017 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002018
Tom Talpey3197d3092008-10-09 15:00:20 -04002019 /* Registration using frmr registration */
2020 case RPCRDMA_FRMR:
2021 rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
2022 break;
2023
Tom Talpey8d4ba032008-10-09 14:59:49 -04002024 /* Registration using fmr memory registration */
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002025 case RPCRDMA_MTHCAFMR:
Tom Talpey8d4ba032008-10-09 14:59:49 -04002026 rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002027 break;
2028
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002029 default:
Chuck Lever92b98362014-11-08 20:14:12 -05002030 return -EIO;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002031 }
2032 if (rc)
Chuck Lever92b98362014-11-08 20:14:12 -05002033 return rc;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002034
2035 return nsegs;
2036}
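/* On success, rpcrdma_register_external() returns the number of
 * segments coalesced into the registration, so the caller can
 * advance through its segment array by that count; a negative
 * return is an errno from the underlying strategy.
 */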
2037
2038int
2039rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
Chuck Lever13c9ff82014-05-28 10:33:08 -04002040 struct rpcrdma_xprt *r_xprt)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002041{
2042 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002043 int nsegs = seg->mr_nsegs, rc;
2044
2045 switch (ia->ri_memreg_strategy) {
2046
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002047 case RPCRDMA_ALLPHYSICAL:
Chuck Lever73806c82014-07-29 17:23:25 -04002048 read_lock(&ia->ri_qplock);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002049 rpcrdma_unmap_one(ia, seg);
Chuck Lever73806c82014-07-29 17:23:25 -04002050 read_unlock(&ia->ri_qplock);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002051 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002052
Tom Talpey3197d3092008-10-09 15:00:20 -04002053 case RPCRDMA_FRMR:
2054 rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
2055 break;
2056
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002057 case RPCRDMA_MTHCAFMR:
Tom Talpey8d4ba032008-10-09 14:59:49 -04002058 rc = rpcrdma_deregister_fmr_external(seg, ia);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002059 break;
2060
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002061 default:
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002062 break;
2063 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002064 return nsegs;
2065}
2066
2067/*
2068 * Prepost any receive buffer, then post send.
2069 *
2070 * Receive buffer is donated to hardware, reclaimed upon recv completion.
2071 */
2072int
2073rpcrdma_ep_post(struct rpcrdma_ia *ia,
2074 struct rpcrdma_ep *ep,
2075 struct rpcrdma_req *req)
2076{
2077 struct ib_send_wr send_wr, *send_wr_fail;
2078 struct rpcrdma_rep *rep = req->rl_reply;
2079 int rc;
2080
2081 if (rep) {
2082 rc = rpcrdma_ep_post_recv(ia, ep, rep);
2083 if (rc)
2084 goto out;
2085 req->rl_reply = NULL;
2086 }
2087
2088 send_wr.next = NULL;
2089 send_wr.wr_id = 0ULL; /* no send cookie */
2090 send_wr.sg_list = req->rl_send_iov;
2091 send_wr.num_sge = req->rl_niovs;
2092 send_wr.opcode = IB_WR_SEND;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002093 if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */
2094 ib_dma_sync_single_for_device(ia->ri_id->device,
2095 req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
2096 DMA_TO_DEVICE);
2097 ib_dma_sync_single_for_device(ia->ri_id->device,
2098 req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
2099 DMA_TO_DEVICE);
2100 ib_dma_sync_single_for_device(ia->ri_id->device,
2101 req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
2102 DMA_TO_DEVICE);
2103
2104 if (DECR_CQCOUNT(ep) > 0)
2105 send_wr.send_flags = 0;
2106 else { /* Provider must take a send completion every now and then */
2107 INIT_CQCOUNT(ep);
2108 send_wr.send_flags = IB_SEND_SIGNALED;
2109 }
2110
2111 rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
2112 if (rc)
2113 dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
2114 rc);
2115out:
2116 return rc;
2117}
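/* Send completion cadence, sketched: assuming INIT_CQCOUNT() primes
 * ep->rep_cqcount to N, posts 1..N-1 go out unsignaled and post N is
 * marked IB_SEND_SIGNALED, so the provider raises roughly one send
 * completion per N posts rather than one per post.
 */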
2118
2119/*
2120 * (Re)post a receive buffer.
2121 */
2122int
2123rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
2124 struct rpcrdma_ep *ep,
2125 struct rpcrdma_rep *rep)
2126{
2127 struct ib_recv_wr recv_wr, *recv_wr_fail;
2128 int rc;
2129
2130 recv_wr.next = NULL;
2131 recv_wr.wr_id = (u64) (unsigned long) rep;
2132 recv_wr.sg_list = &rep->rr_iov;
2133 recv_wr.num_sge = 1;
2134
2135 ib_dma_sync_single_for_cpu(ia->ri_id->device,
2136 rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);
2137
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002138 rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
2139
2140 if (rc)
2141 dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
2142 rc);
2143 return rc;
2144}
Chuck Lever43e95982014-07-29 17:23:34 -04002145
2146/* Physical mapping means one Read/Write list entry per page.
2147 * All list entries must fit within an inline buffer.
2148 *
2149 * NB: The server must return a Write list for NFS READ,
2150 * which has the same constraint. Factor in the inline
2151 * rsize as well.
2152 */
2153static size_t
2154rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt)
2155{
2156 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
2157 unsigned int inline_size, pages;
2158
2159 inline_size = min_t(unsigned int,
2160 cdata->inline_wsize, cdata->inline_rsize);
2161 inline_size -= RPCRDMA_HDRLEN_MIN;
2162 pages = inline_size / sizeof(struct rpcrdma_segment);
2163 return pages << PAGE_SHIFT;
2164}
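/* Worked example with hypothetical sizes: with 1024-byte inline
 * buffers and a 28-byte minimal header, 996 bytes remain; at 16
 * bytes per struct rpcrdma_segment that is 62 list entries, giving
 * 62 << PAGE_SHIFT = 253952 bytes of payload with 4KB pages.
 */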
2165
2166static size_t
2167rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt)
2168{
2169 return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
2170}
2171
2172size_t
2173rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt)
2174{
2175 size_t result;
2176
2177 switch (r_xprt->rx_ia.ri_memreg_strategy) {
2178 case RPCRDMA_ALLPHYSICAL:
2179 result = rpcrdma_physical_max_payload(r_xprt);
2180 break;
2181 default:
2182 result = rpcrdma_mr_max_payload(r_xprt);
2183 }
2184 return result;
2185}