\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -04001/*
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040038 */
39
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040040/*
41 * verbs.c
42 *
43 * Encapsulates the major functions managing:
44 * o adapters
45 * o endpoints
46 * o connections
47 * o buffer memory
48 */
49
Alexey Dobriyana6b7a402011-06-06 10:43:46 +000050#include <linux/interrupt.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090051#include <linux/slab.h>
Chuck Levereba8ff62015-01-21 11:03:02 -050052#include <linux/prefetch.h>
Chuck Lever65866f82014-05-28 10:33:59 -040053#include <asm/bitops.h>
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040054
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040055#include "xprt_rdma.h"
56
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040057/*
58 * Globals/Macros
59 */
60
Jeff Laytonf895b252014-11-17 16:58:04 -050061#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040062# define RPCDBG_FACILITY RPCDBG_TRANS
63#endif
64
Chuck Lever9f9d8022014-07-29 17:24:45 -040065static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);
Chuck Lever467c9672014-11-08 20:14:29 -050066static void rpcrdma_reset_fmrs(struct rpcrdma_ia *);
Chuck Lever9f9d8022014-07-29 17:24:45 -040067
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040068/*
69 * internal functions
70 */
71
72/*
73 * handle replies in tasklet context, using a single, global list
74 * rdma tasklet function -- just turn around and call the func
75 * for all replies on the list
76 */
77
78static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
79static LIST_HEAD(rpcrdma_tasklets_g);
80
81static void
82rpcrdma_run_tasklet(unsigned long data)
83{
84 struct rpcrdma_rep *rep;
85 void (*func)(struct rpcrdma_rep *);
86 unsigned long flags;
87
88 data = data;
89 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
90 while (!list_empty(&rpcrdma_tasklets_g)) {
91 rep = list_entry(rpcrdma_tasklets_g.next,
92 struct rpcrdma_rep, rr_list);
93 list_del(&rep->rr_list);
94 func = rep->rr_func;
95 rep->rr_func = NULL;
96 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
97
98 if (func)
99 func(rep);
100 else
101 rpcrdma_recv_buffer_put(rep);
102
103 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
104 }
105 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
106}
107
108static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
109
Chuck Lever7ff11de2014-11-08 20:15:01 -0500110static const char * const async_event[] = {
111 "CQ error",
112 "QP fatal error",
113 "QP request error",
114 "QP access error",
115 "communication established",
116 "send queue drained",
117 "path migration successful",
118 "path mig error",
119 "device fatal error",
120 "port active",
121 "port error",
122 "LID change",
123 "P_key change",
124 "SM change",
125 "SRQ error",
126 "SRQ limit reached",
127 "last WQE reached",
128 "client reregister",
129 "GID change",
130};
131
132#define ASYNC_MSG(status) \
133 ((status) < ARRAY_SIZE(async_event) ? \
134 async_event[(status)] : "unknown async error")
135
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400136static void
Chuck Leverf1a03b72014-11-08 20:14:37 -0500137rpcrdma_schedule_tasklet(struct list_head *sched_list)
138{
139 unsigned long flags;
140
141 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
142 list_splice_tail(sched_list, &rpcrdma_tasklets_g);
143 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
144 tasklet_schedule(&rpcrdma_tasklet_g);
145}
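
/* Note: provider CQ upcalls may be driven from hard-IRQ context,
 * depending on the device driver, which is why the global tasklet
 * list above is protected with the _irqsave spinlock variants.
 */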

static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
        struct rpcrdma_ep *ep = context;

        pr_err("RPC:       %s: %s on device %s ep %p\n",
               __func__, ASYNC_MSG(event->event),
               event->device->name, context);
        if (ep->rep_connected == 1) {
                ep->rep_connected = -EIO;
                rpcrdma_conn_func(ep);
                wake_up_all(&ep->rep_connect_wait);
        }
}

static void
rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
{
        struct rpcrdma_ep *ep = context;

        pr_err("RPC:       %s: %s on device %s ep %p\n",
               __func__, ASYNC_MSG(event->event),
               event->device->name, context);
        if (ep->rep_connected == 1) {
                ep->rep_connected = -EIO;
                rpcrdma_conn_func(ep);
                wake_up_all(&ep->rep_connect_wait);
        }
}

static const char * const wc_status[] = {
        "success",
        "local length error",
        "local QP operation error",
        "local EE context operation error",
        "local protection error",
        "WR flushed",
        "memory management operation error",
        "bad response error",
        "local access error",
        "remote invalid request error",
        "remote access error",
        "remote operation error",
        "transport retry counter exceeded",
        "RNR retry counter exceeded",
        "local RDD violation error",
        "remote invalid RD request",
        "operation aborted",
        "invalid EE context number",
        "invalid EE context state",
        "fatal error",
        "response timeout error",
        "general error",
};

#define COMPLETION_MSG(status)                                  \
        ((status) < ARRAY_SIZE(wc_status) ?                     \
                wc_status[(status)] : "unexpected completion error")

static void
rpcrdma_sendcq_process_wc(struct ib_wc *wc)
{
        if (likely(wc->status == IB_WC_SUCCESS))
                return;

        /* WARNING: Only wr_id and status are reliable at this point */
        if (wc->wr_id == 0ULL) {
                if (wc->status != IB_WC_WR_FLUSH_ERR)
                        pr_err("RPC:       %s: SEND: %s\n",
                               __func__, COMPLETION_MSG(wc->status));
        } else {
                struct rpcrdma_mw *r;

                r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
                r->r.frmr.fr_state = FRMR_IS_STALE;
                pr_err("RPC:       %s: frmr %p (stale): %s\n",
                       __func__, r, COMPLETION_MSG(wc->status));
        }
}

static int
rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
        struct ib_wc *wcs;
        int budget, count, rc;

        budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
        do {
                wcs = ep->rep_send_wcs;

                rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
                if (rc <= 0)
                        return rc;

                count = rc;
                while (count-- > 0)
                        rpcrdma_sendcq_process_wc(wcs++);
        } while (rc == RPCRDMA_POLLSIZE && --budget);
        return 0;
}

/*
 * Handle send, fast_reg_mr, and local_inv completions.
 *
 * Send events are typically suppressed and thus do not result
 * in an upcall. Occasionally one is signaled, however. This
 * prevents the provider's completion queue from wrapping and
 * losing a completion.
 */
static void
rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
{
        struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
        int rc;

        rc = rpcrdma_sendcq_poll(cq, ep);
        if (rc) {
                dprintk("RPC:       %s: ib_poll_cq failed: %i\n",
                        __func__, rc);
                return;
        }

        rc = ib_req_notify_cq(cq,
                        IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
        if (rc == 0)
                return;
        if (rc < 0) {
                dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
                        __func__, rc);
                return;
        }

        rpcrdma_sendcq_poll(cq, ep);
}
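
/* With IB_CQ_REPORT_MISSED_EVENTS, ib_req_notify_cq() returns a positive
 * value when completions arrived between the final poll and re-arming the
 * CQ. The second rpcrdma_sendcq_poll() call above reaps those stragglers,
 * so no completion is left behind without a future upcall.
 */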

static void
rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
{
        struct rpcrdma_rep *rep =
                        (struct rpcrdma_rep *)(unsigned long)wc->wr_id;

        /* WARNING: Only wr_id and status are reliable at this point */
        if (wc->status != IB_WC_SUCCESS)
                goto out_fail;

        /* status == SUCCESS means all fields in wc are trustworthy */
        if (wc->opcode != IB_WC_RECV)
                return;

        dprintk("RPC:       %s: rep %p opcode 'recv', length %u: success\n",
                __func__, rep, wc->byte_len);

        rep->rr_len = wc->byte_len;
        ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
                        rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);
        prefetch(rep->rr_base);

out_schedule:
        list_add_tail(&rep->rr_list, sched_list);
        return;
out_fail:
        if (wc->status != IB_WC_WR_FLUSH_ERR)
                pr_err("RPC:       %s: rep %p: %s\n",
                       __func__, rep, COMPLETION_MSG(wc->status));
        rep->rr_len = ~0U;
        goto out_schedule;
}
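
/* Failed receives are still added to sched_list, with rr_len set to ~0U,
 * so that the reply tasklet eventually recycles the rpcrdma_rep (via
 * rpcrdma_recv_buffer_put() when rr_func is NULL) instead of leaking it.
 */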

static int
rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
        struct list_head sched_list;
        struct ib_wc *wcs;
        int budget, count, rc;

        INIT_LIST_HEAD(&sched_list);
        budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
        do {
                wcs = ep->rep_recv_wcs;

                rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
                if (rc <= 0)
                        goto out_schedule;

                count = rc;
                while (count-- > 0)
                        rpcrdma_recvcq_process_wc(wcs++, &sched_list);
        } while (rc == RPCRDMA_POLLSIZE && --budget);
        rc = 0;

out_schedule:
        rpcrdma_schedule_tasklet(&sched_list);
        return rc;
}

/*
 * Handle receive completions.
 *
 * It is reentrant but processes single events in order to maintain
 * ordering of receives to keep server credits.
 *
 * It is the responsibility of the scheduled tasklet to return
 * recv buffers to the pool. NOTE: this affects synchronization of
 * connection shutdown. That is, the structures required for
 * the completion of the reply handler must remain intact until
 * all memory has been reclaimed.
 */
static void
rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
{
        struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
        int rc;

        rc = rpcrdma_recvcq_poll(cq, ep);
        if (rc) {
                dprintk("RPC:       %s: ib_poll_cq failed: %i\n",
                        __func__, rc);
                return;
        }

        rc = ib_req_notify_cq(cq,
                        IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
        if (rc == 0)
                return;
        if (rc < 0) {
                dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
                        __func__, rc);
                return;
        }

        rpcrdma_recvcq_poll(cq, ep);
}

static void
rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
{
        struct ib_wc wc;
        LIST_HEAD(sched_list);

        while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
                rpcrdma_recvcq_process_wc(&wc, &sched_list);
        if (!list_empty(&sched_list))
                rpcrdma_schedule_tasklet(&sched_list);
        while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
                rpcrdma_sendcq_process_wc(&wc);
}

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
static const char * const conn[] = {
        "address resolved",
        "address error",
        "route resolved",
        "route error",
        "connect request",
        "connect response",
        "connect error",
        "unreachable",
        "rejected",
        "established",
        "disconnected",
        "device removal",
        "multicast join",
        "multicast error",
        "address change",
        "timewait exit",
};

#define CONNECTION_MSG(status)                                  \
        ((status) < ARRAY_SIZE(conn) ?                          \
                conn[(status)] : "unrecognized connection error")
#endif

static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
        struct rpcrdma_xprt *xprt = id->context;
        struct rpcrdma_ia *ia = &xprt->rx_ia;
        struct rpcrdma_ep *ep = &xprt->rx_ep;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
        struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
#endif
        struct ib_qp_attr attr;
        struct ib_qp_init_attr iattr;
        int connstate = 0;

        switch (event->event) {
        case RDMA_CM_EVENT_ADDR_RESOLVED:
        case RDMA_CM_EVENT_ROUTE_RESOLVED:
                ia->ri_async_rc = 0;
                complete(&ia->ri_done);
                break;
        case RDMA_CM_EVENT_ADDR_ERROR:
                ia->ri_async_rc = -EHOSTUNREACH;
                dprintk("RPC:       %s: CM address resolution error, ep 0x%p\n",
                        __func__, ep);
                complete(&ia->ri_done);
                break;
        case RDMA_CM_EVENT_ROUTE_ERROR:
                ia->ri_async_rc = -ENETUNREACH;
                dprintk("RPC:       %s: CM route resolution error, ep 0x%p\n",
                        __func__, ep);
                complete(&ia->ri_done);
                break;
        case RDMA_CM_EVENT_ESTABLISHED:
                connstate = 1;
                ib_query_qp(ia->ri_id->qp, &attr,
                            IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
                            &iattr);
                dprintk("RPC:       %s: %d responder resources"
                        " (%d initiator)\n",
                        __func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
                goto connected;
        case RDMA_CM_EVENT_CONNECT_ERROR:
                connstate = -ENOTCONN;
                goto connected;
        case RDMA_CM_EVENT_UNREACHABLE:
                connstate = -ENETDOWN;
                goto connected;
        case RDMA_CM_EVENT_REJECTED:
                connstate = -ECONNREFUSED;
                goto connected;
        case RDMA_CM_EVENT_DISCONNECTED:
                connstate = -ECONNABORTED;
                goto connected;
        case RDMA_CM_EVENT_DEVICE_REMOVAL:
                connstate = -ENODEV;
connected:
                dprintk("RPC:       %s: %sconnected\n",
                        __func__, connstate > 0 ? "" : "dis");
                ep->rep_connected = connstate;
                rpcrdma_conn_func(ep);
                wake_up_all(&ep->rep_connect_wait);
                /*FALLTHROUGH*/
        default:
                dprintk("RPC:       %s: %pI4:%u (ep 0x%p): %s\n",
                        __func__, &addr->sin_addr.s_addr,
                        ntohs(addr->sin_port), ep,
                        CONNECTION_MSG(event->event));
                break;
        }

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
        if (connstate == 1) {
                int ird = attr.max_dest_rd_atomic;
                int tird = ep->rep_remote_cma.responder_resources;
                printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
                        "on %s, memreg %d slots %d ird %d%s\n",
                        &addr->sin_addr.s_addr,
                        ntohs(addr->sin_port),
                        ia->ri_id->device->name,
                        ia->ri_memreg_strategy,
                        xprt->rx_buf.rb_max_requests,
                        ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
        } else if (connstate < 0) {
                printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
                        &addr->sin_addr.s_addr,
                        ntohs(addr->sin_port),
                        connstate);
        }
#endif

        return 0;
}

static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
                        struct rpcrdma_ia *ia, struct sockaddr *addr)
{
        struct rdma_cm_id *id;
        int rc;

        init_completion(&ia->ri_done);

        id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(id)) {
                rc = PTR_ERR(id);
                dprintk("RPC:       %s: rdma_create_id() failed %i\n",
                        __func__, rc);
                return id;
        }

        ia->ri_async_rc = -ETIMEDOUT;
        rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
        if (rc) {
                dprintk("RPC:       %s: rdma_resolve_addr() failed %i\n",
                        __func__, rc);
                goto out;
        }
        wait_for_completion_interruptible_timeout(&ia->ri_done,
                                msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
        rc = ia->ri_async_rc;
        if (rc)
                goto out;

        ia->ri_async_rc = -ETIMEDOUT;
        rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
        if (rc) {
                dprintk("RPC:       %s: rdma_resolve_route() failed %i\n",
                        __func__, rc);
                goto out;
        }
        wait_for_completion_interruptible_timeout(&ia->ri_done,
                                msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
        rc = ia->ri_async_rc;
        if (rc)
                goto out;

        return id;

out:
        rdma_destroy_id(id);
        return ERR_PTR(rc);
}
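
/* Address and route resolution each complete asynchronously: the CM
 * upcall records the outcome in ia->ri_async_rc and signals ri_done.
 * Seeding ri_async_rc with -ETIMEDOUT before each step ensures a sane
 * error is returned if the wait above expires without any CM event.
 */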

/*
 * Drain any cq, prior to teardown.
 */
static void
rpcrdma_clean_cq(struct ib_cq *cq)
{
        struct ib_wc wc;
        int count = 0;

        while (1 == ib_poll_cq(cq, 1, &wc))
                ++count;

        if (count)
                dprintk("RPC:       %s: flushed %d events (last 0x%x)\n",
                        __func__, count, wc.opcode);
}

/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
        int rc, mem_priv;
        struct rpcrdma_ia *ia = &xprt->rx_ia;
        struct ib_device_attr *devattr = &ia->ri_devattr;

        ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
        if (IS_ERR(ia->ri_id)) {
                rc = PTR_ERR(ia->ri_id);
                goto out1;
        }

        ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
        if (IS_ERR(ia->ri_pd)) {
                rc = PTR_ERR(ia->ri_pd);
                dprintk("RPC:       %s: ib_alloc_pd() failed %i\n",
                        __func__, rc);
                goto out2;
        }

        rc = ib_query_device(ia->ri_id->device, devattr);
        if (rc) {
                dprintk("RPC:       %s: ib_query_device failed %d\n",
                        __func__, rc);
                goto out3;
        }

        if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
                ia->ri_have_dma_lkey = 1;
                ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
        }

        if (memreg == RPCRDMA_FRMR) {
                /* Requires both frmr reg and local dma lkey */
                if ((devattr->device_cap_flags &
                     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
                    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
                        dprintk("RPC:       %s: FRMR registration "
                                "not supported by HCA\n", __func__);
                        memreg = RPCRDMA_MTHCAFMR;
                } else {
                        /* Mind the ia limit on FRMR page list depth */
                        ia->ri_max_frmr_depth = min_t(unsigned int,
                                RPCRDMA_MAX_DATA_SEGS,
                                devattr->max_fast_reg_page_list_len);
                }
        }
        if (memreg == RPCRDMA_MTHCAFMR) {
                if (!ia->ri_id->device->alloc_fmr) {
                        dprintk("RPC:       %s: MTHCAFMR registration "
                                "not supported by HCA\n", __func__);
                        memreg = RPCRDMA_ALLPHYSICAL;
                }
        }
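
        /* The two checks above form a capability ladder: prefer FRMR when
         * the HCA supports fast registration plus a local DMA lkey, fall
         * back to FMR when the device provides alloc_fmr, and otherwise
         * fall back to all-physical registration below.
         */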

        /*
         * Optionally obtain an underlying physical identity mapping in
         * order to do a memory window-based bind. This base registration
         * is protected from remote access - that is enabled only by binding
         * for the specific bytes targeted during each RPC operation, and
         * revoked after the corresponding completion similar to a storage
         * adapter.
         */
        switch (memreg) {
        case RPCRDMA_FRMR:
                break;
        case RPCRDMA_ALLPHYSICAL:
                mem_priv = IB_ACCESS_LOCAL_WRITE |
                                IB_ACCESS_REMOTE_WRITE |
                                IB_ACCESS_REMOTE_READ;
                goto register_setup;
        case RPCRDMA_MTHCAFMR:
                if (ia->ri_have_dma_lkey)
                        break;
                mem_priv = IB_ACCESS_LOCAL_WRITE;
        register_setup:
                ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
                if (IS_ERR(ia->ri_bind_mem)) {
                        printk(KERN_ALERT "%s: ib_get_dma_mr for "
                                "phys register failed with %lX\n",
                                __func__, PTR_ERR(ia->ri_bind_mem));
                        rc = -ENOMEM;
                        goto out3;
                }
                break;
        default:
                printk(KERN_ERR "RPC: Unsupported memory "
                                "registration mode: %d\n", memreg);
                rc = -ENOMEM;
                goto out3;
        }
        dprintk("RPC:       %s: memory registration strategy is %d\n",
                __func__, memreg);

        /* Else will do memory reg/dereg for each chunk */
        ia->ri_memreg_strategy = memreg;

        rwlock_init(&ia->ri_qplock);
        return 0;

out3:
        ib_dealloc_pd(ia->ri_pd);
        ia->ri_pd = NULL;
out2:
        rdma_destroy_id(ia->ri_id);
        ia->ri_id = NULL;
out1:
        return rc;
}

/*
 * Clean up/close an IA.
 *  o if event handles and PD have been initialized, free them.
 *  o close the IA
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
        int rc;

        dprintk("RPC:       %s: entering\n", __func__);
        if (ia->ri_bind_mem != NULL) {
                rc = ib_dereg_mr(ia->ri_bind_mem);
                dprintk("RPC:       %s: ib_dereg_mr returned %i\n",
                        __func__, rc);
        }
        if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
                if (ia->ri_id->qp)
                        rdma_destroy_qp(ia->ri_id);
                rdma_destroy_id(ia->ri_id);
                ia->ri_id = NULL;
        }
        if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
                rc = ib_dealloc_pd(ia->ri_pd);
                dprintk("RPC:       %s: ib_dealloc_pd returned %i\n",
                        __func__, rc);
        }
}

/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
                                struct rpcrdma_create_data_internal *cdata)
{
        struct ib_device_attr *devattr = &ia->ri_devattr;
        struct ib_cq *sendcq, *recvcq;
        int rc, err;

        /* check provider's send/recv wr limits */
        if (cdata->max_requests > devattr->max_qp_wr)
                cdata->max_requests = devattr->max_qp_wr;

        ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
        ep->rep_attr.qp_context = ep;
        /* send_cq and recv_cq initialized below */
        ep->rep_attr.srq = NULL;
        ep->rep_attr.cap.max_send_wr = cdata->max_requests;
        switch (ia->ri_memreg_strategy) {
        case RPCRDMA_FRMR: {
                int depth = 7;

                /* Add room for frmr register and invalidate WRs.
                 * 1. FRMR reg WR for head
                 * 2. FRMR invalidate WR for head
                 * 3. N FRMR reg WRs for pagelist
                 * 4. N FRMR invalidate WRs for pagelist
                 * 5. FRMR reg WR for tail
                 * 6. FRMR invalidate WR for tail
                 * 7. The RDMA_SEND WR
                 */

                /* Calculate N if the device max FRMR depth is smaller than
                 * RPCRDMA_MAX_DATA_SEGS.
                 */
                if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
                        int delta = RPCRDMA_MAX_DATA_SEGS -
                                    ia->ri_max_frmr_depth;

                        do {
                                depth += 2; /* FRMR reg + invalidate */
                                delta -= ia->ri_max_frmr_depth;
                        } while (delta > 0);

                }
                ep->rep_attr.cap.max_send_wr *= depth;
                if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) {
                        cdata->max_requests = devattr->max_qp_wr / depth;
                        if (!cdata->max_requests)
                                return -EINVAL;
                        ep->rep_attr.cap.max_send_wr = cdata->max_requests *
                                                       depth;
                }
                break;
        }
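
        /* Worked example (illustrative values only): with a device limit
         * of ia->ri_max_frmr_depth = 16 and RPCRDMA_MAX_DATA_SEGS = 64,
         * delta starts at 48 and the loop adds 2 three times, giving
         * depth = 13. Each RPC then consumes up to 13 send queue entries,
         * so max_send_wr is scaled (and max_requests clamped) to match.
         */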
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400784 default:
785 break;
786 }
787 ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
788 ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
789 ep->rep_attr.cap.max_recv_sge = 1;
790 ep->rep_attr.cap.max_inline_data = 0;
791 ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
792 ep->rep_attr.qp_type = IB_QPT_RC;
793 ep->rep_attr.port_num = ~0;
794
795 dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
796 "iovs: send %d recv %d\n",
797 __func__,
798 ep->rep_attr.cap.max_send_wr,
799 ep->rep_attr.cap.max_recv_wr,
800 ep->rep_attr.cap.max_send_sge,
801 ep->rep_attr.cap.max_recv_sge);
802
803 /* set trigger for requesting send completion */
Chuck Leverfc664482014-05-28 10:33:25 -0400804 ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
Chuck Levere7104a22014-11-08 20:14:20 -0500805 if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
806 ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
807 else if (ep->rep_cqinit <= 2)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400808 ep->rep_cqinit = 0;
809 INIT_CQCOUNT(ep);
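        /* INIT_CQCOUNT and DECR_CQCOUNT (defined in xprt_rdma.h) implement
         * the trigger: sends are posted unsignaled until a countdown seeded
         * from rep_cqinit reaches zero; then one signaled send refreshes the
         * counter, bounding the number of unreaped send completions.
         */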
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400810 init_waitqueue_head(&ep->rep_connect_wait);
Chuck Lever254f91e2014-05-28 10:32:17 -0400811 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400812
Chuck Leverfc664482014-05-28 10:33:25 -0400813 sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
Chuck Lever1c00dd02014-05-28 10:33:42 -0400814 rpcrdma_cq_async_error_upcall, ep,
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400815 ep->rep_attr.cap.max_send_wr + 1, 0);
Chuck Leverfc664482014-05-28 10:33:25 -0400816 if (IS_ERR(sendcq)) {
817 rc = PTR_ERR(sendcq);
818 dprintk("RPC: %s: failed to create send CQ: %i\n",
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400819 __func__, rc);
820 goto out1;
821 }
822
Chuck Leverfc664482014-05-28 10:33:25 -0400823 rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400824 if (rc) {
825 dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
826 __func__, rc);
827 goto out2;
828 }
829
Chuck Leverfc664482014-05-28 10:33:25 -0400830 recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
Chuck Lever1c00dd02014-05-28 10:33:42 -0400831 rpcrdma_cq_async_error_upcall, ep,
Chuck Leverfc664482014-05-28 10:33:25 -0400832 ep->rep_attr.cap.max_recv_wr + 1, 0);
833 if (IS_ERR(recvcq)) {
834 rc = PTR_ERR(recvcq);
835 dprintk("RPC: %s: failed to create recv CQ: %i\n",
836 __func__, rc);
837 goto out2;
838 }
839
840 rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
841 if (rc) {
842 dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
843 __func__, rc);
844 ib_destroy_cq(recvcq);
845 goto out2;
846 }
847
848 ep->rep_attr.send_cq = sendcq;
849 ep->rep_attr.recv_cq = recvcq;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400850
851 /* Initialize cma parameters */
852
853 /* RPC/RDMA does not use private data */
854 ep->rep_remote_cma.private_data = NULL;
855 ep->rep_remote_cma.private_data_len = 0;
856
857 /* Client offers RDMA Read but does not initiate */
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400858 ep->rep_remote_cma.initiator_depth = 0;
Chuck Lever7bc79722015-01-21 11:03:27 -0500859 if (devattr->max_qp_rd_atom > 32) /* arbitrary but <= 255 */
Tom Tuckerb334eaa2008-10-09 15:00:30 -0400860 ep->rep_remote_cma.responder_resources = 32;
861 else
Chuck Lever7bc79722015-01-21 11:03:27 -0500862 ep->rep_remote_cma.responder_resources =
863 devattr->max_qp_rd_atom;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400864
865 ep->rep_remote_cma.retry_count = 7;
866 ep->rep_remote_cma.flow_control = 0;
867 ep->rep_remote_cma.rnr_retry_count = 0;
868
869 return 0;
870
871out2:
Chuck Leverfc664482014-05-28 10:33:25 -0400872 err = ib_destroy_cq(sendcq);
Chuck Lever5d40a8a2007-10-26 13:30:54 -0400873 if (err)
874 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
875 __func__, err);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400876out1:
877 return rc;
878}
879
880/*
881 * rpcrdma_ep_destroy
882 *
883 * Disconnect and destroy endpoint. After this, the only
884 * valid operations on the ep are to free it (if dynamically
885 * allocated) or re-create it.
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400886 */
Chuck Lever7f1d5412014-05-28 10:33:16 -0400887void
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400888rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
889{
890 int rc;
891
892 dprintk("RPC: %s: entering, connected is %d\n",
893 __func__, ep->rep_connected);
894
Chuck Lever254f91e2014-05-28 10:32:17 -0400895 cancel_delayed_work_sync(&ep->rep_connect_worker);
896
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400897 if (ia->ri_id->qp) {
Chuck Lever282191c2014-07-29 17:25:55 -0400898 rpcrdma_ep_disconnect(ep, ia);
Tom Talpeyfee08ca2008-10-09 15:01:00 -0400899 rdma_destroy_qp(ia->ri_id);
900 ia->ri_id->qp = NULL;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400901 }
902
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400903 /* padding - could be done in rpcrdma_buffer_destroy... */
904 if (ep->rep_pad_mr) {
905 rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
906 ep->rep_pad_mr = NULL;
907 }
908
Chuck Leverfc664482014-05-28 10:33:25 -0400909 rpcrdma_clean_cq(ep->rep_attr.recv_cq);
910 rc = ib_destroy_cq(ep->rep_attr.recv_cq);
911 if (rc)
912 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
913 __func__, rc);
914
915 rpcrdma_clean_cq(ep->rep_attr.send_cq);
916 rc = ib_destroy_cq(ep->rep_attr.send_cq);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400917 if (rc)
918 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
919 __func__, rc);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400920}
921
922/*
923 * Connect unconnected endpoint.
924 */
925int
926rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
927{
Chuck Lever73806c82014-07-29 17:23:25 -0400928 struct rdma_cm_id *id, *old;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400929 int rc = 0;
930 int retry_count = 0;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400931
Tom Talpeyc0555512008-10-10 11:32:45 -0400932 if (ep->rep_connected != 0) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400933 struct rpcrdma_xprt *xprt;
934retry:
Chuck Leverec62f402014-05-28 10:34:07 -0400935 dprintk("RPC: %s: reconnecting...\n", __func__);
Chuck Lever282191c2014-07-29 17:25:55 -0400936
937 rpcrdma_ep_disconnect(ep, ia);
Chuck Levera7bc2112014-07-29 17:23:52 -0400938 rpcrdma_flush_cqs(ep);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400939
Chuck Lever467c9672014-11-08 20:14:29 -0500940 switch (ia->ri_memreg_strategy) {
941 case RPCRDMA_FRMR:
Chuck Lever9f9d8022014-07-29 17:24:45 -0400942 rpcrdma_reset_frmrs(ia);
Chuck Lever467c9672014-11-08 20:14:29 -0500943 break;
944 case RPCRDMA_MTHCAFMR:
945 rpcrdma_reset_fmrs(ia);
946 break;
947 case RPCRDMA_ALLPHYSICAL:
948 break;
949 default:
950 rc = -EIO;
951 goto out;
952 }
Chuck Lever9f9d8022014-07-29 17:24:45 -0400953
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400954 xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
955 id = rpcrdma_create_id(xprt, ia,
956 (struct sockaddr *)&xprt->rx_data.addr);
957 if (IS_ERR(id)) {
Chuck Leverec62f402014-05-28 10:34:07 -0400958 rc = -EHOSTUNREACH;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400959 goto out;
960 }
961 /* TEMP TEMP TEMP - fail if new device:
962 * Deregister/remarshal *all* requests!
963 * Close and recreate adapter, pd, etc!
964 * Re-determine all attributes still sane!
965 * More stuff I haven't thought of!
966 * Rrrgh!
967 */
968 if (ia->ri_id->device != id->device) {
969 printk("RPC: %s: can't reconnect on "
970 "different device!\n", __func__);
971 rdma_destroy_id(id);
Chuck Leverec62f402014-05-28 10:34:07 -0400972 rc = -ENETUNREACH;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400973 goto out;
974 }
975 /* END TEMP */
Chuck Leverec62f402014-05-28 10:34:07 -0400976 rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
977 if (rc) {
978 dprintk("RPC: %s: rdma_create_qp failed %i\n",
979 __func__, rc);
980 rdma_destroy_id(id);
981 rc = -ENETUNREACH;
982 goto out;
983 }
Chuck Lever73806c82014-07-29 17:23:25 -0400984
985 write_lock(&ia->ri_qplock);
986 old = ia->ri_id;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400987 ia->ri_id = id;
Chuck Lever73806c82014-07-29 17:23:25 -0400988 write_unlock(&ia->ri_qplock);
989
990 rdma_destroy_qp(old);
991 rdma_destroy_id(old);
Chuck Leverec62f402014-05-28 10:34:07 -0400992 } else {
993 dprintk("RPC: %s: connecting...\n", __func__);
994 rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
995 if (rc) {
996 dprintk("RPC: %s: rdma_create_qp failed %i\n",
997 __func__, rc);
998 /* do not update ep->rep_connected */
999 return -ENETUNREACH;
1000 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001001 }
1002
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001003 ep->rep_connected = 0;
1004
1005 rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
1006 if (rc) {
1007 dprintk("RPC: %s: rdma_connect() failed with %i\n",
1008 __func__, rc);
1009 goto out;
1010 }
1011
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001012 wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
1013
1014 /*
1015 * Check state. A non-peer reject indicates no listener
1016 * (ECONNREFUSED), which may be a transient state. All
1017 * others indicate a transport condition which has already
1018 * undergone a best-effort.
1019 */
Joe Perchesf64f9e72009-11-29 16:55:45 -08001020 if (ep->rep_connected == -ECONNREFUSED &&
1021 ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001022 dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
1023 goto retry;
1024 }
1025 if (ep->rep_connected <= 0) {
1026 /* Sometimes, the only way to reliably connect to remote
1027 * CMs is to use same nonzero values for ORD and IRD. */
Tom Tuckerb334eaa2008-10-09 15:00:30 -04001028 if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
1029 (ep->rep_remote_cma.responder_resources == 0 ||
1030 ep->rep_remote_cma.initiator_depth !=
1031 ep->rep_remote_cma.responder_resources)) {
1032 if (ep->rep_remote_cma.responder_resources == 0)
1033 ep->rep_remote_cma.responder_resources = 1;
1034 ep->rep_remote_cma.initiator_depth =
1035 ep->rep_remote_cma.responder_resources;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001036 goto retry;
Tom Tuckerb334eaa2008-10-09 15:00:30 -04001037 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001038 rc = ep->rep_connected;
1039 } else {
1040 dprintk("RPC: %s: connected\n", __func__);
1041 }
1042
1043out:
1044 if (rc)
1045 ep->rep_connected = rc;
1046 return rc;
1047}
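
/* Two retry ladders operate in rpcrdma_ep_connect(): a bounded retry on
 * non-peer reject (-ECONNREFUSED, typically no listener yet), and a
 * one-shot retry that forces equal nonzero ORD/IRD values, which some
 * remote CMs require before they will complete a connection.
 */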
1048
1049/*
1050 * rpcrdma_ep_disconnect
1051 *
1052 * This is separate from destroy to facilitate the ability
1053 * to reconnect without recreating the endpoint.
1054 *
1055 * This call is not reentrant, and must not be made in parallel
1056 * on the same endpoint.
1057 */
Chuck Lever282191c2014-07-29 17:25:55 -04001058void
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001059rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
1060{
1061 int rc;
1062
Chuck Levera7bc2112014-07-29 17:23:52 -04001063 rpcrdma_flush_cqs(ep);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001064 rc = rdma_disconnect(ia->ri_id);
1065 if (!rc) {
1066 /* returns without wait if not connected */
1067 wait_event_interruptible(ep->rep_connect_wait,
1068 ep->rep_connected != 1);
1069 dprintk("RPC: %s: after wait, %sconnected\n", __func__,
1070 (ep->rep_connected == 1) ? "still " : "dis");
1071 } else {
1072 dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
1073 ep->rep_connected = rc;
1074 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001075}
1076
Chuck Lever2e845222014-07-29 17:25:38 -04001077static int
1078rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
1079{
1080 int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
1081 struct ib_fmr_attr fmr_attr = {
1082 .max_pages = RPCRDMA_MAX_DATA_SEGS,
1083 .max_maps = 1,
1084 .page_shift = PAGE_SHIFT
1085 };
1086 struct rpcrdma_mw *r;
1087 int i, rc;
1088
1089 i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
1090 dprintk("RPC: %s: initalizing %d FMRs\n", __func__, i);
1091
1092 while (i--) {
1093 r = kzalloc(sizeof(*r), GFP_KERNEL);
1094 if (r == NULL)
1095 return -ENOMEM;
1096
1097 r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr);
1098 if (IS_ERR(r->r.fmr)) {
1099 rc = PTR_ERR(r->r.fmr);
1100 dprintk("RPC: %s: ib_alloc_fmr failed %i\n",
1101 __func__, rc);
1102 goto out_free;
1103 }
1104
1105 list_add(&r->mw_list, &buf->rb_mws);
1106 list_add(&r->mw_all, &buf->rb_all);
1107 }
1108 return 0;
1109
1110out_free:
1111 kfree(r);
1112 return rc;
1113}
1114
1115static int
1116rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
1117{
1118 struct rpcrdma_frmr *f;
1119 struct rpcrdma_mw *r;
1120 int i, rc;
1121
1122 i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
1123 dprintk("RPC: %s: initalizing %d FRMRs\n", __func__, i);
1124
1125 while (i--) {
1126 r = kzalloc(sizeof(*r), GFP_KERNEL);
1127 if (r == NULL)
1128 return -ENOMEM;
1129 f = &r->r.frmr;
1130
1131 f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
1132 ia->ri_max_frmr_depth);
1133 if (IS_ERR(f->fr_mr)) {
1134 rc = PTR_ERR(f->fr_mr);
1135 dprintk("RPC: %s: ib_alloc_fast_reg_mr "
1136 "failed %i\n", __func__, rc);
1137 goto out_free;
1138 }
1139
1140 f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device,
1141 ia->ri_max_frmr_depth);
1142 if (IS_ERR(f->fr_pgl)) {
1143 rc = PTR_ERR(f->fr_pgl);
1144 dprintk("RPC: %s: ib_alloc_fast_reg_page_list "
1145 "failed %i\n", __func__, rc);
1146
1147 ib_dereg_mr(f->fr_mr);
1148 goto out_free;
1149 }
1150
1151 list_add(&r->mw_list, &buf->rb_mws);
1152 list_add(&r->mw_all, &buf->rb_all);
1153 }
1154
1155 return 0;
1156
1157out_free:
1158 kfree(r);
1159 return rc;
1160}
1161
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001162int
1163rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
1164 struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
1165{
1166 char *p;
Chuck Lever65866f82014-05-28 10:33:59 -04001167 size_t len, rlen, wlen;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001168 int i, rc;
1169
1170 buf->rb_max_requests = cdata->max_requests;
1171 spin_lock_init(&buf->rb_lock);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001172
1173 /* Need to allocate:
1174 * 1. arrays for send and recv pointers
1175 * 2. arrays of struct rpcrdma_req to fill in pointers
1176 * 3. array of struct rpcrdma_rep for replies
1177 * 4. padding, if any
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001178 * Send/recv buffers in req/rep need to be registered
1179 */
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001180 len = buf->rb_max_requests *
1181 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
1182 len += cdata->padding;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001183
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001184 p = kzalloc(len, GFP_KERNEL);
1185 if (p == NULL) {
1186 dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
1187 __func__, len);
1188 rc = -ENOMEM;
1189 goto out;
1190 }
1191 buf->rb_pool = p; /* for freeing it later */
1192
1193 buf->rb_send_bufs = (struct rpcrdma_req **) p;
1194 p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
1195 buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
1196 p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];
1197
1198 /*
1199 * Register the zeroed pad buffer, if any.
1200 */
1201 if (cdata->padding) {
1202 rc = rpcrdma_register_internal(ia, p, cdata->padding,
1203 &ep->rep_pad_mr, &ep->rep_pad);
1204 if (rc)
1205 goto out;
1206 }
1207 p += cdata->padding;
1208
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001209 INIT_LIST_HEAD(&buf->rb_mws);
Chuck Lever3111d722014-07-29 17:24:28 -04001210 INIT_LIST_HEAD(&buf->rb_all);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001211 switch (ia->ri_memreg_strategy) {
Tom Talpey3197d3092008-10-09 15:00:20 -04001212 case RPCRDMA_FRMR:
Chuck Lever2e845222014-07-29 17:25:38 -04001213 rc = rpcrdma_init_frmrs(ia, buf);
1214 if (rc)
1215 goto out;
Tom Talpey3197d3092008-10-09 15:00:20 -04001216 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001217 case RPCRDMA_MTHCAFMR:
Chuck Lever2e845222014-07-29 17:25:38 -04001218 rc = rpcrdma_init_fmrs(ia, buf);
1219 if (rc)
1220 goto out;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001221 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001222 default:
1223 break;
1224 }
1225
1226 /*
1227 * Allocate/init the request/reply buffers. Doing this
1228 * using kmalloc for now -- one for each buf.
1229 */
Chuck Lever65866f82014-05-28 10:33:59 -04001230 wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req));
1231 rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep));
1232 dprintk("RPC: %s: wlen = %zu, rlen = %zu\n",
1233 __func__, wlen, rlen);
1234
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001235 for (i = 0; i < buf->rb_max_requests; i++) {
1236 struct rpcrdma_req *req;
1237 struct rpcrdma_rep *rep;
1238
Chuck Lever65866f82014-05-28 10:33:59 -04001239 req = kmalloc(wlen, GFP_KERNEL);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001240 if (req == NULL) {
1241 dprintk("RPC: %s: request buffer %d alloc"
1242 " failed\n", __func__, i);
1243 rc = -ENOMEM;
1244 goto out;
1245 }
1246 memset(req, 0, sizeof(struct rpcrdma_req));
1247 buf->rb_send_bufs[i] = req;
1248 buf->rb_send_bufs[i]->rl_buffer = buf;
1249
1250 rc = rpcrdma_register_internal(ia, req->rl_base,
Chuck Lever65866f82014-05-28 10:33:59 -04001251 wlen - offsetof(struct rpcrdma_req, rl_base),
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001252 &buf->rb_send_bufs[i]->rl_handle,
1253 &buf->rb_send_bufs[i]->rl_iov);
1254 if (rc)
1255 goto out;
1256
Chuck Lever65866f82014-05-28 10:33:59 -04001257 buf->rb_send_bufs[i]->rl_size = wlen -
1258 sizeof(struct rpcrdma_req);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001259
Chuck Lever65866f82014-05-28 10:33:59 -04001260 rep = kmalloc(rlen, GFP_KERNEL);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001261 if (rep == NULL) {
1262 dprintk("RPC: %s: reply buffer %d alloc failed\n",
1263 __func__, i);
1264 rc = -ENOMEM;
1265 goto out;
1266 }
1267 memset(rep, 0, sizeof(struct rpcrdma_rep));
1268 buf->rb_recv_bufs[i] = rep;
1269 buf->rb_recv_bufs[i]->rr_buffer = buf;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001270
1271 rc = rpcrdma_register_internal(ia, rep->rr_base,
Chuck Lever65866f82014-05-28 10:33:59 -04001272 rlen - offsetof(struct rpcrdma_rep, rr_base),
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001273 &buf->rb_recv_bufs[i]->rr_handle,
1274 &buf->rb_recv_bufs[i]->rr_iov);
1275 if (rc)
1276 goto out;
1277
1278 }
1279 dprintk("RPC: %s: max_requests %d\n",
1280 __func__, buf->rb_max_requests);
1281 /* done */
1282 return 0;
1283out:
1284 rpcrdma_buffer_destroy(buf);
1285 return rc;
1286}
1287
Chuck Lever2e845222014-07-29 17:25:38 -04001288static void
1289rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf)
1290{
1291 struct rpcrdma_mw *r;
1292 int rc;
1293
1294 while (!list_empty(&buf->rb_all)) {
1295 r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
1296 list_del(&r->mw_all);
1297 list_del(&r->mw_list);
1298
1299 rc = ib_dealloc_fmr(r->r.fmr);
1300 if (rc)
1301 dprintk("RPC: %s: ib_dealloc_fmr failed %i\n",
1302 __func__, rc);
1303
1304 kfree(r);
1305 }
1306}
1307
1308static void
1309rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf)
1310{
1311 struct rpcrdma_mw *r;
1312 int rc;
1313
1314 while (!list_empty(&buf->rb_all)) {
1315 r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
1316 list_del(&r->mw_all);
1317 list_del(&r->mw_list);
1318
1319 rc = ib_dereg_mr(r->r.frmr.fr_mr);
1320 if (rc)
1321 dprintk("RPC: %s: ib_dereg_mr failed %i\n",
1322 __func__, rc);
1323 ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
1324
1325 kfree(r);
1326 }
1327}
1328
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001329void
1330rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1331{
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001332 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
Chuck Lever2e845222014-07-29 17:25:38 -04001333 int i;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001334
1335 /* clean up in reverse order from create
1336 * 1. recv mr memory (mr free, then kfree)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001337 * 2. send mr memory (mr free, then kfree)
Chuck Lever2e845222014-07-29 17:25:38 -04001338 * 3. MWs
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001339 */
1340 dprintk("RPC: %s: entering\n", __func__);
1341
1342 for (i = 0; i < buf->rb_max_requests; i++) {
1343 if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) {
1344 rpcrdma_deregister_internal(ia,
1345 buf->rb_recv_bufs[i]->rr_handle,
1346 &buf->rb_recv_bufs[i]->rr_iov);
1347 kfree(buf->rb_recv_bufs[i]);
1348 }
1349 if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001350 rpcrdma_deregister_internal(ia,
1351 buf->rb_send_bufs[i]->rl_handle,
1352 &buf->rb_send_bufs[i]->rl_iov);
1353 kfree(buf->rb_send_bufs[i]);
1354 }
1355 }
1356
Chuck Lever2e845222014-07-29 17:25:38 -04001357 switch (ia->ri_memreg_strategy) {
1358 case RPCRDMA_FRMR:
1359 rpcrdma_destroy_frmrs(buf);
1360 break;
1361 case RPCRDMA_MTHCAFMR:
1362 rpcrdma_destroy_fmrs(buf);
1363 break;
1364 default:
1365 break;
Allen Andrews4034ba02014-05-28 10:32:09 -04001366 }
1367
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001368 kfree(buf->rb_pool);
1369}
1370
Chuck Lever467c9672014-11-08 20:14:29 -05001371/* After a disconnect, unmap all FMRs.
1372 *
1373 * This is invoked only in the transport connect worker in order
1374 * to serialize with rpcrdma_register_fmr_external().
1375 */
1376static void
1377rpcrdma_reset_fmrs(struct rpcrdma_ia *ia)
1378{
1379 struct rpcrdma_xprt *r_xprt =
1380 container_of(ia, struct rpcrdma_xprt, rx_ia);
1381 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1382 struct list_head *pos;
1383 struct rpcrdma_mw *r;
1384 LIST_HEAD(l);
1385 int rc;
1386
1387 list_for_each(pos, &buf->rb_all) {
1388 r = list_entry(pos, struct rpcrdma_mw, mw_all);
1389
1390 INIT_LIST_HEAD(&l);
1391 list_add(&r->r.fmr->list, &l);
1392 rc = ib_unmap_fmr(&l);
1393 if (rc)
1394 dprintk("RPC: %s: ib_unmap_fmr failed %i\n",
1395 __func__, rc);
1396 }
1397}
1398
Chuck Lever9f9d8022014-07-29 17:24:45 -04001399/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
1400 * an unusable state. Find FRMRs in this state and dereg / reg
1401 * each. FRMRs that are VALID and attached to an rpcrdma_req are
1402 * also torn down.
1403 *
1404 * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
1405 *
1406 * This is invoked only in the transport connect worker in order
1407 * to serialize with rpcrdma_register_frmr_external().
1408 */
1409static void
1410rpcrdma_reset_frmrs(struct rpcrdma_ia *ia)
1411{
1412 struct rpcrdma_xprt *r_xprt =
1413 container_of(ia, struct rpcrdma_xprt, rx_ia);
1414 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1415 struct list_head *pos;
1416 struct rpcrdma_mw *r;
1417 int rc;
1418
1419 list_for_each(pos, &buf->rb_all) {
1420 r = list_entry(pos, struct rpcrdma_mw, mw_all);
1421
1422 if (r->r.frmr.fr_state == FRMR_IS_INVALID)
1423 continue;
1424
1425 rc = ib_dereg_mr(r->r.frmr.fr_mr);
1426 if (rc)
1427 dprintk("RPC: %s: ib_dereg_mr failed %i\n",
1428 __func__, rc);
1429 ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
1430
1431 r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
1432 ia->ri_max_frmr_depth);
1433 if (IS_ERR(r->r.frmr.fr_mr)) {
1434 rc = PTR_ERR(r->r.frmr.fr_mr);
1435 dprintk("RPC: %s: ib_alloc_fast_reg_mr"
1436 " failed %i\n", __func__, rc);
1437 continue;
1438 }
1439 r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
1440 ia->ri_id->device,
1441 ia->ri_max_frmr_depth);
1442 if (IS_ERR(r->r.frmr.fr_pgl)) {
1443 rc = PTR_ERR(r->r.frmr.fr_pgl);
1444 dprintk("RPC: %s: "
1445 "ib_alloc_fast_reg_page_list "
1446 "failed %i\n", __func__, rc);
1447
1448 ib_dereg_mr(r->r.frmr.fr_mr);
1449 continue;
1450 }
1451 r->r.frmr.fr_state = FRMR_IS_INVALID;
1452 }
1453}
1454
Chuck Leverc2922c02014-07-29 17:24:36 -04001455/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
1456 * some req segments uninitialized.
1457 */
1458static void
1459rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
1460{
1461 if (*mw) {
1462 list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
1463 *mw = NULL;
1464 }
1465}
1466
1467/* Cycle mw's back in reverse order, and "spin" them.
1468 * This delays and scrambles reuse as much as possible.
1469 */
1470static void
1471rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
1472{
1473 struct rpcrdma_mr_seg *seg = req->rl_segments;
1474 struct rpcrdma_mr_seg *seg1 = seg;
1475 int i;
1476
1477 for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
Chuck Lever3eb35812015-01-21 11:02:54 -05001478 rpcrdma_buffer_put_mr(&seg->rl_mw, buf);
1479 rpcrdma_buffer_put_mr(&seg1->rl_mw, buf);
Chuck Leverc2922c02014-07-29 17:24:36 -04001480}
1481
1482static void
1483rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
1484{
1485 buf->rb_send_bufs[--buf->rb_send_index] = req;
1486 req->rl_niovs = 0;
1487 if (req->rl_reply) {
1488 buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
1489 req->rl_reply->rr_func = NULL;
1490 req->rl_reply = NULL;
1491 }
1492}
1493
/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
 * Redo only the ib_post_send().
 */
static void
rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct ib_send_wr invalidate_wr, *bad_wr;
	int rc;

	dprintk("RPC:       %s: FRMR %p is stale\n", __func__, r);

	/* When this FRMR is re-inserted into rb_mws, it is no longer stale */
	r->r.frmr.fr_state = FRMR_IS_INVALID;

	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
	invalidate_wr.wr_id = (unsigned long)(void *)r;
	invalidate_wr.opcode = IB_WR_LOCAL_INV;
	invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	dprintk("RPC:       %s: frmr %p invalidating rkey %08x\n",
		__func__, r, r->r.frmr.fr_mr->rkey);

	read_lock(&ia->ri_qplock);
	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
	read_unlock(&ia->ri_qplock);
	if (rc) {
		/* Force rpcrdma_buffer_get() to retry */
		r->r.frmr.fr_state = FRMR_IS_STALE;
		dprintk("RPC:       %s: ib_post_send failed, %i\n",
			__func__, rc);
	}
}

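/* Post a fresh LOCAL_INV for each stale FRMR on the list, then
 * splice the whole list back onto rb_mws so the MWs can be reused.
 */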
static void
rpcrdma_retry_flushed_linv(struct list_head *stale,
			   struct rpcrdma_buffer *buf)
{
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	struct list_head *pos;
	struct rpcrdma_mw *r;
	unsigned long flags;

	list_for_each(pos, stale) {
		r = list_entry(pos, struct rpcrdma_mw, mw_list);
		rpcrdma_retry_local_inv(r, ia);
	}

	spin_lock_irqsave(&buf->rb_lock, flags);
	list_splice_tail(stale, &buf->rb_mws);
	spin_unlock_irqrestore(&buf->rb_lock, flags);
}

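/* Populate req->rl_segments with FRMRs taken from rb_mws. Stale
 * FRMRs are set aside on the "stale" list for recovery. If rb_mws
 * runs out before the req is fully populated, the buffers are
 * returned to the pool and NULL is the result.
 */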
static struct rpcrdma_req *
rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
			 struct list_head *stale)
{
	struct rpcrdma_mw *r;
	int i;

	i = RPCRDMA_MAX_SEGS - 1;
	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			       struct rpcrdma_mw, mw_list);
		list_del(&r->mw_list);
		if (r->r.frmr.fr_state == FRMR_IS_STALE) {
			list_add(&r->mw_list, stale);
			continue;
		}
		req->rl_segments[i].rl_mw = r;
		if (unlikely(i-- == 0))
			return req;	/* Success */
	}

	/* Not enough entries on rb_mws for this req */
	rpcrdma_buffer_put_sendbuf(req, buf);
	rpcrdma_buffer_put_mrs(req, buf);
	return NULL;
}

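/* As above, but for FMRs: there is no stale state to check, so
 * simply take MWs off rb_mws until the req is fully populated.
 */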
static struct rpcrdma_req *
rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;
	int i;

	i = RPCRDMA_MAX_SEGS - 1;
	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			       struct rpcrdma_mw, mw_list);
		list_del(&r->mw_list);
		req->rl_segments[i].rl_mw = r;
		if (unlikely(i-- == 0))
			return req;	/* Success */
	}

	/* Not enough entries on rb_mws for this req */
	rpcrdma_buffer_put_sendbuf(req, buf);
	rpcrdma_buffer_put_mrs(req, buf);
	return NULL;
}

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001598/*
1599 * Get a set of request/reply buffers.
1600 *
1601 * Reply buffer (if needed) is attached to send buffer upon return.
1602 * Rule:
1603 * rb_send_index and rb_recv_index MUST always be pointing to the
1604 * *next* available buffer (non-NULL). They are incremented after
1605 * removing buffers, and decremented *before* returning them.
1606 */
struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
	struct list_head stale;
	struct rpcrdma_req *req;
	unsigned long flags;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	if (buffers->rb_send_index == buffers->rb_max_requests) {
		spin_unlock_irqrestore(&buffers->rb_lock, flags);
		dprintk("RPC:       %s: out of request buffers\n", __func__);
		return NULL;
	}

	req = buffers->rb_send_bufs[buffers->rb_send_index];
	if (buffers->rb_send_index < buffers->rb_recv_index) {
		dprintk("RPC:       %s: %d extra receives outstanding (ok)\n",
			__func__,
			buffers->rb_recv_index - buffers->rb_send_index);
		req->rl_reply = NULL;
	} else {
		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
	}
	buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;

	INIT_LIST_HEAD(&stale);
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
		break;
	case RPCRDMA_MTHCAFMR:
		req = rpcrdma_buffer_get_fmrs(req, buffers);
		break;
	default:
		break;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
	if (!list_empty(&stale))
		rpcrdma_retry_flushed_linv(&stale, buffers);
	return req;
}

/*
 * Put request/reply buffers back into pool.
 * Pre-decrement counter/array index.
 */
void
rpcrdma_buffer_put(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
	unsigned long flags;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	rpcrdma_buffer_put_sendbuf(req, buffers);
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
	case RPCRDMA_MTHCAFMR:
		rpcrdma_buffer_put_mrs(req, buffers);
		break;
	default:
		break;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Recover reply buffers from pool.
 * This happens when recovering from error conditions.
 * Post-increment counter/array index.
 */
void
rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	unsigned long flags;

	if (req->rl_iov.length == 0)	/* special case xprt_rdma_allocate() */
		buffers = ((struct rpcrdma_req *) buffers)->rl_buffer;
	spin_lock_irqsave(&buffers->rb_lock, flags);
	if (buffers->rb_recv_index < buffers->rb_max_requests) {
		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Put reply buffers back into pool when not attached to
 * request. This happens in error conditions.
 */
void
rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
{
	struct rpcrdma_buffer *buffers = rep->rr_buffer;
	unsigned long flags;

	rep->rr_func = NULL;
	spin_lock_irqsave(&buffers->rb_lock, flags);
	buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
 */

int
rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
			  struct ib_mr **mrp, struct ib_sge *iov)
{
	struct ib_phys_buf ipb;
	struct ib_mr *mr;
	int rc;

	/*
	 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
	 */
	iov->addr = ib_dma_map_single(ia->ri_id->device,
			va, len, DMA_BIDIRECTIONAL);
	if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
		return -ENOMEM;

	iov->length = len;

	if (ia->ri_have_dma_lkey) {
		*mrp = NULL;
		iov->lkey = ia->ri_dma_lkey;
		return 0;
	} else if (ia->ri_bind_mem != NULL) {
		*mrp = NULL;
		iov->lkey = ia->ri_bind_mem->lkey;
		return 0;
	}

	ipb.addr = iov->addr;
	ipb.size = iov->length;
	mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
			IB_ACCESS_LOCAL_WRITE, &iov->addr);

	dprintk("RPC:       %s: phys convert: 0x%llx "
			"registered 0x%llx length %d\n",
			__func__, (unsigned long long)ipb.addr,
			(unsigned long long)iov->addr, len);

	if (IS_ERR(mr)) {
		*mrp = NULL;
		rc = PTR_ERR(mr);
		dprintk("RPC:       %s: failed with %i\n", __func__, rc);
	} else {
		*mrp = mr;
		iov->lkey = mr->lkey;
		rc = 0;
	}

	return rc;
}

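/* Undo rpcrdma_register_internal(): release the DMA mapping and,
 * if a physical MR was registered, deregister it.
 */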
int
rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
			    struct ib_mr *mr, struct ib_sge *iov)
{
	int rc;

	ib_dma_unmap_single(ia->ri_id->device,
			iov->addr, iov->length, DMA_BIDIRECTIONAL);

	if (mr == NULL)
		return 0;

	rc = ib_dereg_mr(mr);
	if (rc)
		dprintk("RPC:       %s: ib_dereg_mr failed %i\n", __func__, rc);
	return rc;
}

/*
 * Wrappers for chunk registration, shared by read/write chunk code.
 */

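/* DMA-map a single chunk segment: page-based segments go through
 * ib_dma_map_page(), kmalloc'd segments through ib_dma_map_single().
 */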
static void
rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
{
	seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
	seg->mr_dmalen = seg->mr_len;
	if (seg->mr_page)
		seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
				seg->mr_page, offset_in_page(seg->mr_offset),
				seg->mr_dmalen, seg->mr_dir);
	else
		seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
				seg->mr_offset,
				seg->mr_dmalen, seg->mr_dir);
	if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
		dprintk("RPC:       %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
			__func__,
			(unsigned long long)seg->mr_dma,
			seg->mr_offset, seg->mr_dmalen);
	}
}

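/* Reverse rpcrdma_map_one(): release the segment's DMA mapping. */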
static void
rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
{
	if (seg->mr_page)
		ib_dma_unmap_page(ia->ri_id->device,
				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
	else
		ib_dma_unmap_single(ia->ri_id->device,
				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
}

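/* Map a run of adjacent chunk segments with a single FRMR: DMA-map
 * each segment into the MR's page list (stopping at the first
 * page-alignment hole), bump the MR's rkey, then post a FAST_REG_MR
 * work request to make the mapping live.
 */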
static int
rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
			       int *nsegs, int writing, struct rpcrdma_ia *ia,
			       struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	struct rpcrdma_mw *mw = seg1->rl_mw;
	struct rpcrdma_frmr *frmr = &mw->r.frmr;
	struct ib_mr *mr = frmr->fr_mr;
	struct ib_send_wr fastreg_wr, *bad_wr;
	u8 key;
	int len, pageoff;
	int i, rc;
	int seg_len;
	u64 pa;
	int page_no;

	pageoff = offset_in_page(seg1->mr_offset);
	seg1->mr_offset -= pageoff;	/* start of page */
	seg1->mr_len += pageoff;
	len = -pageoff;
	if (*nsegs > ia->ri_max_frmr_depth)
		*nsegs = ia->ri_max_frmr_depth;
	for (page_no = i = 0; i < *nsegs;) {
		rpcrdma_map_one(ia, seg, writing);
		pa = seg->mr_dma;
		for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
			frmr->fr_pgl->page_list[page_no++] = pa;
			pa += PAGE_SIZE;
		}
		len += seg->mr_len;
		++seg;
		++i;
		/* Check for holes */
		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	dprintk("RPC:       %s: Using frmr %p to map %d segments\n",
		__func__, mw, i);

	frmr->fr_state = FRMR_IS_VALID;

	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
	fastreg_wr.wr_id = (unsigned long)(void *)mw;
	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
	fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma;
	fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
	fastreg_wr.wr.fast_reg.page_list_len = page_no;
	fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
	fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
	if (fastreg_wr.wr.fast_reg.length < len) {
		rc = -EIO;
		goto out_err;
	}

	/* Bump the key */
	key = (u8)(mr->rkey & 0x000000FF);
	ib_update_fast_reg_key(mr, ++key);

	fastreg_wr.wr.fast_reg.access_flags = (writing ?
				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
				IB_ACCESS_REMOTE_READ);
	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
	if (rc) {
		dprintk("RPC:       %s: failed ib_post_send for register,"
			" status %i\n", __func__, rc);
		ib_update_fast_reg_key(mr, --key);
		goto out_err;
	} else {
		seg1->mr_rkey = mr->rkey;
		seg1->mr_base = seg1->mr_dma + pageoff;
		seg1->mr_nsegs = i;
		seg1->mr_len = len;
	}
	*nsegs = i;
	return 0;
out_err:
	frmr->fr_state = FRMR_IS_INVALID;
	while (i--)
		rpcrdma_unmap_one(ia, --seg);
	return rc;
}

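/* Invalidate an FRMR mapping: release each segment's DMA mapping,
 * then post a LOCAL_INV work request to retire the rkey.
 */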
static int
rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
				 struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	struct ib_send_wr invalidate_wr, *bad_wr;
	int rc;

	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;

	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
	invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
	invalidate_wr.opcode = IB_WR_LOCAL_INV;
	invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	read_lock(&ia->ri_qplock);
	while (seg1->mr_nsegs--)
		rpcrdma_unmap_one(ia, seg++);
	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
	read_unlock(&ia->ri_qplock);
	if (rc) {
		/* Force rpcrdma_buffer_get() to retry */
		seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
		dprintk("RPC:       %s: failed ib_post_send for invalidate,"
			" status %i\n", __func__, rc);
	}
	return rc;
}

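/* Map up to RPCRDMA_MAX_DATA_SEGS adjacent segments with one FMR by
 * collecting their DMA addresses and handing the array to
 * ib_map_phys_fmr().
 */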
static int
rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
			      int *nsegs, int writing, struct rpcrdma_ia *ia)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
	int len, pageoff, i, rc;

	pageoff = offset_in_page(seg1->mr_offset);
	seg1->mr_offset -= pageoff;	/* start of page */
	seg1->mr_len += pageoff;
	len = -pageoff;
	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
		*nsegs = RPCRDMA_MAX_DATA_SEGS;
	for (i = 0; i < *nsegs;) {
		rpcrdma_map_one(ia, seg, writing);
		physaddrs[i] = seg->mr_dma;
		len += seg->mr_len;
		++seg;
		++i;
		/* Check for holes */
		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	rc = ib_map_phys_fmr(seg1->rl_mw->r.fmr, physaddrs, i, seg1->mr_dma);
	if (rc) {
		dprintk("RPC:       %s: failed ib_map_phys_fmr "
			"%u@0x%llx+%i (%d)... status %i\n", __func__,
			len, (unsigned long long)seg1->mr_dma,
			pageoff, i, rc);
		while (i--)
			rpcrdma_unmap_one(ia, --seg);
	} else {
		seg1->mr_rkey = seg1->rl_mw->r.fmr->rkey;
		seg1->mr_base = seg1->mr_dma + pageoff;
		seg1->mr_nsegs = i;
		seg1->mr_len = len;
	}
	*nsegs = i;
	return rc;
}

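/* Unmap an FMR: hand it back to ib_unmap_fmr(), then release the
 * DMA mappings of each of its segments.
 */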
static int
rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
				struct rpcrdma_ia *ia)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	LIST_HEAD(l);
	int rc;

	list_add(&seg1->rl_mw->r.fmr->list, &l);
	rc = ib_unmap_fmr(&l);
	read_lock(&ia->ri_qplock);
	while (seg1->mr_nsegs--)
		rpcrdma_unmap_one(ia, seg++);
	read_unlock(&ia->ri_qplock);
	if (rc)
		dprintk("RPC:       %s: failed ib_unmap_fmr,"
			" status %i\n", __func__, rc);
	return rc;
}

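/* Register a chunk's segments using the memory registration
 * strategy selected at mount time. Returns the number of segments
 * mapped, or a negative errno.
 */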
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002001int
2002rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
2003 int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
2004{
2005 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002006 int rc = 0;
2007
2008 switch (ia->ri_memreg_strategy) {
2009
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002010 case RPCRDMA_ALLPHYSICAL:
2011 rpcrdma_map_one(ia, seg, writing);
2012 seg->mr_rkey = ia->ri_bind_mem->rkey;
2013 seg->mr_base = seg->mr_dma;
2014 seg->mr_nsegs = 1;
2015 nsegs = 1;
2016 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002017
Tom Talpey3197d3092008-10-09 15:00:20 -04002018 /* Registration using frmr registration */
2019 case RPCRDMA_FRMR:
2020 rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
2021 break;
2022
Tom Talpey8d4ba032008-10-09 14:59:49 -04002023 /* Registration using fmr memory registration */
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002024 case RPCRDMA_MTHCAFMR:
Tom Talpey8d4ba032008-10-09 14:59:49 -04002025 rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002026 break;
2027
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002028 default:
Chuck Lever92b98362014-11-08 20:14:12 -05002029 return -EIO;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002030 }
2031 if (rc)
Chuck Lever92b98362014-11-08 20:14:12 -05002032 return rc;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002033
2034 return nsegs;
2035}
2036
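/* Tear down a chunk registration made by rpcrdma_register_external().
 * Returns the number of segments that were originally mapped.
 */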
int
rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
			    struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	int nsegs = seg->mr_nsegs, rc;

	switch (ia->ri_memreg_strategy) {

	case RPCRDMA_ALLPHYSICAL:
		read_lock(&ia->ri_qplock);
		rpcrdma_unmap_one(ia, seg);
		read_unlock(&ia->ri_qplock);
		break;

	case RPCRDMA_FRMR:
		rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
		break;

	case RPCRDMA_MTHCAFMR:
		rc = rpcrdma_deregister_fmr_external(seg, ia);
		break;

	default:
		break;
	}
	return nsegs;
}

/*
 * Prepost any receive buffer, then post send.
 *
 * Receive buffer is donated to hardware, reclaimed upon recv completion.
 */
int
rpcrdma_ep_post(struct rpcrdma_ia *ia,
		struct rpcrdma_ep *ep,
		struct rpcrdma_req *req)
{
	struct ib_send_wr send_wr, *send_wr_fail;
	struct rpcrdma_rep *rep = req->rl_reply;
	int rc;

	if (rep) {
		rc = rpcrdma_ep_post_recv(ia, ep, rep);
		if (rc)
			goto out;
		req->rl_reply = NULL;
	}

	send_wr.next = NULL;
	send_wr.wr_id = 0ULL;	/* no send cookie */
	send_wr.sg_list = req->rl_send_iov;
	send_wr.num_sge = req->rl_niovs;
	send_wr.opcode = IB_WR_SEND;
	if (send_wr.num_sge == 4)	/* no need to sync any pad (constant) */
		ib_dma_sync_single_for_device(ia->ri_id->device,
			req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
			DMA_TO_DEVICE);
	ib_dma_sync_single_for_device(ia->ri_id->device,
		req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
		DMA_TO_DEVICE);
	ib_dma_sync_single_for_device(ia->ri_id->device,
		req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
		DMA_TO_DEVICE);

	if (DECR_CQCOUNT(ep) > 0)
		send_wr.send_flags = 0;
	else {	/* Provider must take a send completion every now and then */
		INIT_CQCOUNT(ep);
		send_wr.send_flags = IB_SEND_SIGNALED;
	}

	rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
	if (rc)
		dprintk("RPC:       %s: ib_post_send returned %i\n", __func__,
			rc);
out:
	return rc;
}

/*
 * (Re)post a receive buffer.
 */
int
rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
		     struct rpcrdma_ep *ep,
		     struct rpcrdma_rep *rep)
{
	struct ib_recv_wr recv_wr, *recv_wr_fail;
	int rc;

	recv_wr.next = NULL;
	recv_wr.wr_id = (u64) (unsigned long) rep;
	recv_wr.sg_list = &rep->rr_iov;
	recv_wr.num_sge = 1;

	ib_dma_sync_single_for_cpu(ia->ri_id->device,
		rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);

	rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);

	if (rc)
		dprintk("RPC:       %s: ib_post_recv returned %i\n", __func__,
			rc);
	return rc;
}

/* Physical mapping means one Read/Write list entry per page.
 * All list entries must fit within an inline buffer.
 *
 * NB: The server must return a Write list for NFS READ,
 * which has the same constraint. Factor in the inline
 * rsize as well.
 */
static size_t
rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	unsigned int inline_size, pages;

	inline_size = min_t(unsigned int,
			    cdata->inline_wsize, cdata->inline_rsize);
	inline_size -= RPCRDMA_HDRLEN_MIN;
	pages = inline_size / sizeof(struct rpcrdma_segment);
	return pages << PAGE_SHIFT;
}

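/* MR-based strategies can map up to RPCRDMA_MAX_DATA_SEGS pages per
 * RPC, independent of the inline buffer sizes.
 */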
static size_t
rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt)
{
	return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
}

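/* Return the largest payload, in bytes, that this transport can
 * move in a single RPC, given its memory registration strategy.
 */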
size_t
rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt)
{
	size_t result;

	switch (r_xprt->rx_ia.ri_memreg_strategy) {
	case RPCRDMA_ALLPHYSICAL:
		result = rpcrdma_physical_max_payload(r_xprt);
		break;
	default:
		result = rpcrdma_mr_max_payload(r_xprt);
	}
	return result;
}