\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -04001/*
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040038 */
39
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040040/*
41 * verbs.c
42 *
43 * Encapsulates the major functions managing:
44 * o adapters
45 * o endpoints
46 * o connections
47 * o buffer memory
48 */
49
Alexey Dobriyana6b7a402011-06-06 10:43:46 +000050#include <linux/interrupt.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090051#include <linux/slab.h>
Chuck Levereba8ff62015-01-21 11:03:02 -050052#include <linux/prefetch.h>
Chuck Lever0dd39ca2015-03-30 14:33:43 -040053#include <linux/sunrpc/addr.h>
Chuck Lever65866f82014-05-28 10:33:59 -040054#include <asm/bitops.h>
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040055
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040056#include "xprt_rdma.h"
57
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040058/*
59 * Globals/Macros
60 */
61
Jeff Laytonf895b252014-11-17 16:58:04 -050062#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040063# define RPCDBG_FACILITY RPCDBG_TRANS
64#endif
65
66/*
67 * internal functions
68 */
69
70/*
71 * handle replies in tasklet context, using a single, global list
72 * rdma tasklet function -- just turn around and call the func
73 * for all replies on the list
74 */
75
76static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
77static LIST_HEAD(rpcrdma_tasklets_g);
78
79static void
80rpcrdma_run_tasklet(unsigned long data)
81{
82 struct rpcrdma_rep *rep;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040083 unsigned long flags;
84
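	/* The tasklet's "data" argument is unused; the self-assignment
	 * below exists only to quiet compiler warnings.
	 */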
	data = data;
	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	while (!list_empty(&rpcrdma_tasklets_g)) {
		rep = list_entry(rpcrdma_tasklets_g.next,
				 struct rpcrdma_rep, rr_list);
		list_del(&rep->rr_list);
		spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);

		rpcrdma_reply_handler(rep);

		spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	}
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
}

static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);

static const char * const async_event[] = {
	"CQ error",
	"QP fatal error",
	"QP request error",
	"QP access error",
	"communication established",
	"send queue drained",
	"path migration successful",
	"path mig error",
	"device fatal error",
	"port active",
	"port error",
	"LID change",
	"P_key change",
	"SM change",
	"SRQ error",
	"SRQ limit reached",
	"last WQE reached",
	"client reregister",
	"GID change",
};

#define ASYNC_MSG(status)					\
	((status) < ARRAY_SIZE(async_event) ?			\
		async_event[(status)] : "unknown async error")

static void
rpcrdma_schedule_tasklet(struct list_head *sched_list)
{
	unsigned long flags;

	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	list_splice_tail(sched_list, &rpcrdma_tasklets_g);
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
	tasklet_schedule(&rpcrdma_tasklet_g);
}

static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	pr_err("RPC:       %s: %s on device %s ep %p\n",
	       __func__, ASYNC_MSG(event->event),
	       event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		rpcrdma_conn_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

static void
rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	pr_err("RPC:       %s: %s on device %s ep %p\n",
	       __func__, ASYNC_MSG(event->event),
	       event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		rpcrdma_conn_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

static const char * const wc_status[] = {
	"success",
	"local length error",
	"local QP operation error",
	"local EE context operation error",
	"local protection error",
	"WR flushed",
	"memory management operation error",
	"bad response error",
	"local access error",
	"remote invalid request error",
	"remote access error",
	"remote operation error",
	"transport retry counter exceeded",
	"RNR retry counter exceeded",
	"local RDD violation error",
	"remote invalid RD request",
	"operation aborted",
	"invalid EE context number",
	"invalid EE context state",
	"fatal error",
	"response timeout error",
	"general error",
};

#define COMPLETION_MSG(status)					\
	((status) < ARRAY_SIZE(wc_status) ?			\
		wc_status[(status)] : "unexpected completion error")

static void
rpcrdma_sendcq_process_wc(struct ib_wc *wc)
{
	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->wr_id == RPCRDMA_IGNORE_COMPLETION) {
		if (wc->status != IB_WC_SUCCESS &&
		    wc->status != IB_WC_WR_FLUSH_ERR)
			pr_err("RPC:       %s: SEND: %s\n",
			       __func__, COMPLETION_MSG(wc->status));
	} else {
		struct rpcrdma_mw *r;

		r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
		r->mw_sendcompletion(wc);
	}
}

static int
rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct ib_wc *wcs;
	int budget, count, rc;

	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_send_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			return rc;

		count = rc;
		while (count-- > 0)
			rpcrdma_sendcq_process_wc(wcs++);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	return 0;
}

/*
 * Handle send, fast_reg_mr, and local_inv completions.
 *
 * Send events are typically suppressed and thus do not result
 * in an upcall. Occasionally one is signaled, however. This
 * prevents the provider's completion queue from wrapping and
 * losing a completion.
 */
static void
rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_sendcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC:       %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

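	/* Re-arm the CQ. A positive return from ib_req_notify_cq()
	 * with IB_CQ_REPORT_MISSED_EVENTS means completions arrived
	 * while the CQ was unarmed, so poll one more time.
	 */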
	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_sendcq_poll(cq, ep);
}

static void
rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
{
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long)wc->wr_id;
	struct rpcrdma_ia *ia;

	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS)
		goto out_fail;

	/* status == SUCCESS means all fields in wc are trustworthy */
	if (wc->opcode != IB_WC_RECV)
		return;

	dprintk("RPC:       %s: rep %p opcode 'recv', length %u: success\n",
		__func__, rep, wc->byte_len);

	ia = &rep->rr_rxprt->rx_ia;
	rep->rr_len = wc->byte_len;
	ib_dma_sync_single_for_cpu(ia->ri_id->device,
				   rdmab_addr(rep->rr_rdmabuf),
				   rep->rr_len, DMA_FROM_DEVICE);
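	/* Warm the cache with the start of the reply before the
	 * tasklet runs the reply handler.
	 */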
	prefetch(rdmab_to_msg(rep->rr_rdmabuf));

out_schedule:
	list_add_tail(&rep->rr_list, sched_list);
	return;
out_fail:
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("RPC:       %s: rep %p: %s\n",
		       __func__, rep, COMPLETION_MSG(wc->status));
	rep->rr_len = ~0U;
	goto out_schedule;
}

static int
rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct list_head sched_list;
	struct ib_wc *wcs;
	int budget, count, rc;

	INIT_LIST_HEAD(&sched_list);
	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_recv_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			goto out_schedule;

		count = rc;
		while (count-- > 0)
			rpcrdma_recvcq_process_wc(wcs++, &sched_list);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	rc = 0;

out_schedule:
	rpcrdma_schedule_tasklet(&sched_list);
	return rc;
}

/*
 * Handle receive completions.
 *
 * It is reentrant but processes single events in order to maintain
 * ordering of receives to keep server credits.
 *
 * It is the responsibility of the scheduled tasklet to return
 * recv buffers to the pool. NOTE: this affects synchronization of
 * connection shutdown. That is, the structures required for
 * the completion of the reply handler must remain intact until
 * all memory has been reclaimed.
 */
static void
rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_recvcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC:       %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_recvcq_poll(cq, ep);
}

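/* Drain both completion queues so that no stale completions
 * are delivered after the QP is torn down or replaced.
 */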
static void
rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
{
	struct ib_wc wc;
	LIST_HEAD(sched_list);

	while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
		rpcrdma_recvcq_process_wc(&wc, &sched_list);
	if (!list_empty(&sched_list))
		rpcrdma_schedule_tasklet(&sched_list);
	while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
		rpcrdma_sendcq_process_wc(&wc);
}

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
static const char * const conn[] = {
	"address resolved",
	"address error",
	"route resolved",
	"route error",
	"connect request",
	"connect response",
	"connect error",
	"unreachable",
	"rejected",
	"established",
	"disconnected",
	"device removal",
	"multicast join",
	"multicast error",
	"address change",
	"timewait exit",
};

#define CONNECTION_MSG(status)						\
	((status) < ARRAY_SIZE(conn) ?					\
		conn[(status)] : "unrecognized connection error")
#endif

static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
#endif
	struct ib_qp_attr *attr = &ia->ri_qp_attr;
	struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr;
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ia->ri_async_rc = 0;
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC:       %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC:       %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		ib_query_qp(ia->ri_id->qp, attr,
			    IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
			    iattr);
		dprintk("RPC:       %s: %d responder resources"
			" (%d initiator)\n",
			__func__, attr->max_dest_rd_atomic,
			attr->max_rd_atomic);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		connstate = -ECONNREFUSED;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
		goto connected;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		connstate = -ENODEV;
connected:
		dprintk("RPC:       %s: %sconnected\n",
			__func__, connstate > 0 ? "" : "dis");
		ep->rep_connected = connstate;
		rpcrdma_conn_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		/*FALLTHROUGH*/
	default:
		dprintk("RPC:       %s: %pIS:%u (ep 0x%p): %s\n",
			__func__, sap, rpc_get_port(sap), ep,
			CONNECTION_MSG(event->event));
		break;
	}

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	if (connstate == 1) {
		int ird = attr->max_dest_rd_atomic;
		int tird = ep->rep_remote_cma.responder_resources;

		pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n",
			sap, rpc_get_port(sap),
			ia->ri_id->device->name,
			ia->ri_ops->ro_displayname,
			xprt->rx_buf.rb_max_requests,
			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
	} else if (connstate < 0) {
		pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n",
			sap, rpc_get_port(sap), connstate);
	}
#endif

	return 0;
}

static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
			struct rpcrdma_ia *ia, struct sockaddr *addr)
{
	struct rdma_cm_id *id;
	int rc;

	init_completion(&ia->ri_done);

	id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		dprintk("RPC:       %s: rdma_create_id() failed %i\n",
			__func__, rc);
		return id;
	}

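	/* Resolve the address, then the route. Each step completes
	 * asynchronously via rpcrdma_conn_upcall(), which records the
	 * result in ri_async_rc; it is preloaded with -ETIMEDOUT in
	 * case the expected CM event never arrives.
	 */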
	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC:       %s: rdma_resolve_addr() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC:       %s: rdma_resolve_route() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}

/*
 * Drain any cq, prior to teardown.
 */
static void
rpcrdma_clean_cq(struct ib_cq *cq)
{
	struct ib_wc wc;
	int count = 0;

	while (1 == ib_poll_cq(cq, 1, &wc))
		++count;

	if (count)
		dprintk("RPC:       %s: flushed %d events (last 0x%x)\n",
			__func__, count, wc.opcode);
}

/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc, mem_priv;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct ib_device_attr *devattr = &ia->ri_devattr;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC:       %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_query_device(ia->ri_id->device, devattr);
	if (rc) {
		dprintk("RPC:       %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out3;
	}

	if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
	}

	if (memreg == RPCRDMA_FRMR) {
		/* Requires both frmr reg and local dma lkey */
		if (((devattr->device_cap_flags &
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) ||
		      (devattr->max_fast_reg_page_list_len == 0)) {
			dprintk("RPC:       %s: FRMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_MTHCAFMR;
		}
	}
	if (memreg == RPCRDMA_MTHCAFMR) {
		if (!ia->ri_id->device->alloc_fmr) {
			dprintk("RPC:       %s: MTHCAFMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_ALLPHYSICAL;
		}
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_FRMR:
		ia->ri_ops = &rpcrdma_frwr_memreg_ops;
		break;
	case RPCRDMA_ALLPHYSICAL:
		ia->ri_ops = &rpcrdma_physical_memreg_ops;
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
	case RPCRDMA_MTHCAFMR:
		ia->ri_ops = &rpcrdma_fmr_memreg_ops;
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
	register_setup:
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			rc = -ENOMEM;
			goto out3;
		}
		break;
	default:
		printk(KERN_ERR "RPC: Unsupported memory "
				"registration mode: %d\n", memreg);
		rc = -ENOMEM;
		goto out3;
	}
	dprintk("RPC:       %s: memory registration strategy is '%s'\n",
		__func__, ia->ri_ops->ro_displayname);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	rwlock_init(&ia->ri_qplock);
	return 0;

out3:
	ib_dealloc_pd(ia->ri_pd);
	ia->ri_pd = NULL;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}

/*
 * Clean up/close an IA.
 *   o if event handles and PD have been initialized, free them.
 *   o close the IA
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC:       %s: entering\n", __func__);
	if (ia->ri_bind_mem != NULL) {
		rc = ib_dereg_mr(ia->ri_bind_mem);
		dprintk("RPC:       %s: ib_dereg_mr returned %i\n",
			__func__, rc);
	}

	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
		if (ia->ri_id->qp)
			rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = NULL;
	}

	/* If the pd is still busy, xprtrdma missed freeing a resource */
	if (ia->ri_pd && !IS_ERR(ia->ri_pd))
		WARN_ON(ib_dealloc_pd(ia->ri_pd));
}

/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
				struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr *devattr = &ia->ri_devattr;
	struct ib_cq *sendcq, *recvcq;
	int rc, err;

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > devattr->max_qp_wr)
		cdata->max_requests = devattr->max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	rc = ia->ri_ops->ro_open(ia, ep, cdata);
	if (rc)
		return rc;
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	if (cdata->padding) {
		ep->rep_padbuf = rpcrdma_alloc_regbuf(ia, cdata->padding,
						      GFP_KERNEL);
		if (IS_ERR(ep->rep_padbuf))
			return PTR_ERR(ep->rep_padbuf);
	} else
		ep->rep_padbuf = NULL;

	dprintk("RPC:       %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

	/* set trigger for requesting send completion */
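	/* A signaled completion is requested roughly once per half
	 * send queue (capped at RPCRDMA_MAX_UNSIGNALED_SENDS), so
	 * the provider's CQ cannot wrap while most sends remain
	 * unsignaled.
	 */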
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
	if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
		ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
	else if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;
	INIT_CQCOUNT(ep);
	init_waitqueue_head(&ep->rep_connect_wait);
	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

	sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
			      rpcrdma_cq_async_error_upcall, ep,
			      ep->rep_attr.cap.max_send_wr + 1, 0);
	if (IS_ERR(sendcq)) {
		rc = PTR_ERR(sendcq);
		dprintk("RPC:       %s: failed to create send CQ: %i\n",
			__func__, rc);
		goto out1;
	}

	rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		goto out2;
	}

	recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
			      rpcrdma_cq_async_error_upcall, ep,
			      ep->rep_attr.cap.max_recv_wr + 1, 0);
	if (IS_ERR(recvcq)) {
		rc = PTR_ERR(recvcq);
		dprintk("RPC:       %s: failed to create recv CQ: %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		ib_destroy_cq(recvcq);
		goto out2;
	}

	ep->rep_attr.send_cq = sendcq;
	ep->rep_attr.recv_cq = recvcq;

	/* Initialize cma parameters */

	/* RPC/RDMA does not use private data */
	ep->rep_remote_cma.private_data = NULL;
	ep->rep_remote_cma.private_data_len = 0;

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	if (devattr->max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
		ep->rep_remote_cma.responder_resources = 32;
	else
		ep->rep_remote_cma.responder_resources =
						devattr->max_qp_rd_atom;

	ep->rep_remote_cma.retry_count = 7;
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	err = ib_destroy_cq(sendcq);
	if (err)
		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
			__func__, err);
out1:
	rpcrdma_free_regbuf(ia, ep->rep_padbuf);
	return rc;
}

/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC:       %s: entering, connected is %d\n",
		__func__, ep->rep_connected);

	cancel_delayed_work_sync(&ep->rep_connect_worker);

	if (ia->ri_id->qp) {
		rpcrdma_ep_disconnect(ep, ia);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	rpcrdma_free_regbuf(ia, ep->rep_padbuf);

	rpcrdma_clean_cq(ep->rep_attr.recv_cq);
	rc = ib_destroy_cq(ep->rep_attr.recv_cq);
	if (rc)
		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
			__func__, rc);

	rpcrdma_clean_cq(ep->rep_attr.send_cq);
	rc = ib_destroy_cq(ep->rep_attr.send_cq);
	if (rc)
		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
			__func__, rc);
}

/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id, *old;
	int rc = 0;
	int retry_count = 0;

	if (ep->rep_connected != 0) {
		struct rpcrdma_xprt *xprt;
retry:
		dprintk("RPC:       %s: reconnecting...\n", __func__);

		rpcrdma_ep_disconnect(ep, ia);
		rpcrdma_flush_cqs(ep);

		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		ia->ri_ops->ro_reset(xprt);

		id = rpcrdma_create_id(xprt, ia,
				(struct sockaddr *)&xprt->rx_data.addr);
		if (IS_ERR(id)) {
			rc = -EHOSTUNREACH;
			goto out;
		}
		/* TEMP TEMP TEMP - fail if new device:
		 * Deregister/remarshal *all* requests!
		 * Close and recreate adapter, pd, etc!
		 * Re-determine all attributes still sane!
		 * More stuff I haven't thought of!
		 * Rrrgh!
		 */
		if (ia->ri_id->device != id->device) {
			printk("RPC:       %s: can't reconnect on "
				"different device!\n", __func__);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}
		/* END TEMP */
		rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC:       %s: rdma_create_qp failed %i\n",
				__func__, rc);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}

		write_lock(&ia->ri_qplock);
		old = ia->ri_id;
		ia->ri_id = id;
		write_unlock(&ia->ri_qplock);

		rdma_destroy_qp(old);
		rdma_destroy_id(old);
	} else {
		dprintk("RPC:       %s: connecting...\n", __func__);
		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC:       %s: rdma_create_qp failed %i\n",
				__func__, rc);
			/* do not update ep->rep_connected */
			return -ENETUNREACH;
		}
	}

	ep->rep_connected = 0;

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC:       %s: rdma_connect() failed with %i\n",
				__func__, rc);
		goto out;
	}

	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

	/*
	 * Check state. A non-peer reject indicates no listener
	 * (ECONNREFUSED), which may be a transient state. All
	 * others indicate a transport condition which has already
	 * undergone a best-effort connection attempt.
	 */
	if (ep->rep_connected == -ECONNREFUSED &&
	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
		dprintk("RPC:       %s: non-peer_reject, retry\n", __func__);
		goto retry;
	}
	if (ep->rep_connected <= 0) {
		/* Sometimes, the only way to reliably connect to remote
		 * CMs is to use same nonzero values for ORD and IRD. */
		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
		    (ep->rep_remote_cma.responder_resources == 0 ||
		     ep->rep_remote_cma.initiator_depth !=
				ep->rep_remote_cma.responder_resources)) {
			if (ep->rep_remote_cma.responder_resources == 0)
				ep->rep_remote_cma.responder_resources = 1;
			ep->rep_remote_cma.initiator_depth =
				ep->rep_remote_cma.responder_resources;
			goto retry;
		}
		rc = ep->rep_connected;
	} else {
		dprintk("RPC:       %s: connected\n", __func__);
	}

out:
	if (rc)
		ep->rep_connected = rc;
	return rc;
}

/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
void
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	rpcrdma_flush_cqs(ep);
	rc = rdma_disconnect(ia->ri_id);
	if (!rc) {
		/* returns without wait if not connected */
		wait_event_interruptible(ep->rep_connect_wait,
							ep->rep_connected != 1);
		dprintk("RPC:       %s: after wait, %sconnected\n", __func__,
			(ep->rep_connected == 1) ? "still " : "dis");
	} else {
		dprintk("RPC:       %s: rdma_disconnect %i\n", __func__, rc);
		ep->rep_connected = rc;
	}
}

static struct rpcrdma_req *
rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_req *req;

	req = kzalloc(sizeof(*req), GFP_KERNEL);
	if (req == NULL)
		return ERR_PTR(-ENOMEM);

	req->rl_buffer = &r_xprt->rx_buf;
	return req;
}

static struct rpcrdma_rep *
rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_rep *rep;
	int rc;

	rc = -ENOMEM;
	rep = kzalloc(sizeof(*rep), GFP_KERNEL);
	if (rep == NULL)
		goto out;

	rep->rr_rdmabuf = rpcrdma_alloc_regbuf(ia, cdata->inline_rsize,
					       GFP_KERNEL);
	if (IS_ERR(rep->rr_rdmabuf)) {
		rc = PTR_ERR(rep->rr_rdmabuf);
		goto out_free;
	}

	rep->rr_rxprt = r_xprt;
	return rep;

out_free:
	kfree(rep);
out:
	return ERR_PTR(rc);
}

int
rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	char *p;
	size_t len;
	int i, rc;

	buf->rb_max_requests = cdata->max_requests;
	spin_lock_init(&buf->rb_lock);

	/* Need to allocate:
	 *   1.  arrays for send and recv pointers
	 *   2.  arrays of struct rpcrdma_req to fill in pointers
	 *   3.  array of struct rpcrdma_rep for replies
	 * Send/recv buffers in req/rep need to be registered
	 */
	len = buf->rb_max_requests *
		(sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));

	p = kzalloc(len, GFP_KERNEL);
	if (p == NULL) {
		dprintk("RPC:       %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
			__func__, len);
		rc = -ENOMEM;
		goto out;
	}
	buf->rb_pool = p;	/* for freeing it later */

	buf->rb_send_bufs = (struct rpcrdma_req **) p;
	p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
	buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
	p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];

	rc = ia->ri_ops->ro_init(r_xprt);
	if (rc)
		goto out;

	for (i = 0; i < buf->rb_max_requests; i++) {
		struct rpcrdma_req *req;
		struct rpcrdma_rep *rep;

		req = rpcrdma_create_req(r_xprt);
		if (IS_ERR(req)) {
			dprintk("RPC:       %s: request buffer %d alloc"
				" failed\n", __func__, i);
			rc = PTR_ERR(req);
			goto out;
		}
		buf->rb_send_bufs[i] = req;

		rep = rpcrdma_create_rep(r_xprt);
		if (IS_ERR(rep)) {
			dprintk("RPC:       %s: reply buffer %d alloc failed\n",
				__func__, i);
			rc = PTR_ERR(rep);
			goto out;
		}
		buf->rb_recv_bufs[i] = rep;
	}

	return 0;
out:
	rpcrdma_buffer_destroy(buf);
	return rc;
}

static void
rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep)
{
	if (!rep)
		return;

	rpcrdma_free_regbuf(ia, rep->rr_rdmabuf);
	kfree(rep);
}

static void
rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
{
	if (!req)
		return;

	rpcrdma_free_regbuf(ia, req->rl_sendbuf);
	rpcrdma_free_regbuf(ia, req->rl_rdmabuf);
	kfree(req);
}

void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	int i;

	/* clean up in reverse order from create
	 *   1.  recv mr memory (mr free, then kfree)
	 *   2.  send mr memory (mr free, then kfree)
	 *   3.  MWs
	 */
	dprintk("RPC:       %s: entering\n", __func__);

	for (i = 0; i < buf->rb_max_requests; i++) {
		if (buf->rb_recv_bufs)
			rpcrdma_destroy_rep(ia, buf->rb_recv_bufs[i]);
		if (buf->rb_send_bufs)
			rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]);
	}

	ia->ri_ops->ro_destroy(buf);

	kfree(buf->rb_pool);
}

/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
 * some req segments uninitialized.
 */
static void
rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
{
	if (*mw) {
		list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
		*mw = NULL;
	}
}

/* Cycle mw's back in reverse order, and "spin" them.
 * This delays and scrambles reuse as much as possible.
 */
static void
rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mr_seg *seg = req->rl_segments;
	struct rpcrdma_mr_seg *seg1 = seg;
	int i;

	for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
		rpcrdma_buffer_put_mr(&seg->rl_mw, buf);
	rpcrdma_buffer_put_mr(&seg1->rl_mw, buf);
}

static void
rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	buf->rb_send_bufs[--buf->rb_send_index] = req;
	req->rl_niovs = 0;
	if (req->rl_reply) {
		buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
		req->rl_reply = NULL;
	}
}

/* rpcrdma_unmap_one() was already done during deregistration.
 * Redo only the ib_post_send().
 */
static void
rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct ib_send_wr invalidate_wr, *bad_wr;
	int rc;

	dprintk("RPC:       %s: FRMR %p is stale\n", __func__, r);

	/* When this FRMR is re-inserted into rb_mws, it is no longer stale */
	r->r.frmr.fr_state = FRMR_IS_INVALID;

	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
	invalidate_wr.wr_id = (unsigned long)(void *)r;
	invalidate_wr.opcode = IB_WR_LOCAL_INV;
	invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	dprintk("RPC:       %s: frmr %p invalidating rkey %08x\n",
		__func__, r, r->r.frmr.fr_mr->rkey);

	read_lock(&ia->ri_qplock);
	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
	read_unlock(&ia->ri_qplock);
	if (rc) {
		/* Force rpcrdma_buffer_get() to retry */
		r->r.frmr.fr_state = FRMR_IS_STALE;
		dprintk("RPC:       %s: ib_post_send failed, %i\n",
			__func__, rc);
	}
}

static void
rpcrdma_retry_flushed_linv(struct list_head *stale,
			   struct rpcrdma_buffer *buf)
{
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	struct list_head *pos;
	struct rpcrdma_mw *r;
	unsigned long flags;

	list_for_each(pos, stale) {
		r = list_entry(pos, struct rpcrdma_mw, mw_list);
		rpcrdma_retry_local_inv(r, ia);
	}

	spin_lock_irqsave(&buf->rb_lock, flags);
	list_splice_tail(stale, &buf->rb_mws);
	spin_unlock_irqrestore(&buf->rb_lock, flags);
}

static struct rpcrdma_req *
rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
			 struct list_head *stale)
{
	struct rpcrdma_mw *r;
	int i;

	i = RPCRDMA_MAX_SEGS - 1;
	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			       struct rpcrdma_mw, mw_list);
		list_del(&r->mw_list);
		if (r->r.frmr.fr_state == FRMR_IS_STALE) {
			list_add(&r->mw_list, stale);
			continue;
		}
		req->rl_segments[i].rl_mw = r;
		if (unlikely(i-- == 0))
			return req;	/* Success */
	}

	/* Not enough entries on rb_mws for this req */
	rpcrdma_buffer_put_sendbuf(req, buf);
	rpcrdma_buffer_put_mrs(req, buf);
	return NULL;
}

static struct rpcrdma_req *
rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;
	int i;

	i = RPCRDMA_MAX_SEGS - 1;
	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			       struct rpcrdma_mw, mw_list);
		list_del(&r->mw_list);
		req->rl_segments[i].rl_mw = r;
		if (unlikely(i-- == 0))
			return req;	/* Success */
	}

	/* Not enough entries on rb_mws for this req */
	rpcrdma_buffer_put_sendbuf(req, buf);
	rpcrdma_buffer_put_mrs(req, buf);
	return NULL;
}

/*
 * Get a set of request/reply buffers.
 *
 * Reply buffer (if needed) is attached to send buffer upon return.
 * Rule:
 *    rb_send_index and rb_recv_index MUST always be pointing to the
 *    *next* available buffer (non-NULL). They are incremented after
 *    removing buffers, and decremented *before* returning them.
 */
struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
	struct list_head stale;
	struct rpcrdma_req *req;
	unsigned long flags;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	if (buffers->rb_send_index == buffers->rb_max_requests) {
		spin_unlock_irqrestore(&buffers->rb_lock, flags);
		dprintk("RPC:       %s: out of request buffers\n", __func__);
		return ((struct rpcrdma_req *)NULL);
	}

	req = buffers->rb_send_bufs[buffers->rb_send_index];
	if (buffers->rb_send_index < buffers->rb_recv_index) {
		dprintk("RPC:       %s: %d extra receives outstanding (ok)\n",
			__func__,
			buffers->rb_recv_index - buffers->rb_send_index);
		req->rl_reply = NULL;
	} else {
		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
	}
	buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;

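	/* FRMRs flushed by a lost connection are parked on "stale"
	 * here and repaired with LOCAL_INV after rb_lock is dropped.
	 */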
1354 INIT_LIST_HEAD(&stale);
Chuck Leverc2922c02014-07-29 17:24:36 -04001355 switch (ia->ri_memreg_strategy) {
1356 case RPCRDMA_FRMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001357 req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
1358 break;
Chuck Leverc2922c02014-07-29 17:24:36 -04001359 case RPCRDMA_MTHCAFMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001360 req = rpcrdma_buffer_get_fmrs(req, buffers);
Chuck Leverc2922c02014-07-29 17:24:36 -04001361 break;
1362 default:
1363 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001364 }
1365 spin_unlock_irqrestore(&buffers->rb_lock, flags);
Chuck Leverddb6beb2014-07-29 17:24:54 -04001366 if (!list_empty(&stale))
1367 rpcrdma_retry_flushed_linv(&stale, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001368 return req;
1369}
1370
1371/*
1372 * Put request/reply buffers back into pool.
1373 * Pre-decrement counter/array index.
1374 */
1375void
1376rpcrdma_buffer_put(struct rpcrdma_req *req)
1377{
1378 struct rpcrdma_buffer *buffers = req->rl_buffer;
1379 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001380 unsigned long flags;
1381
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001382 spin_lock_irqsave(&buffers->rb_lock, flags);
Chuck Leverc2922c02014-07-29 17:24:36 -04001383 rpcrdma_buffer_put_sendbuf(req, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001384 switch (ia->ri_memreg_strategy) {
Tom Talpey3197d3092008-10-09 15:00:20 -04001385 case RPCRDMA_FRMR:
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001386 case RPCRDMA_MTHCAFMR:
Chuck Leverc2922c02014-07-29 17:24:36 -04001387 rpcrdma_buffer_put_mrs(req, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001388 break;
1389 default:
1390 break;
1391 }
1392 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1393}
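/*
 * Usage sketch for the get/put pair above (illustrative only, not
 * called from this file; "r_xprt" stands for whatever rpcrdma_xprt
 * the caller owns, and error handling is elided):
 *
 *	struct rpcrdma_req *req;
 *
 *	req = rpcrdma_buffer_get(&r_xprt->rx_buf);
 *	if (req == NULL)
 *		return -ENOMEM;
 *	...marshal the RPC and post it via rpcrdma_ep_post()...
 *	rpcrdma_buffer_put(req);
 */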
1394
1395/*
1396 * Recover reply buffers from pool.
1397 * This happens when recovering from error conditions.
1398 * Post-increment counter/array index.
1399 */
1400void
1401rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
1402{
1403 struct rpcrdma_buffer *buffers = req->rl_buffer;
1404 unsigned long flags;
1405
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001406 spin_lock_irqsave(&buffers->rb_lock, flags);
1407 if (buffers->rb_recv_index < buffers->rb_max_requests) {
1408 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1409 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1410 }
1411 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1412}
1413
1414/*
1415 * Put reply buffers back into pool when not attached to
Chuck Leverb45ccfd2014-05-28 10:32:34 -04001416 * request. This happens in error conditions.
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001417 */
1418void
1419rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
1420{
Chuck Leverfed171b2015-05-26 11:51:37 -04001421 struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001422 unsigned long flags;
1423
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001424 spin_lock_irqsave(&buffers->rb_lock, flags);
1425 buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
1426 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1427}
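/*
 * The two reply-buffer helpers above preserve the Rule stated ahead
 * of rpcrdma_buffer_get(): rb_recv_index always points at the next
 * available rep. A worked trace (index values illustrative): with
 * rb_recv_index == 5, rpcrdma_recv_buffer_get() hands out slot 5 and
 * post-increments to 6; rpcrdma_recv_buffer_put() pre-decrements back
 * to 5 and restores the rep there, so balanced get/put pairs leave
 * the index pointing at a non-NULL entry.
 */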
1428
1429/*
1430 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
1431 */
1432
Chuck Leverd6547882015-03-30 14:35:44 -04001433void
1434rpcrdma_mapping_error(struct rpcrdma_mr_seg *seg)
1435{
1436 dprintk("RPC: map_one: offset %p iova %llx len %zu\n",
1437 seg->mr_offset,
1438 (unsigned long long)seg->mr_dma, seg->mr_dmalen);
1439}
1440
Chuck Leverdf515ca2015-01-21 11:04:41 -05001441static int
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001442rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
1443 struct ib_mr **mrp, struct ib_sge *iov)
1444{
1445 struct ib_phys_buf ipb;
1446 struct ib_mr *mr;
1447 int rc;
1448
1449 /*
1450 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
1451 */
1452 iov->addr = ib_dma_map_single(ia->ri_id->device,
1453 va, len, DMA_BIDIRECTIONAL);
Yan Burmanbf858ab2014-06-19 16:06:30 +03001454 if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
1455 return -ENOMEM;
1456
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001457 iov->length = len;
1458
Tom Talpeybd7ed1d2008-10-09 15:00:09 -04001459 if (ia->ri_have_dma_lkey) {
1460 *mrp = NULL;
1461 iov->lkey = ia->ri_dma_lkey;
1462 return 0;
1463 } else if (ia->ri_bind_mem != NULL) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001464 *mrp = NULL;
1465 iov->lkey = ia->ri_bind_mem->lkey;
1466 return 0;
1467 }
1468
1469 ipb.addr = iov->addr;
1470 ipb.size = iov->length;
1471 mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
1472 IB_ACCESS_LOCAL_WRITE, &iov->addr);
1473
1474 dprintk("RPC: %s: phys convert: 0x%llx "
1475 "registered 0x%llx length %d\n",
Andrew Mortona56daeb2007-10-16 01:29:57 -07001476 __func__, (unsigned long long)ipb.addr,
1477 (unsigned long long)iov->addr, len);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001478
1479 if (IS_ERR(mr)) {
1480 *mrp = NULL;
1481 rc = PTR_ERR(mr);
1482 dprintk("RPC: %s: failed with %i\n", __func__, rc);
1483 } else {
1484 *mrp = mr;
1485 iov->lkey = mr->lkey;
1486 rc = 0;
1487 }
1488
1489 return rc;
1490}
1491
Chuck Leverdf515ca2015-01-21 11:04:41 -05001492static int
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001493rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
1494 struct ib_mr *mr, struct ib_sge *iov)
1495{
1496 int rc;
1497
1498 ib_dma_unmap_single(ia->ri_id->device,
1499 iov->addr, iov->length, DMA_BIDIRECTIONAL);
1500
1501 	if (mr == NULL)
1502 return 0;
1503
1504 rc = ib_dereg_mr(mr);
1505 if (rc)
1506 dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
1507 return rc;
1508}
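/*
 * The two helpers above are a matched pair: every successful
 * rpcrdma_register_internal() must eventually be undone by
 * rpcrdma_deregister_internal(). The register path prefers a
 * device-wide DMA lkey, then the pre-created ri_bind_mem MR (both
 * cases leave *mrp NULL, so the deregister path only unmaps), and
 * falls back to a one-off ib_reg_phys_mr() registration. A minimal
 * caller sketch, assuming "va" and "len" describe a kmalloc'ed and
 * therefore physically contiguous buffer:
 *
 *	struct ib_mr *mr;
 *	struct ib_sge iov;
 *
 *	if (rpcrdma_register_internal(ia, va, len, &mr, &iov))
 *		return -ENOMEM;
 *	...use iov.addr, iov.length and iov.lkey in work requests...
 *	rpcrdma_deregister_internal(ia, mr, &iov);
 */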
1509
Chuck Lever9128c3e2015-01-21 11:04:00 -05001510/**
1511 * rpcrdma_alloc_regbuf - kmalloc and register memory for SEND/RECV buffers
1512 * @ia: controlling rpcrdma_ia
1513 * @size: size of buffer to be allocated, in bytes
1514 * @flags: GFP flags
1515 *
1516 * Returns pointer to private header of an area of internally
1517 * registered memory, or an ERR_PTR. The registered buffer follows
1518 * the end of the private header.
1519 *
1520 * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
1521 * receiving the payload of RDMA RECV operations. regbufs are not
1522 * used for RDMA READ/WRITE operations, thus are registered only for
1523 * LOCAL access.
1524 */
1525struct rpcrdma_regbuf *
1526rpcrdma_alloc_regbuf(struct rpcrdma_ia *ia, size_t size, gfp_t flags)
1527{
1528 struct rpcrdma_regbuf *rb;
1529 int rc;
1530
1531 rc = -ENOMEM;
1532 rb = kmalloc(sizeof(*rb) + size, flags);
1533 if (rb == NULL)
1534 goto out;
1535
1536 rb->rg_size = size;
1537 rb->rg_owner = NULL;
1538 rc = rpcrdma_register_internal(ia, rb->rg_base, size,
1539 &rb->rg_mr, &rb->rg_iov);
1540 if (rc)
1541 goto out_free;
1542
1543 return rb;
1544
1545out_free:
1546 kfree(rb);
1547out:
1548 return ERR_PTR(rc);
1549}
1550
1551/**
1552 * rpcrdma_free_regbuf - deregister and free registered buffer
1553 * @ia: controlling rpcrdma_ia
1554 * @rb: regbuf to be deregistered and freed
1555 */
1556void
1557rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
1558{
1559 if (rb) {
1560 rpcrdma_deregister_internal(ia, rb->rg_mr, &rb->rg_iov);
1561 kfree(rb);
1562 }
1563}
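/*
 * Usage sketch for the regbuf API above (illustrative only): the
 * rdmab_addr() and rdmab_length() accessors used later in this file
 * read back the ib_sge that rpcrdma_alloc_regbuf() filled in, and
 * the payload itself starts at rb->rg_base.
 *
 *	struct rpcrdma_regbuf *rb;
 *
 *	rb = rpcrdma_alloc_regbuf(ia, size, GFP_KERNEL);
 *	if (IS_ERR(rb))
 *		return PTR_ERR(rb);
 *	...fill rb->rg_base and post it; then, on teardown...
 *	rpcrdma_free_regbuf(ia, rb);
 */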
1564
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001565/*
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001566 * Prepost any receive buffer, then post send.
1567 *
1568 * Receive buffer is donated to hardware, reclaimed upon recv completion.
1569 */
1570int
1571rpcrdma_ep_post(struct rpcrdma_ia *ia,
1572 struct rpcrdma_ep *ep,
1573 struct rpcrdma_req *req)
1574{
1575 struct ib_send_wr send_wr, *send_wr_fail;
1576 struct rpcrdma_rep *rep = req->rl_reply;
1577 int rc;
1578
1579 if (rep) {
1580 rc = rpcrdma_ep_post_recv(ia, ep, rep);
1581 if (rc)
1582 goto out;
1583 req->rl_reply = NULL;
1584 }
1585
1586 send_wr.next = NULL;
Chuck Levere46ac342015-03-30 14:35:35 -04001587 send_wr.wr_id = RPCRDMA_IGNORE_COMPLETION;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001588 send_wr.sg_list = req->rl_send_iov;
1589 send_wr.num_sge = req->rl_niovs;
1590 send_wr.opcode = IB_WR_SEND;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001591 if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */
1592 ib_dma_sync_single_for_device(ia->ri_id->device,
1593 req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
1594 DMA_TO_DEVICE);
1595 ib_dma_sync_single_for_device(ia->ri_id->device,
1596 req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
1597 DMA_TO_DEVICE);
1598 ib_dma_sync_single_for_device(ia->ri_id->device,
1599 req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
1600 DMA_TO_DEVICE);
1601
1602 if (DECR_CQCOUNT(ep) > 0)
1603 send_wr.send_flags = 0;
1604 else { /* Provider must take a send completion every now and then */
1605 INIT_CQCOUNT(ep);
1606 send_wr.send_flags = IB_SEND_SIGNALED;
1607 }
1608
1609 rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
1610 if (rc)
1611 dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
1612 rc);
1613out:
1614 return rc;
1615}
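/*
 * A note on the signaling throttle above: most SENDs are posted
 * unsignaled, and each post runs DECR_CQCOUNT() toward zero. When
 * the countdown expires, INIT_CQCOUNT() rearms it and that one WR
 * carries IB_SEND_SIGNALED so the provider can retire its send
 * queue entries. For instance (count illustrative), a countdown
 * rearmed to 31 requests roughly one send completion per 32 posts.
 */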
1616
1617/*
1618 * (Re)post a receive buffer.
1619 */
1620int
1621rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
1622 struct rpcrdma_ep *ep,
1623 struct rpcrdma_rep *rep)
1624{
1625 struct ib_recv_wr recv_wr, *recv_wr_fail;
1626 int rc;
1627
1628 recv_wr.next = NULL;
1629 recv_wr.wr_id = (u64) (unsigned long) rep;
Chuck Lever6b1184c2015-01-21 11:04:25 -05001630 recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001631 recv_wr.num_sge = 1;
1632
1633 ib_dma_sync_single_for_cpu(ia->ri_id->device,
Chuck Lever6b1184c2015-01-21 11:04:25 -05001634 rdmab_addr(rep->rr_rdmabuf),
1635 rdmab_length(rep->rr_rdmabuf),
1636 DMA_BIDIRECTIONAL);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001637
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001638 rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
1639
1640 if (rc)
1641 dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
1642 rc);
1643 return rc;
1644}
Chuck Lever43e95982014-07-29 17:23:34 -04001645
Chuck Lever1c9351e2015-03-30 14:34:30 -04001646/* How many chunk list items fit within our inline buffers?
Chuck Lever43e95982014-07-29 17:23:34 -04001647 */
Chuck Lever1c9351e2015-03-30 14:34:30 -04001648unsigned int
1649rpcrdma_max_segments(struct rpcrdma_xprt *r_xprt)
Chuck Lever43e95982014-07-29 17:23:34 -04001650{
1651 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
Chuck Lever1c9351e2015-03-30 14:34:30 -04001652 int bytes, segments;
Chuck Lever43e95982014-07-29 17:23:34 -04001653
Chuck Lever1c9351e2015-03-30 14:34:30 -04001654 bytes = min_t(unsigned int, cdata->inline_wsize, cdata->inline_rsize);
1655 bytes -= RPCRDMA_HDRLEN_MIN;
1656 if (bytes < sizeof(struct rpcrdma_segment) * 2) {
1657 pr_warn("RPC: %s: inline threshold too small\n",
1658 __func__);
1659 return 0;
Chuck Lever43e95982014-07-29 17:23:34 -04001660 }
Chuck Lever1c9351e2015-03-30 14:34:30 -04001661
1662 segments = 1 << (fls(bytes / sizeof(struct rpcrdma_segment)) - 1);
1663 dprintk("RPC: %s: max chunk list size = %d segments\n",
1664 __func__, segments);
1665 return segments;
Chuck Lever43e95982014-07-29 17:23:34 -04001666}
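/*
 * Worked example for the computation above, with illustrative
 * values (the real constants live in xprt_rdma.h): assume 1024-byte
 * inline thresholds, a 28-byte RPCRDMA_HDRLEN_MIN, and a 16-byte
 * struct rpcrdma_segment. Then bytes = 1024 - 28 = 996, which holds
 * 996 / 16 = 62 segments; fls(62) == 6, so the result is rounded
 * down to the nearest power of two: 1 << 5 == 32 segments. The
 * rounding keeps the advertised chunk list size conservative.
 */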