\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -04001/*
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040038 */
39
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040040/*
41 * verbs.c
42 *
43 * Encapsulates the major functions managing:
44 * o adapters
45 * o endpoints
46 * o connections
47 * o buffer memory
48 */
49
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/prefetch.h>
#include <linux/sunrpc/addr.h>
#include <asm/bitops.h>

#include "xprt_rdma.h"

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040058/*
59 * Globals/Macros
60 */
61
Jeff Laytonf895b252014-11-17 16:58:04 -050062#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040063# define RPCDBG_FACILITY RPCDBG_TRANS
64#endif
65
/*
 * internal functions
 */

/*
 * Handle replies in tasklet context, using a single, global list.
 * The rdma tasklet function simply turns around and calls the reply
 * handler for each reply on the list.
 */

static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
static LIST_HEAD(rpcrdma_tasklets_g);

static void
rpcrdma_run_tasklet(unsigned long data)
{
	struct rpcrdma_rep *rep;
	void (*func)(struct rpcrdma_rep *);
	unsigned long flags;

	data = data;	/* tasklet data is unused; silence the warning */
	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	while (!list_empty(&rpcrdma_tasklets_g)) {
		rep = list_entry(rpcrdma_tasklets_g.next,
				 struct rpcrdma_rep, rr_list);
		list_del(&rep->rr_list);
		func = rep->rr_func;
		rep->rr_func = NULL;
		spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);

		if (func)
			func(rep);
		else
			rpcrdma_recv_buffer_put(rep);

		spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	}
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
}

static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);

static const char * const async_event[] = {
	"CQ error",
	"QP fatal error",
	"QP request error",
	"QP access error",
	"communication established",
	"send queue drained",
	"path migration successful",
	"path mig error",
	"device fatal error",
	"port active",
	"port error",
	"LID change",
	"P_key change",
	"SM change",
	"SRQ error",
	"SRQ limit reached",
	"last WQE reached",
	"client reregister",
	"GID change",
};

#define ASYNC_MSG(status)					\
	((status) < ARRAY_SIZE(async_event) ?			\
		async_event[(status)] : "unknown async error")

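/* Splice a list of completed replies onto the global tasklet list,
 * then kick the tasklet so the replies are processed in softirq
 * context.
 */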
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400134static void
Chuck Leverf1a03b72014-11-08 20:14:37 -0500135rpcrdma_schedule_tasklet(struct list_head *sched_list)
136{
137 unsigned long flags;
138
139 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
140 list_splice_tail(sched_list, &rpcrdma_tasklets_g);
141 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
142 tasklet_schedule(&rpcrdma_tasklet_g);
143}
144
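/* Asynchronous event upcalls for the QP and the completion queues.
 * Any event that arrives while the endpoint is connected is treated
 * as fatal: the connection is marked -EIO and waiters are woken.
 */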
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400145static void
146rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
147{
148 struct rpcrdma_ep *ep = context;
149
Chuck Lever7ff11de2014-11-08 20:15:01 -0500150 pr_err("RPC: %s: %s on device %s ep %p\n",
151 __func__, ASYNC_MSG(event->event),
152 event->device->name, context);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400153 if (ep->rep_connected == 1) {
154 ep->rep_connected = -EIO;
Chuck Leverafadc462015-01-21 11:03:11 -0500155 rpcrdma_conn_func(ep);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400156 wake_up_all(&ep->rep_connect_wait);
157 }
158}
159
160static void
161rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
162{
163 struct rpcrdma_ep *ep = context;
164
Chuck Lever7ff11de2014-11-08 20:15:01 -0500165 pr_err("RPC: %s: %s on device %s ep %p\n",
166 __func__, ASYNC_MSG(event->event),
167 event->device->name, context);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400168 if (ep->rep_connected == 1) {
169 ep->rep_connected = -EIO;
Chuck Leverafadc462015-01-21 11:03:11 -0500170 rpcrdma_conn_func(ep);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400171 wake_up_all(&ep->rep_connect_wait);
172 }
173}
174
static const char * const wc_status[] = {
	"success",
	"local length error",
	"local QP operation error",
	"local EE context operation error",
	"local protection error",
	"WR flushed",
	"memory management operation error",
	"bad response error",
	"local access error",
	"remote invalid request error",
	"remote access error",
	"remote operation error",
	"transport retry counter exceeded",
	"RNR retry counter exceeded",
	"local RDD violation error",
	"remote invalid RD request",
	"operation aborted",
	"invalid EE context number",
	"invalid EE context state",
	"fatal error",
	"response timeout error",
	"general error",
};

#define COMPLETION_MSG(status)					\
	((status) < ARRAY_SIZE(wc_status) ?			\
		wc_status[(status)] : "unexpected completion error")

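/* Process one send completion. Sends posted with wr_id set to
 * RPCRDMA_IGNORE_COMPLETION are merely logged on error; otherwise
 * wr_id carries an rpcrdma_mw pointer whose ->mw_sendcompletion
 * method is invoked.
 */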
static void
rpcrdma_sendcq_process_wc(struct ib_wc *wc)
{
	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->wr_id == RPCRDMA_IGNORE_COMPLETION) {
		if (wc->status != IB_WC_SUCCESS &&
		    wc->status != IB_WC_WR_FLUSH_ERR)
			pr_err("RPC:       %s: SEND: %s\n",
			       __func__, COMPLETION_MSG(wc->status));
	} else {
		struct rpcrdma_mw *r;

		r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
		r->mw_sendcompletion(wc);
	}
}

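/* Poll the send CQ in batches of RPCRDMA_POLLSIZE wcs, up to a budget
 * of roughly RPCRDMA_WC_BUDGET completions per upcall, so that one
 * busy CQ cannot monopolize the CPU.
 */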
static int
rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct ib_wc *wcs;
	int budget, count, rc;

	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_send_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			return rc;

		count = rc;
		while (count-- > 0)
			rpcrdma_sendcq_process_wc(wcs++);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	return 0;
}

/*
 * Handle send, fast_reg_mr, and local_inv completions.
 *
 * Send events are typically suppressed and thus do not result
 * in an upcall. Occasionally one is signaled, however. This
 * prevents the provider's completion queue from wrapping and
 * losing a completion.
 */
static void
rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_sendcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC:       %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_sendcq_poll(cq, ep);
}

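/* Process one receive completion: record the reply length, sync the
 * receive buffer for the CPU, and queue the rep on @sched_list for
 * the reply tasklet. Failed or flushed receives are queued with
 * rr_len set to ~0U so the tasklet can recycle them.
 */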
static void
rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
{
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long)wc->wr_id;

	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS)
		goto out_fail;

	/* status == SUCCESS means all fields in wc are trustworthy */
	if (wc->opcode != IB_WC_RECV)
		return;

	dprintk("RPC:       %s: rep %p opcode 'recv', length %u: success\n",
		__func__, rep, wc->byte_len);

	rep->rr_len = wc->byte_len;
	ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
				   rdmab_addr(rep->rr_rdmabuf),
				   rep->rr_len, DMA_FROM_DEVICE);
	prefetch(rdmab_to_msg(rep->rr_rdmabuf));

out_schedule:
	list_add_tail(&rep->rr_list, sched_list);
	return;
out_fail:
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("RPC:       %s: rep %p: %s\n",
		       __func__, rep, COMPLETION_MSG(wc->status));
	rep->rr_len = ~0U;
	goto out_schedule;
}

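/* Poll the receive CQ under the same budget as the send side,
 * collecting completed receives on a local list that is handed to
 * the reply tasklet in a single splice.
 */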
static int
rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct list_head sched_list;
	struct ib_wc *wcs;
	int budget, count, rc;

	INIT_LIST_HEAD(&sched_list);
	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_recv_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			goto out_schedule;

		count = rc;
		while (count-- > 0)
			rpcrdma_recvcq_process_wc(wcs++, &sched_list);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	rc = 0;

out_schedule:
	rpcrdma_schedule_tasklet(&sched_list);
	return rc;
}

/*
 * Handle receive completions.
 *
 * It is reentrant but processes single events in order to maintain
 * ordering of receives to keep server credits.
 *
 * It is the responsibility of the scheduled tasklet to return
 * recv buffers to the pool. NOTE: this affects synchronization of
 * connection shutdown. That is, the structures required for
 * the completion of the reply handler must remain intact until
 * all memory has been reclaimed.
 */
static void
rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_recvcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC:       %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_recvcq_poll(cq, ep);
}

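/* Drain both completion queues, one completion at a time, handling
 * each exactly as the upcall path would. Called around disconnect
 * and reconnect to retire flushed work requests.
 */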
static void
rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
{
	struct ib_wc wc;
	LIST_HEAD(sched_list);

	while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
		rpcrdma_recvcq_process_wc(&wc, &sched_list);
	if (!list_empty(&sched_list))
		rpcrdma_schedule_tasklet(&sched_list);
	while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
		rpcrdma_sendcq_process_wc(&wc);
}

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
static const char * const conn[] = {
	"address resolved",
	"address error",
	"route resolved",
	"route error",
	"connect request",
	"connect response",
	"connect error",
	"unreachable",
	"rejected",
	"established",
	"disconnected",
	"device removal",
	"multicast join",
	"multicast error",
	"address change",
	"timewait exit",
};

#define CONNECTION_MSG(status)					\
	((status) < ARRAY_SIZE(conn) ?				\
		conn[(status)] : "unrecognized connection error")
#endif

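/* Connection manager event handler. Address and route resolution
 * results are recorded in ia->ri_async_rc and signalled via
 * ia->ri_done; connection state changes are recorded in
 * ep->rep_connected, and connect waiters are woken.
 */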
static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
#endif
	struct ib_qp_attr *attr = &ia->ri_qp_attr;
	struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr;
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ia->ri_async_rc = 0;
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC:       %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC:       %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		ib_query_qp(ia->ri_id->qp, attr,
			    IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
			    iattr);
		dprintk("RPC:       %s: %d responder resources"
			" (%d initiator)\n",
			__func__, attr->max_dest_rd_atomic,
			attr->max_rd_atomic);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		connstate = -ECONNREFUSED;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
		goto connected;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		connstate = -ENODEV;
connected:
		dprintk("RPC:       %s: %sconnected\n",
			__func__, connstate > 0 ? "" : "dis");
		ep->rep_connected = connstate;
		rpcrdma_conn_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		/*FALLTHROUGH*/
	default:
		dprintk("RPC:       %s: %pIS:%u (ep 0x%p): %s\n",
			__func__, sap, rpc_get_port(sap), ep,
			CONNECTION_MSG(event->event));
		break;
	}

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	if (connstate == 1) {
		int ird = attr->max_dest_rd_atomic;
		int tird = ep->rep_remote_cma.responder_resources;

		pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n",
			sap, rpc_get_port(sap),
			ia->ri_id->device->name,
			ia->ri_ops->ro_displayname,
			xprt->rx_buf.rb_max_requests,
			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
	} else if (connstate < 0) {
		pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n",
			sap, rpc_get_port(sap), connstate);
	}
#endif

	return 0;
}

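/* Create an rdma_cm_id bound to rpcrdma_conn_upcall, then resolve the
 * server's address and route, waiting (with a timeout) for the CM
 * callbacks to complete each step.
 */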
static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
		  struct rpcrdma_ia *ia, struct sockaddr *addr)
{
	struct rdma_cm_id *id;
	int rc;

	init_completion(&ia->ri_done);

	id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		dprintk("RPC:       %s: rdma_create_id() failed %i\n",
			__func__, rc);
		return id;
	}

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC:       %s: rdma_resolve_addr() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC:       %s: rdma_resolve_route() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}

/*
 * Drain any cq, prior to teardown.
 */
static void
rpcrdma_clean_cq(struct ib_cq *cq)
{
	struct ib_wc wc;
	int count = 0;

	while (1 == ib_poll_cq(cq, 1, &wc))
		++count;

	if (count)
		dprintk("RPC:       %s: flushed %d events (last 0x%x)\n",
			__func__, count, wc.opcode);
}

/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc, mem_priv;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct ib_device_attr *devattr = &ia->ri_devattr;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC:       %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_query_device(ia->ri_id->device, devattr);
	if (rc) {
		dprintk("RPC:       %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out3;
	}

	if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
	}

	if (memreg == RPCRDMA_FRMR) {
		/* Requires both frmr reg and local dma lkey */
		if (((devattr->device_cap_flags &
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) ||
		      (devattr->max_fast_reg_page_list_len == 0)) {
			dprintk("RPC:       %s: FRMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_MTHCAFMR;
		}
	}
	if (memreg == RPCRDMA_MTHCAFMR) {
		if (!ia->ri_id->device->alloc_fmr) {
			dprintk("RPC:       %s: MTHCAFMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_ALLPHYSICAL;
		}
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_FRMR:
		ia->ri_ops = &rpcrdma_frwr_memreg_ops;
		break;
	case RPCRDMA_ALLPHYSICAL:
		ia->ri_ops = &rpcrdma_physical_memreg_ops;
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
	case RPCRDMA_MTHCAFMR:
		ia->ri_ops = &rpcrdma_fmr_memreg_ops;
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
	register_setup:
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			rc = -ENOMEM;
			goto out3;
		}
		break;
	default:
		printk(KERN_ERR "RPC: Unsupported memory "
				"registration mode: %d\n", memreg);
		rc = -ENOMEM;
		goto out3;
	}
	dprintk("RPC:       %s: memory registration strategy is '%s'\n",
		__func__, ia->ri_ops->ro_displayname);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	rwlock_init(&ia->ri_qplock);
	return 0;

out3:
	ib_dealloc_pd(ia->ri_pd);
	ia->ri_pd = NULL;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}

/*
 * Clean up/close an IA.
 *  o if event handles and PD have been initialized, free them.
 *  o close the IA
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC:       %s: entering\n", __func__);
	if (ia->ri_bind_mem != NULL) {
		rc = ib_dereg_mr(ia->ri_bind_mem);
		dprintk("RPC:       %s: ib_dereg_mr returned %i\n",
			__func__, rc);
	}

	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
		if (ia->ri_id->qp)
			rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = NULL;
	}

	/* If the pd is still busy, xprtrdma missed freeing a resource */
	if (ia->ri_pd && !IS_ERR(ia->ri_pd))
		WARN_ON(ib_dealloc_pd(ia->ri_pd));
}

/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
		  struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr *devattr = &ia->ri_devattr;
	struct ib_cq *sendcq, *recvcq;
	int rc, err;

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > devattr->max_qp_wr)
		cdata->max_requests = devattr->max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	rc = ia->ri_ops->ro_open(ia, ep, cdata);
	if (rc)
		return rc;
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	if (cdata->padding) {
		ep->rep_padbuf = rpcrdma_alloc_regbuf(ia, cdata->padding,
						      GFP_KERNEL);
		if (IS_ERR(ep->rep_padbuf))
			return PTR_ERR(ep->rep_padbuf);
	} else
		ep->rep_padbuf = NULL;

	dprintk("RPC:       %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

	/* set trigger for requesting send completion */
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
	if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
		ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
	else if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;
	INIT_CQCOUNT(ep);
	init_waitqueue_head(&ep->rep_connect_wait);
	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

	sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
			      rpcrdma_cq_async_error_upcall, ep,
			      ep->rep_attr.cap.max_send_wr + 1, 0);
	if (IS_ERR(sendcq)) {
		rc = PTR_ERR(sendcq);
		dprintk("RPC:       %s: failed to create send CQ: %i\n",
			__func__, rc);
		goto out1;
	}

	rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		goto out2;
	}

	recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
			      rpcrdma_cq_async_error_upcall, ep,
			      ep->rep_attr.cap.max_recv_wr + 1, 0);
	if (IS_ERR(recvcq)) {
		rc = PTR_ERR(recvcq);
		dprintk("RPC:       %s: failed to create recv CQ: %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		ib_destroy_cq(recvcq);
		goto out2;
	}

	ep->rep_attr.send_cq = sendcq;
	ep->rep_attr.recv_cq = recvcq;

	/* Initialize cma parameters */

	/* RPC/RDMA does not use private data */
	ep->rep_remote_cma.private_data = NULL;
	ep->rep_remote_cma.private_data_len = 0;

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	if (devattr->max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
		ep->rep_remote_cma.responder_resources = 32;
	else
		ep->rep_remote_cma.responder_resources =
						devattr->max_qp_rd_atom;

	ep->rep_remote_cma.retry_count = 7;
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	err = ib_destroy_cq(sendcq);
	if (err)
		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
			__func__, err);
out1:
	rpcrdma_free_regbuf(ia, ep->rep_padbuf);
	return rc;
}

/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC:       %s: entering, connected is %d\n",
		__func__, ep->rep_connected);

	cancel_delayed_work_sync(&ep->rep_connect_worker);

	if (ia->ri_id->qp) {
		rpcrdma_ep_disconnect(ep, ia);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	rpcrdma_free_regbuf(ia, ep->rep_padbuf);

	rpcrdma_clean_cq(ep->rep_attr.recv_cq);
	rc = ib_destroy_cq(ep->rep_attr.recv_cq);
	if (rc)
		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
			__func__, rc);

	rpcrdma_clean_cq(ep->rep_attr.send_cq);
	rc = ib_destroy_cq(ep->rep_attr.send_cq);
	if (rc)
		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
			__func__, rc);
}

/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id, *old;
	int rc = 0;
	int retry_count = 0;

	if (ep->rep_connected != 0) {
		struct rpcrdma_xprt *xprt;
retry:
		dprintk("RPC:       %s: reconnecting...\n", __func__);

		rpcrdma_ep_disconnect(ep, ia);
		rpcrdma_flush_cqs(ep);

		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		ia->ri_ops->ro_reset(xprt);

		id = rpcrdma_create_id(xprt, ia,
				(struct sockaddr *)&xprt->rx_data.addr);
		if (IS_ERR(id)) {
			rc = -EHOSTUNREACH;
			goto out;
		}
		/* TEMP TEMP TEMP - fail if new device:
		 * Deregister/remarshal *all* requests!
		 * Close and recreate adapter, pd, etc!
		 * Re-determine all attributes still sane!
		 * More stuff I haven't thought of!
		 * Rrrgh!
		 */
		if (ia->ri_id->device != id->device) {
			printk("RPC:       %s: can't reconnect on "
				"different device!\n", __func__);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}
		/* END TEMP */
		rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC:       %s: rdma_create_qp failed %i\n",
				__func__, rc);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}

		write_lock(&ia->ri_qplock);
		old = ia->ri_id;
		ia->ri_id = id;
		write_unlock(&ia->ri_qplock);

		rdma_destroy_qp(old);
		rdma_destroy_id(old);
	} else {
		dprintk("RPC:       %s: connecting...\n", __func__);
		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC:       %s: rdma_create_qp failed %i\n",
				__func__, rc);
			/* do not update ep->rep_connected */
			return -ENETUNREACH;
		}
	}

	ep->rep_connected = 0;

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC:       %s: rdma_connect() failed with %i\n",
			__func__, rc);
		goto out;
	}

	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

	/*
	 * Check state. A non-peer reject indicates no listener
	 * (ECONNREFUSED), which may be a transient state. All
	 * others indicate a transport condition which has already
	 * undergone a best-effort attempt to connect.
	 */
	if (ep->rep_connected == -ECONNREFUSED &&
	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
		dprintk("RPC:       %s: non-peer_reject, retry\n", __func__);
		goto retry;
	}
	if (ep->rep_connected <= 0) {
		/* Sometimes, the only way to reliably connect to remote
		 * CMs is to use same nonzero values for ORD and IRD. */
		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
		    (ep->rep_remote_cma.responder_resources == 0 ||
		     ep->rep_remote_cma.initiator_depth !=
				ep->rep_remote_cma.responder_resources)) {
			if (ep->rep_remote_cma.responder_resources == 0)
				ep->rep_remote_cma.responder_resources = 1;
			ep->rep_remote_cma.initiator_depth =
				ep->rep_remote_cma.responder_resources;
			goto retry;
		}
		rc = ep->rep_connected;
	} else {
		dprintk("RPC:       %s: connected\n", __func__);
	}

out:
	if (rc)
		ep->rep_connected = rc;
	return rc;
}

/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
void
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	rpcrdma_flush_cqs(ep);
	rc = rdma_disconnect(ia->ri_id);
	if (!rc) {
		/* returns without wait if not connected */
		wait_event_interruptible(ep->rep_connect_wait,
							ep->rep_connected != 1);
		dprintk("RPC:       %s: after wait, %sconnected\n", __func__,
			(ep->rep_connected == 1) ? "still " : "dis");
	} else {
		dprintk("RPC:       %s: rdma_disconnect %i\n", __func__, rc);
		ep->rep_connected = rc;
	}
}

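/* Allocate one rpcrdma_req structure and attach it to the transport's
 * buffer pool. The buffers hanging off it (rl_rdmabuf, rl_sendbuf)
 * are allocated separately.
 */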
static struct rpcrdma_req *
rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_req *req;

	req = kzalloc(sizeof(*req), GFP_KERNEL);
	if (req == NULL)
		return ERR_PTR(-ENOMEM);

	req->rl_buffer = &r_xprt->rx_buf;
	return req;
}

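/* Allocate one rpcrdma_rep along with a persistently registered
 * receive buffer (rr_rdmabuf) large enough for an inline reply.
 */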
static struct rpcrdma_rep *
rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_rep *rep;
	int rc;

	rc = -ENOMEM;
	rep = kzalloc(sizeof(*rep), GFP_KERNEL);
	if (rep == NULL)
		goto out;

	rep->rr_rdmabuf = rpcrdma_alloc_regbuf(ia, cdata->inline_rsize,
					       GFP_KERNEL);
	if (IS_ERR(rep->rr_rdmabuf)) {
		rc = PTR_ERR(rep->rr_rdmabuf);
		goto out_free;
	}

	rep->rr_buffer = &r_xprt->rx_buf;
	return rep;

out_free:
	kfree(rep);
out:
	return ERR_PTR(rc);
}

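/* Build the transport's buffer pool: pointer arrays for the send and
 * receive buffers, the memreg-specific MW pool (via ro_init), and one
 * req/rep pair for each credit.
 */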
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001065int
Chuck Leverac920d02015-01-21 11:03:44 -05001066rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001067{
Chuck Leverac920d02015-01-21 11:03:44 -05001068 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1069 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
1070 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001071 char *p;
Chuck Lever13924022015-01-21 11:03:52 -05001072 size_t len;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001073 int i, rc;
1074
1075 buf->rb_max_requests = cdata->max_requests;
1076 spin_lock_init(&buf->rb_lock);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001077
1078 /* Need to allocate:
1079 * 1. arrays for send and recv pointers
1080 * 2. arrays of struct rpcrdma_req to fill in pointers
1081 * 3. array of struct rpcrdma_rep for replies
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001082 * Send/recv buffers in req/rep need to be registered
1083 */
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001084 len = buf->rb_max_requests *
1085 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001086
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001087 p = kzalloc(len, GFP_KERNEL);
1088 if (p == NULL) {
1089 dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
1090 __func__, len);
1091 rc = -ENOMEM;
1092 goto out;
1093 }
1094 buf->rb_pool = p; /* for freeing it later */
1095
1096 buf->rb_send_bufs = (struct rpcrdma_req **) p;
1097 p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
1098 buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
1099 p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];
1100
Chuck Lever91e70e72015-03-30 14:34:58 -04001101 rc = ia->ri_ops->ro_init(r_xprt);
1102 if (rc)
1103 goto out;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001104
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001105 for (i = 0; i < buf->rb_max_requests; i++) {
1106 struct rpcrdma_req *req;
1107 struct rpcrdma_rep *rep;
1108
Chuck Lever13924022015-01-21 11:03:52 -05001109 req = rpcrdma_create_req(r_xprt);
1110 if (IS_ERR(req)) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001111 dprintk("RPC: %s: request buffer %d alloc"
1112 " failed\n", __func__, i);
Chuck Lever13924022015-01-21 11:03:52 -05001113 rc = PTR_ERR(req);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001114 goto out;
1115 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001116 buf->rb_send_bufs[i] = req;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001117
Chuck Lever13924022015-01-21 11:03:52 -05001118 rep = rpcrdma_create_rep(r_xprt);
1119 if (IS_ERR(rep)) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001120 dprintk("RPC: %s: reply buffer %d alloc failed\n",
1121 __func__, i);
Chuck Lever13924022015-01-21 11:03:52 -05001122 rc = PTR_ERR(rep);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001123 goto out;
1124 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001125 buf->rb_recv_bufs[i] = rep;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001126 }
Chuck Lever13924022015-01-21 11:03:52 -05001127
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001128 return 0;
1129out:
1130 rpcrdma_buffer_destroy(buf);
1131 return rc;
1132}
1133
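/* Destructors for the structures created above; each releases the
 * persistently registered buffer before freeing the containing
 * structure.
 */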
static void
rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep)
{
	if (!rep)
		return;

	rpcrdma_free_regbuf(ia, rep->rr_rdmabuf);
	kfree(rep);
}

static void
rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
{
	if (!req)
		return;

	rpcrdma_free_regbuf(ia, req->rl_sendbuf);
	rpcrdma_free_regbuf(ia, req->rl_rdmabuf);
	kfree(req);
}

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001155void
1156rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1157{
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001158 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
Chuck Lever2e845222014-07-29 17:25:38 -04001159 int i;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001160
1161 /* clean up in reverse order from create
1162 * 1. recv mr memory (mr free, then kfree)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001163 * 2. send mr memory (mr free, then kfree)
Chuck Lever2e845222014-07-29 17:25:38 -04001164 * 3. MWs
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001165 */
1166 dprintk("RPC: %s: entering\n", __func__);
1167
1168 for (i = 0; i < buf->rb_max_requests; i++) {
Chuck Lever13924022015-01-21 11:03:52 -05001169 if (buf->rb_recv_bufs)
1170 rpcrdma_destroy_rep(ia, buf->rb_recv_bufs[i]);
1171 if (buf->rb_send_bufs)
1172 rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001173 }
1174
Chuck Lever4561f342015-03-30 14:35:17 -04001175 ia->ri_ops->ro_destroy(buf);
Allen Andrews4034ba02014-05-28 10:32:09 -04001176
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001177 kfree(buf->rb_pool);
1178}
1179
/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
 * some req segments uninitialized.
 */
static void
rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
{
	if (*mw) {
		list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
		*mw = NULL;
	}
}

/* Cycle mw's back in reverse order, and "spin" them.
 * This delays and scrambles reuse as much as possible.
 */
static void
rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mr_seg *seg = req->rl_segments;
	struct rpcrdma_mr_seg *seg1 = seg;
	int i;

	for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
		rpcrdma_buffer_put_mr(&seg->rl_mw, buf);
	rpcrdma_buffer_put_mr(&seg1->rl_mw, buf);
}

static void
rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	buf->rb_send_bufs[--buf->rb_send_index] = req;
	req->rl_niovs = 0;
	if (req->rl_reply) {
		buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
		req->rl_reply->rr_func = NULL;
		req->rl_reply = NULL;
	}
}

/* rpcrdma_unmap_one() was already done during deregistration.
 * Redo only the ib_post_send().
 */
static void
rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct ib_send_wr invalidate_wr, *bad_wr;
	int rc;

	dprintk("RPC:       %s: FRMR %p is stale\n", __func__, r);

	/* When this FRMR is re-inserted into rb_mws, it is no longer stale */
	r->r.frmr.fr_state = FRMR_IS_INVALID;

	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
	invalidate_wr.wr_id = (unsigned long)(void *)r;
	invalidate_wr.opcode = IB_WR_LOCAL_INV;
	invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	dprintk("RPC:       %s: frmr %p invalidating rkey %08x\n",
		__func__, r, r->r.frmr.fr_mr->rkey);

	read_lock(&ia->ri_qplock);
	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
	read_unlock(&ia->ri_qplock);
	if (rc) {
		/* Force rpcrdma_buffer_get() to retry */
		r->r.frmr.fr_state = FRMR_IS_STALE;
		dprintk("RPC:       %s: ib_post_send failed, %i\n",
			__func__, rc);
	}
}

static void
rpcrdma_retry_flushed_linv(struct list_head *stale,
			   struct rpcrdma_buffer *buf)
{
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	struct list_head *pos;
	struct rpcrdma_mw *r;
	unsigned long flags;

	list_for_each(pos, stale) {
		r = list_entry(pos, struct rpcrdma_mw, mw_list);
		rpcrdma_retry_local_inv(r, ia);
	}

	spin_lock_irqsave(&buf->rb_lock, flags);
	list_splice_tail(stale, &buf->rb_mws);
	spin_unlock_irqrestore(&buf->rb_lock, flags);
}

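/* Refill req->rl_segments with MWs taken from rb_mws. The FRMR
 * variant moves stale (flushed) FRMRs onto @stale for recovery rather
 * than handing them out; the FMR variant hands out MWs as-is. On
 * failure, both return the req and its MWs to the pool and return
 * NULL.
 */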
static struct rpcrdma_req *
rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
			 struct list_head *stale)
{
	struct rpcrdma_mw *r;
	int i;

	i = RPCRDMA_MAX_SEGS - 1;
	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			       struct rpcrdma_mw, mw_list);
		list_del(&r->mw_list);
		if (r->r.frmr.fr_state == FRMR_IS_STALE) {
			list_add(&r->mw_list, stale);
			continue;
		}
		req->rl_segments[i].rl_mw = r;
		if (unlikely(i-- == 0))
			return req;	/* Success */
	}

	/* Not enough entries on rb_mws for this req */
	rpcrdma_buffer_put_sendbuf(req, buf);
	rpcrdma_buffer_put_mrs(req, buf);
	return NULL;
}

static struct rpcrdma_req *
rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;
	int i;

	i = RPCRDMA_MAX_SEGS - 1;
	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			       struct rpcrdma_mw, mw_list);
		list_del(&r->mw_list);
		req->rl_segments[i].rl_mw = r;
		if (unlikely(i-- == 0))
			return req;	/* Success */
	}

	/* Not enough entries on rb_mws for this req */
	rpcrdma_buffer_put_sendbuf(req, buf);
	rpcrdma_buffer_put_mrs(req, buf);
	return NULL;
}

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001323/*
1324 * Get a set of request/reply buffers.
1325 *
1326 * Reply buffer (if needed) is attached to send buffer upon return.
1327 * Rule:
1328 * rb_send_index and rb_recv_index MUST always be pointing to the
1329 * *next* available buffer (non-NULL). They are incremented after
1330 * removing buffers, and decremented *before* returning them.
1331 */
1332struct rpcrdma_req *
1333rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1334{
Chuck Leverc2922c02014-07-29 17:24:36 -04001335 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
Chuck Leverddb6beb2014-07-29 17:24:54 -04001336 struct list_head stale;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001337 struct rpcrdma_req *req;
1338 unsigned long flags;
1339
1340 spin_lock_irqsave(&buffers->rb_lock, flags);
1341 if (buffers->rb_send_index == buffers->rb_max_requests) {
1342 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1343 dprintk("RPC: %s: out of request buffers\n", __func__);
1344 return ((struct rpcrdma_req *)NULL);
1345 }
1346
1347 req = buffers->rb_send_bufs[buffers->rb_send_index];
1348 if (buffers->rb_send_index < buffers->rb_recv_index) {
1349 dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
1350 __func__,
1351 buffers->rb_recv_index - buffers->rb_send_index);
1352 req->rl_reply = NULL;
1353 } else {
1354 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1355 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1356 }
1357 buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001358
1359 INIT_LIST_HEAD(&stale);
Chuck Leverc2922c02014-07-29 17:24:36 -04001360 switch (ia->ri_memreg_strategy) {
1361 case RPCRDMA_FRMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001362 req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
1363 break;
Chuck Leverc2922c02014-07-29 17:24:36 -04001364 case RPCRDMA_MTHCAFMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001365 req = rpcrdma_buffer_get_fmrs(req, buffers);
Chuck Leverc2922c02014-07-29 17:24:36 -04001366 break;
1367 default:
1368 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001369 }
1370 spin_unlock_irqrestore(&buffers->rb_lock, flags);
Chuck Leverddb6beb2014-07-29 17:24:54 -04001371 if (!list_empty(&stale))
1372 rpcrdma_retry_flushed_linv(&stale, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001373 return req;
1374}

/*
 * Put request/reply buffers back into pool.
 * Pre-decrement counter/array index.
 */
void
rpcrdma_buffer_put(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
	unsigned long flags;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	rpcrdma_buffer_put_sendbuf(req, buffers);
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
	case RPCRDMA_MTHCAFMR:
		rpcrdma_buffer_put_mrs(req, buffers);
		break;
	default:
		break;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Recover reply buffers from pool.
 * This happens when recovering from error conditions.
 * Post-increment counter/array index.
 */
void
rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	unsigned long flags;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	if (buffers->rb_recv_index < buffers->rb_max_requests) {
		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Put reply buffers back into pool when not attached to
 * request. This happens in error conditions.
 */
void
rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
{
	struct rpcrdma_buffer *buffers = rep->rr_buffer;
	unsigned long flags;

	rep->rr_func = NULL;
	spin_lock_irqsave(&buffers->rb_lock, flags);
	buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
 */

void
rpcrdma_mapping_error(struct rpcrdma_mr_seg *seg)
{
	dprintk("RPC: map_one: offset %p iova %llx len %zu\n",
		seg->mr_offset,
		(unsigned long long)seg->mr_dma, seg->mr_dmalen);
}

static int
rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
			  struct ib_mr **mrp, struct ib_sge *iov)
{
	struct ib_phys_buf ipb;
	struct ib_mr *mr;
	int rc;

	/*
	 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
	 */
	iov->addr = ib_dma_map_single(ia->ri_id->device,
				      va, len, DMA_BIDIRECTIONAL);
	if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
		return -ENOMEM;

	iov->length = len;

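	/*
	 * Pick the cheapest available lkey for this buffer: the device's
	 * global DMA lkey if it provides one, otherwise the lkey of the
	 * pre-registered DMA MR (ri_bind_mem), and only as a last resort
	 * a physical registration of just this buffer.
	 */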
	if (ia->ri_have_dma_lkey) {
		*mrp = NULL;
		iov->lkey = ia->ri_dma_lkey;
		return 0;
	} else if (ia->ri_bind_mem != NULL) {
		*mrp = NULL;
		iov->lkey = ia->ri_bind_mem->lkey;
		return 0;
	}

	ipb.addr = iov->addr;
	ipb.size = iov->length;
	mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
			    IB_ACCESS_LOCAL_WRITE, &iov->addr);

	dprintk("RPC: %s: phys convert: 0x%llx "
		"registered 0x%llx length %d\n",
		__func__, (unsigned long long)ipb.addr,
		(unsigned long long)iov->addr, len);

	if (IS_ERR(mr)) {
		*mrp = NULL;
		rc = PTR_ERR(mr);
		dprintk("RPC: %s: failed with %i\n", __func__, rc);
	} else {
		*mrp = mr;
		iov->lkey = mr->lkey;
		rc = 0;
	}

	return rc;
}

static int
rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
			    struct ib_mr *mr, struct ib_sge *iov)
{
	int rc;

	ib_dma_unmap_single(ia->ri_id->device,
			    iov->addr, iov->length, DMA_BIDIRECTIONAL);

	if (mr == NULL)
		return 0;

	rc = ib_dereg_mr(mr);
	if (rc)
		dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
	return rc;
}

/**
 * rpcrdma_alloc_regbuf - kmalloc and register memory for SEND/RECV buffers
 * @ia: controlling rpcrdma_ia
 * @size: size of buffer to be allocated, in bytes
 * @flags: GFP flags
 *
 * Returns pointer to private header of an area of internally
 * registered memory, or an ERR_PTR. The registered buffer follows
 * the end of the private header.
 *
 * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
 * receiving the payload of RDMA RECV operations. regbufs are not
 * used for RDMA READ/WRITE operations, thus are registered only for
 * LOCAL access.
 */
struct rpcrdma_regbuf *
rpcrdma_alloc_regbuf(struct rpcrdma_ia *ia, size_t size, gfp_t flags)
{
	struct rpcrdma_regbuf *rb;
	int rc;

	rc = -ENOMEM;
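	/*
	 * The regbuf header and its data area are allocated as a single
	 * chunk; rb->rg_base is the first byte following the header.
	 */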
	rb = kmalloc(sizeof(*rb) + size, flags);
	if (rb == NULL)
		goto out;

	rb->rg_size = size;
	rb->rg_owner = NULL;
	rc = rpcrdma_register_internal(ia, rb->rg_base, size,
				       &rb->rg_mr, &rb->rg_iov);
	if (rc)
		goto out_free;

	return rb;

out_free:
	kfree(rb);
out:
	return ERR_PTR(rc);
}

/**
 * rpcrdma_free_regbuf - deregister and free registered buffer
 * @ia: controlling rpcrdma_ia
 * @rb: regbuf to be deregistered and freed
 */
void
rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
{
	if (rb) {
		rpcrdma_deregister_internal(ia, rb->rg_mr, &rb->rg_iov);
		kfree(rb);
	}
}

/*
 * Prepost any receive buffer, then post send.
 *
 * Receive buffer is donated to hardware, reclaimed upon recv completion.
 */
int
rpcrdma_ep_post(struct rpcrdma_ia *ia,
		struct rpcrdma_ep *ep,
		struct rpcrdma_req *req)
{
	struct ib_send_wr send_wr, *send_wr_fail;
	struct rpcrdma_rep *rep = req->rl_reply;
	int rc;

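	/*
	 * Post the reply buffer as a receive first, so that a fast
	 * response cannot arrive while no receive is posted for it.
	 */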
	if (rep) {
		rc = rpcrdma_ep_post_recv(ia, ep, rep);
		if (rc)
			goto out;
		req->rl_reply = NULL;
	}

	send_wr.next = NULL;
	send_wr.wr_id = RPCRDMA_IGNORE_COMPLETION;
	send_wr.sg_list = req->rl_send_iov;
	send_wr.num_sge = req->rl_niovs;
	send_wr.opcode = IB_WR_SEND;
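	/*
	 * DMA-sync the send SGEs so the device sees their current
	 * contents: the optional pad SGE only when four SGEs are in
	 * use, then the two SGEs that are always present.
	 */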
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001597 if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */
1598 ib_dma_sync_single_for_device(ia->ri_id->device,
1599 req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
1600 DMA_TO_DEVICE);
1601 ib_dma_sync_single_for_device(ia->ri_id->device,
1602 req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
1603 DMA_TO_DEVICE);
1604 ib_dma_sync_single_for_device(ia->ri_id->device,
1605 req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
1606 DMA_TO_DEVICE);
1607
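	/*
	 * Most SENDs are posted unsignaled to cut down on completion
	 * interrupts; a signaled SEND is requested whenever the CQ
	 * counter runs down so the provider can reclaim send queue
	 * resources.
	 */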
	if (DECR_CQCOUNT(ep) > 0)
		send_wr.send_flags = 0;
	else { /* Provider must take a send completion every now and then */
		INIT_CQCOUNT(ep);
		send_wr.send_flags = IB_SEND_SIGNALED;
	}

	rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
	if (rc)
		dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
			rc);
out:
	return rc;
}

/*
 * (Re)post a receive buffer.
 */
int
rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
		     struct rpcrdma_ep *ep,
		     struct rpcrdma_rep *rep)
{
	struct ib_recv_wr recv_wr, *recv_wr_fail;
	int rc;

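	/*
	 * The work request ID carries the rpcrdma_rep pointer so the
	 * receive completion handler can locate this reply buffer.
	 */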
	recv_wr.next = NULL;
	recv_wr.wr_id = (u64) (unsigned long) rep;
	recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
	recv_wr.num_sge = 1;

	ib_dma_sync_single_for_cpu(ia->ri_id->device,
				   rdmab_addr(rep->rr_rdmabuf),
				   rdmab_length(rep->rr_rdmabuf),
				   DMA_BIDIRECTIONAL);

	rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);

	if (rc)
		dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
			rc);
	return rc;
}

/* How many chunk list items fit within our inline buffers?
 */
unsigned int
rpcrdma_max_segments(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	int bytes, segments;

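	/*
	 * Chunk lists must fit in the smaller of the two inline buffers,
	 * after the fixed portion of the RPC-over-RDMA header.
	 */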
	bytes = min_t(unsigned int, cdata->inline_wsize, cdata->inline_rsize);
	bytes -= RPCRDMA_HDRLEN_MIN;
	if (bytes < sizeof(struct rpcrdma_segment) * 2) {
		pr_warn("RPC: %s: inline threshold too small\n",
			__func__);
		return 0;
	}

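	/* Round down to the nearest power of two. */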
	segments = 1 << (fls(bytes / sizeof(struct rpcrdma_segment)) - 1);
	dprintk("RPC: %s: max chunk list size = %d segments\n",
		__func__, segments);
	return segments;
}