\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -04001/*
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040038 */
39
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040040/*
41 * verbs.c
42 *
43 * Encapsulates the major functions managing:
44 * o adapters
45 * o endpoints
46 * o connections
47 * o buffer memory
48 */

#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/prefetch.h>
#include <linux/sunrpc/addr.h>
#include <asm/bitops.h>

#include "xprt_rdma.h"

/*
 * Globals/Macros
 */

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

/*
 * internal functions
 */

/*
 * Handle replies in tasklet context, using a single, global list.
 * The rdma tasklet function below simply walks that list and invokes
 * the reply handler for each reply on it.
 */

static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
static LIST_HEAD(rpcrdma_tasklets_g);

static void
rpcrdma_run_tasklet(unsigned long data)
{
	struct rpcrdma_rep *rep;
	void (*func)(struct rpcrdma_rep *);
	unsigned long flags;

	data = data;	/* tasklet data is unused; silence the warning */
	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	while (!list_empty(&rpcrdma_tasklets_g)) {
		rep = list_entry(rpcrdma_tasklets_g.next,
				 struct rpcrdma_rep, rr_list);
		list_del(&rep->rr_list);
		func = rep->rr_func;
		rep->rr_func = NULL;
		spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);

		if (func)
			func(rep);
		else
			rpcrdma_recv_buffer_put(rep);

		spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	}
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
}

static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);

static const char * const async_event[] = {
	"CQ error",
	"QP fatal error",
	"QP request error",
	"QP access error",
	"communication established",
	"send queue drained",
	"path migration successful",
	"path mig error",
	"device fatal error",
	"port active",
	"port error",
	"LID change",
	"P_key change",
	"SM change",
	"SRQ error",
	"SRQ limit reached",
	"last WQE reached",
	"client reregister",
	"GID change",
};

#define ASYNC_MSG(status)					\
	((status) < ARRAY_SIZE(async_event) ?			\
		async_event[(status)] : "unknown async error")

static void
rpcrdma_schedule_tasklet(struct list_head *sched_list)
{
	unsigned long flags;

	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	list_splice_tail(sched_list, &rpcrdma_tasklets_g);
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
	tasklet_schedule(&rpcrdma_tasklet_g);
}
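
/*
 * Note on the splice above: completion upcalls run in interrupt
 * context, so each poll pass collects the replies it finds on a
 * local list and hands the whole batch to the tasklet under a
 * single acquisition of rpcrdma_tk_lock_g, rather than taking the
 * global lock once per reply.
 */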

static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	pr_err("RPC: %s: %s on device %s ep %p\n",
	       __func__, ASYNC_MSG(event->event),
	       event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		rpcrdma_conn_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

static void
rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	pr_err("RPC: %s: %s on device %s ep %p\n",
	       __func__, ASYNC_MSG(event->event),
	       event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		rpcrdma_conn_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

static const char * const wc_status[] = {
	"success",
	"local length error",
	"local QP operation error",
	"local EE context operation error",
	"local protection error",
	"WR flushed",
	"memory management operation error",
	"bad response error",
	"local access error",
	"remote invalid request error",
	"remote access error",
	"remote operation error",
	"transport retry counter exceeded",
	"RNR retry counter exceeded",
	"local RDD violation error",
	"remote invalid RD request",
	"operation aborted",
	"invalid EE context number",
	"invalid EE context state",
	"fatal error",
	"response timeout error",
	"general error",
};

#define COMPLETION_MSG(status)					\
	((status) < ARRAY_SIZE(wc_status) ?			\
		wc_status[(status)] : "unexpected completion error")

static void
rpcrdma_sendcq_process_wc(struct ib_wc *wc)
{
	if (likely(wc->status == IB_WC_SUCCESS))
		return;

	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->wr_id == 0ULL) {
		if (wc->status != IB_WC_WR_FLUSH_ERR)
			pr_err("RPC: %s: SEND: %s\n",
			       __func__, COMPLETION_MSG(wc->status));
	} else {
		struct rpcrdma_mw *r;

		r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
		r->r.frmr.fr_state = FRMR_IS_STALE;
		pr_err("RPC: %s: frmr %p (stale): %s\n",
		       __func__, r, COMPLETION_MSG(wc->status));
	}
}

static int
rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct ib_wc *wcs;
	int budget, count, rc;

	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_send_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			return rc;

		count = rc;
		while (count-- > 0)
			rpcrdma_sendcq_process_wc(wcs++);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	return 0;
}

/*
 * Handle send, fast_reg_mr, and local_inv completions.
 *
 * Send events are typically suppressed and thus do not result
 * in an upcall. Occasionally one is signaled, however. This
 * prevents the provider's completion queue from wrapping and
 * losing a completion.
 */
static void
rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_sendcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC: %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_sendcq_poll(cq, ep);
}
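
/*
 * The poll / re-arm / re-poll sequence above closes a completion
 * race: ib_req_notify_cq() with IB_CQ_REPORT_MISSED_EVENTS returns a
 * positive value if completions may have arrived after the first
 * poll but before the CQ was re-armed. Polling once more in that
 * case (and when the provider cannot report missed events) keeps
 * those completions from sitting unprocessed until the next
 * interrupt.
 */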
279
280static void
Chuck Leverbb961932014-07-29 17:25:46 -0400281rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
Chuck Leverfc664482014-05-28 10:33:25 -0400282{
283 struct rpcrdma_rep *rep =
284 (struct rpcrdma_rep *)(unsigned long)wc->wr_id;
285
Chuck Lever85024272015-01-21 11:02:04 -0500286 /* WARNING: Only wr_id and status are reliable at this point */
287 if (wc->status != IB_WC_SUCCESS)
288 goto out_fail;
Chuck Leverfc664482014-05-28 10:33:25 -0400289
Chuck Lever85024272015-01-21 11:02:04 -0500290 /* status == SUCCESS means all fields in wc are trustworthy */
Chuck Leverfc664482014-05-28 10:33:25 -0400291 if (wc->opcode != IB_WC_RECV)
292 return;
293
Chuck Lever85024272015-01-21 11:02:04 -0500294 dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n",
295 __func__, rep, wc->byte_len);
296
Chuck Leverfc664482014-05-28 10:33:25 -0400297 rep->rr_len = wc->byte_len;
298 ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
Chuck Lever6b1184c2015-01-21 11:04:25 -0500299 rdmab_addr(rep->rr_rdmabuf),
300 rep->rr_len, DMA_FROM_DEVICE);
301 prefetch(rdmab_to_msg(rep->rr_rdmabuf));
Chuck Leverfc664482014-05-28 10:33:25 -0400302
303out_schedule:
Chuck Leverbb961932014-07-29 17:25:46 -0400304 list_add_tail(&rep->rr_list, sched_list);
Chuck Lever85024272015-01-21 11:02:04 -0500305 return;
306out_fail:
307 if (wc->status != IB_WC_WR_FLUSH_ERR)
308 pr_err("RPC: %s: rep %p: %s\n",
309 __func__, rep, COMPLETION_MSG(wc->status));
310 rep->rr_len = ~0U;
311 goto out_schedule;
Chuck Leverfc664482014-05-28 10:33:25 -0400312}

static int
rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct list_head sched_list;
	struct ib_wc *wcs;
	int budget, count, rc;

	INIT_LIST_HEAD(&sched_list);
	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_recv_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			goto out_schedule;

		count = rc;
		while (count-- > 0)
			rpcrdma_recvcq_process_wc(wcs++, &sched_list);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	rc = 0;

out_schedule:
	rpcrdma_schedule_tasklet(&sched_list);
	return rc;
}

/*
 * Handle receive completions.
 *
 * This is reentrant, but it processes single events in order to
 * maintain the ordering of receives needed to track server credits.
 *
 * It is the responsibility of the scheduled tasklet to return
 * recv buffers to the pool. NOTE: this affects synchronization of
 * connection shutdown. That is, the structures required for
 * the completion of the reply handler must remain intact until
 * all memory has been reclaimed.
 */
static void
rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_recvcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC: %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_recvcq_poll(cq, ep);
}

static void
rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
{
	struct ib_wc wc;
	LIST_HEAD(sched_list);

	while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
		rpcrdma_recvcq_process_wc(&wc, &sched_list);
	if (!list_empty(&sched_list))
		rpcrdma_schedule_tasklet(&sched_list);
	while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
		rpcrdma_sendcq_process_wc(&wc);
}
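
/*
 * rpcrdma_flush_cqs() runs just before connect and disconnect.
 * Draining both CQs here means every outstanding receive is handed
 * to the reply tasklet through the normal completion path, and every
 * flushed send marks its FRMR stale, before the QP is torn down or
 * replaced.
 */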

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
static const char * const conn[] = {
	"address resolved",
	"address error",
	"route resolved",
	"route error",
	"connect request",
	"connect response",
	"connect error",
	"unreachable",
	"rejected",
	"established",
	"disconnected",
	"device removal",
	"multicast join",
	"multicast error",
	"address change",
	"timewait exit",
};

#define CONNECTION_MSG(status)					\
	((status) < ARRAY_SIZE(conn) ?				\
		conn[(status)] : "unrecognized connection error")
#endif

static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
#endif
	struct ib_qp_attr *attr = &ia->ri_qp_attr;
	struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr;
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ia->ri_async_rc = 0;
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		ib_query_qp(ia->ri_id->qp, attr,
			    IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
			    iattr);
		dprintk("RPC: %s: %d responder resources"
			" (%d initiator)\n",
			__func__, attr->max_dest_rd_atomic,
			attr->max_rd_atomic);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		connstate = -ECONNREFUSED;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
		goto connected;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		connstate = -ENODEV;
connected:
		dprintk("RPC: %s: %sconnected\n",
			__func__, connstate > 0 ? "" : "dis");
		ep->rep_connected = connstate;
		rpcrdma_conn_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		/*FALLTHROUGH*/
	default:
		dprintk("RPC: %s: %pIS:%u (ep 0x%p): %s\n",
			__func__, sap, rpc_get_port(sap), ep,
			CONNECTION_MSG(event->event));
		break;
	}

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	if (connstate == 1) {
		int ird = attr->max_dest_rd_atomic;
		int tird = ep->rep_remote_cma.responder_resources;

		pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n",
			sap, rpc_get_port(sap),
			ia->ri_id->device->name,
			ia->ri_ops->ro_displayname,
			xprt->rx_buf.rb_max_requests,
			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
	} else if (connstate < 0) {
		pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n",
			sap, rpc_get_port(sap), connstate);
	}
#endif

	return 0;
}
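
/*
 * A note on ep->rep_connected, as managed by the upcall above and the
 * connect logic below: 1 means the RDMA connection is established,
 * 0 means a connect is pending (or has not been attempted), and a
 * negative errno records why the connection failed or was torn down.
 */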

static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
			struct rpcrdma_ia *ia, struct sockaddr *addr)
{
	struct rdma_cm_id *id;
	int rc;

	init_completion(&ia->ri_done);

	id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		dprintk("RPC: %s: rdma_create_id() failed %i\n",
			__func__, rc);
		return id;
	}

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}

/*
 * Drain any cq, prior to teardown.
 */
static void
rpcrdma_clean_cq(struct ib_cq *cq)
{
	struct ib_wc wc;
	int count = 0;

	while (1 == ib_poll_cq(cq, 1, &wc))
		++count;

	if (count)
		dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
			__func__, count, wc.opcode);
}

/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc, mem_priv;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct ib_device_attr *devattr = &ia->ri_devattr;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_query_device(ia->ri_id->device, devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out3;
	}

	if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
	}

	if (memreg == RPCRDMA_FRMR) {
		/* Requires both frmr reg and local dma lkey */
		if (((devattr->device_cap_flags &
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) ||
		      (devattr->max_fast_reg_page_list_len == 0)) {
			dprintk("RPC: %s: FRMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_MTHCAFMR;
		}
	}
	if (memreg == RPCRDMA_MTHCAFMR) {
		if (!ia->ri_id->device->alloc_fmr) {
			dprintk("RPC: %s: MTHCAFMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_ALLPHYSICAL;
		}
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_FRMR:
		ia->ri_ops = &rpcrdma_frwr_memreg_ops;
		break;
	case RPCRDMA_ALLPHYSICAL:
		ia->ri_ops = &rpcrdma_physical_memreg_ops;
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
	case RPCRDMA_MTHCAFMR:
		ia->ri_ops = &rpcrdma_fmr_memreg_ops;
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
	register_setup:
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			rc = -ENOMEM;
			goto out3;
		}
		break;
	default:
		printk(KERN_ERR "RPC: Unsupported memory "
				"registration mode: %d\n", memreg);
		rc = -ENOMEM;
		goto out3;
	}
	dprintk("RPC: %s: memory registration strategy is '%s'\n",
		__func__, ia->ri_ops->ro_displayname);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	rwlock_init(&ia->ri_qplock);
	return 0;

out3:
	ib_dealloc_pd(ia->ri_pd);
	ia->ri_pd = NULL;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}
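
/*
 * Registration-mode fallback in rpcrdma_ia_open(), in brief: a
 * request for RPCRDMA_FRMR falls back to RPCRDMA_MTHCAFMR when the
 * device lacks fast-register support or a local DMA lkey, and
 * RPCRDMA_MTHCAFMR falls back to RPCRDMA_ALLPHYSICAL when the device
 * has no alloc_fmr verb. The mode that survives selects the
 * rpcrdma_memreg_ops vector and, where required, a DMA MR providing
 * local access rights.
 */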

/*
 * Clean up/close an IA.
 *  o if event handles and PD have been initialized, free them.
 *  o close the IA
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering\n", __func__);
	if (ia->ri_bind_mem != NULL) {
		rc = ib_dereg_mr(ia->ri_bind_mem);
		dprintk("RPC: %s: ib_dereg_mr returned %i\n",
			__func__, rc);
	}
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
		if (ia->ri_id->qp)
			rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = NULL;
	}
	if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
		rc = ib_dealloc_pd(ia->ri_pd);
		dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
			__func__, rc);
	}
}

/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
				struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr *devattr = &ia->ri_devattr;
	struct ib_cq *sendcq, *recvcq;
	int rc, err;

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > devattr->max_qp_wr)
		cdata->max_requests = devattr->max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	rc = ia->ri_ops->ro_open(ia, ep, cdata);
	if (rc)
		return rc;
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	if (cdata->padding) {
		ep->rep_padbuf = rpcrdma_alloc_regbuf(ia, cdata->padding,
						      GFP_KERNEL);
		if (IS_ERR(ep->rep_padbuf))
			return PTR_ERR(ep->rep_padbuf);
	} else
		ep->rep_padbuf = NULL;

	dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

	/* set trigger for requesting send completion */
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
	if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
		ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
	else if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;
	INIT_CQCOUNT(ep);
	init_waitqueue_head(&ep->rep_connect_wait);
	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

	sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
			      rpcrdma_cq_async_error_upcall, ep,
			      ep->rep_attr.cap.max_send_wr + 1, 0);
	if (IS_ERR(sendcq)) {
		rc = PTR_ERR(sendcq);
		dprintk("RPC: %s: failed to create send CQ: %i\n",
			__func__, rc);
		goto out1;
	}

	rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		goto out2;
	}

	recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
			      rpcrdma_cq_async_error_upcall, ep,
			      ep->rep_attr.cap.max_recv_wr + 1, 0);
	if (IS_ERR(recvcq)) {
		rc = PTR_ERR(recvcq);
		dprintk("RPC: %s: failed to create recv CQ: %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		ib_destroy_cq(recvcq);
		goto out2;
	}

	ep->rep_attr.send_cq = sendcq;
	ep->rep_attr.recv_cq = recvcq;

	/* Initialize cma parameters */

	/* RPC/RDMA does not use private data */
	ep->rep_remote_cma.private_data = NULL;
	ep->rep_remote_cma.private_data_len = 0;

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	if (devattr->max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
		ep->rep_remote_cma.responder_resources = 32;
	else
		ep->rep_remote_cma.responder_resources =
						devattr->max_qp_rd_atom;

	ep->rep_remote_cma.retry_count = 7;
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	err = ib_destroy_cq(sendcq);
	if (err)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, err);
out1:
	rpcrdma_free_regbuf(ia, ep->rep_padbuf);
	return rc;
}
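
/*
 * How the send-completion trigger set up above is consumed (a sketch;
 * the INIT_CQCOUNT and DECR_CQCOUNT macros are assumed to maintain a
 * per-endpoint counter in xprt_rdma.h): most sends are posted
 * unsignaled, and the counter starts at ep->rep_cqinit and drops by
 * one per post. When it reaches zero, a send is posted with
 * IB_SEND_SIGNALED and the counter is reset, so the send CQ receives
 * a completion often enough that it can never wrap.
 */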

/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering, connected is %d\n",
		__func__, ep->rep_connected);

	cancel_delayed_work_sync(&ep->rep_connect_worker);

	if (ia->ri_id->qp) {
		rpcrdma_ep_disconnect(ep, ia);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	rpcrdma_free_regbuf(ia, ep->rep_padbuf);

	rpcrdma_clean_cq(ep->rep_attr.recv_cq);
	rc = ib_destroy_cq(ep->rep_attr.recv_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);

	rpcrdma_clean_cq(ep->rep_attr.send_cq);
	rc = ib_destroy_cq(ep->rep_attr.send_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);
}

/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id, *old;
	int rc = 0;
	int retry_count = 0;

	if (ep->rep_connected != 0) {
		struct rpcrdma_xprt *xprt;
retry:
		dprintk("RPC: %s: reconnecting...\n", __func__);

		rpcrdma_ep_disconnect(ep, ia);
		rpcrdma_flush_cqs(ep);

		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		ia->ri_ops->ro_reset(xprt);

		id = rpcrdma_create_id(xprt, ia,
				(struct sockaddr *)&xprt->rx_data.addr);
		if (IS_ERR(id)) {
			rc = -EHOSTUNREACH;
			goto out;
		}
		/* TEMP TEMP TEMP - fail if new device:
		 * Deregister/remarshal *all* requests!
		 * Close and recreate adapter, pd, etc!
		 * Re-determine all attributes still sane!
		 * More stuff I haven't thought of!
		 * Rrrgh!
		 */
		if (ia->ri_id->device != id->device) {
			printk("RPC: %s: can't reconnect on "
				"different device!\n", __func__);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}
		/* END TEMP */
		rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
				__func__, rc);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}

		write_lock(&ia->ri_qplock);
		old = ia->ri_id;
		ia->ri_id = id;
		write_unlock(&ia->ri_qplock);

		rdma_destroy_qp(old);
		rdma_destroy_id(old);
	} else {
		dprintk("RPC: %s: connecting...\n", __func__);
		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
				__func__, rc);
			/* do not update ep->rep_connected */
			return -ENETUNREACH;
		}
	}

	ep->rep_connected = 0;

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC: %s: rdma_connect() failed with %i\n",
				__func__, rc);
		goto out;
	}

	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

	/*
	 * Check state. A non-peer reject indicates no listener
	 * (ECONNREFUSED), which may be a transient state. All
	 * other errors indicate a transport condition for which a
	 * best-effort recovery has already been attempted.
	 */
	if (ep->rep_connected == -ECONNREFUSED &&
	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
		dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
		goto retry;
	}
	if (ep->rep_connected <= 0) {
		/* Sometimes, the only way to reliably connect to remote
		 * CMs is to use same nonzero values for ORD and IRD. */
		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
		    (ep->rep_remote_cma.responder_resources == 0 ||
		     ep->rep_remote_cma.initiator_depth !=
				ep->rep_remote_cma.responder_resources)) {
			if (ep->rep_remote_cma.responder_resources == 0)
				ep->rep_remote_cma.responder_resources = 1;
			ep->rep_remote_cma.initiator_depth =
				ep->rep_remote_cma.responder_resources;
			goto retry;
		}
		rc = ep->rep_connected;
	} else {
		dprintk("RPC: %s: connected\n", __func__);
	}

out:
	if (rc)
		ep->rep_connected = rc;
	return rc;
}

/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
void
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	rpcrdma_flush_cqs(ep);
	rc = rdma_disconnect(ia->ri_id);
	if (!rc) {
		/* returns without wait if not connected */
		wait_event_interruptible(ep->rep_connect_wait,
					 ep->rep_connected != 1);
		dprintk("RPC: %s: after wait, %sconnected\n", __func__,
			(ep->rep_connected == 1) ? "still " : "dis");
	} else {
		dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
		ep->rep_connected = rc;
	}
}

static struct rpcrdma_req *
rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_req *req;

	req = kzalloc(sizeof(*req), GFP_KERNEL);
	if (req == NULL)
		return ERR_PTR(-ENOMEM);

	req->rl_buffer = &r_xprt->rx_buf;
	return req;
}

static struct rpcrdma_rep *
rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_rep *rep;
	int rc;

	rc = -ENOMEM;
	rep = kzalloc(sizeof(*rep), GFP_KERNEL);
	if (rep == NULL)
		goto out;

	rep->rr_rdmabuf = rpcrdma_alloc_regbuf(ia, cdata->inline_rsize,
					       GFP_KERNEL);
	if (IS_ERR(rep->rr_rdmabuf)) {
		rc = PTR_ERR(rep->rr_rdmabuf);
		goto out_free;
	}

	rep->rr_buffer = &r_xprt->rx_buf;
	return rep;

out_free:
	kfree(rep);
out:
	return ERR_PTR(rc);
}

int
rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	char *p;
	size_t len;
	int i, rc;

	buf->rb_max_requests = cdata->max_requests;
	spin_lock_init(&buf->rb_lock);

	/* Need to allocate:
	 *   1. arrays for send and recv pointers
	 *   2. arrays of struct rpcrdma_req to fill in pointers
	 *   3. array of struct rpcrdma_rep for replies
	 * Send/recv buffers in req/rep need to be registered
	 */
	len = buf->rb_max_requests *
		(sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));

	p = kzalloc(len, GFP_KERNEL);
	if (p == NULL) {
		dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
			__func__, len);
		rc = -ENOMEM;
		goto out;
	}
	buf->rb_pool = p;	/* for freeing it later */

	buf->rb_send_bufs = (struct rpcrdma_req **) p;
	p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
	buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
	p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];

	rc = ia->ri_ops->ro_init(r_xprt);
	if (rc)
		goto out;

	for (i = 0; i < buf->rb_max_requests; i++) {
		struct rpcrdma_req *req;
		struct rpcrdma_rep *rep;

		req = rpcrdma_create_req(r_xprt);
		if (IS_ERR(req)) {
			dprintk("RPC: %s: request buffer %d alloc"
				" failed\n", __func__, i);
			rc = PTR_ERR(req);
			goto out;
		}
		buf->rb_send_bufs[i] = req;

		rep = rpcrdma_create_rep(r_xprt);
		if (IS_ERR(rep)) {
			dprintk("RPC: %s: reply buffer %d alloc failed\n",
				__func__, i);
			rc = PTR_ERR(rep);
			goto out;
		}
		buf->rb_recv_bufs[i] = rep;
	}

	return 0;
out:
	rpcrdma_buffer_destroy(buf);
	return rc;
}
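
/*
 * Layout of the pool allocated above, for reference: one kzalloc'd
 * region holds two arrays back to back -- rb_max_requests
 * rpcrdma_req pointers (rb_send_bufs) followed by rb_max_requests
 * rpcrdma_rep pointers (rb_recv_bufs) -- and rb_pool retains the
 * base address so rpcrdma_buffer_destroy() can free everything with
 * a single kfree().
 */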

static void
rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep)
{
	if (!rep)
		return;

	rpcrdma_free_regbuf(ia, rep->rr_rdmabuf);
	kfree(rep);
}

static void
rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
{
	if (!req)
		return;

	rpcrdma_free_regbuf(ia, req->rl_sendbuf);
	rpcrdma_free_regbuf(ia, req->rl_rdmabuf);
	kfree(req);
}

void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	int i;

	/* clean up in reverse order from create
	 *   1. recv mr memory (mr free, then kfree)
	 *   2. send mr memory (mr free, then kfree)
	 *   3. MWs
	 */
	dprintk("RPC: %s: entering\n", __func__);

	for (i = 0; i < buf->rb_max_requests; i++) {
		if (buf->rb_recv_bufs)
			rpcrdma_destroy_rep(ia, buf->rb_recv_bufs[i]);
		if (buf->rb_send_bufs)
			rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]);
	}

	ia->ri_ops->ro_destroy(buf);

	kfree(buf->rb_pool);
}

/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
 * some req segments uninitialized.
 */
static void
rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
{
	if (*mw) {
		list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
		*mw = NULL;
	}
}

/* Cycle mw's back in reverse order, and "spin" them.
 * This delays and scrambles reuse as much as possible.
 */
static void
rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mr_seg *seg = req->rl_segments;
	struct rpcrdma_mr_seg *seg1 = seg;
	int i;

	for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
		rpcrdma_buffer_put_mr(&seg->rl_mw, buf);
	rpcrdma_buffer_put_mr(&seg1->rl_mw, buf);
}

static void
rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	buf->rb_send_bufs[--buf->rb_send_index] = req;
	req->rl_niovs = 0;
	if (req->rl_reply) {
		buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
		req->rl_reply->rr_func = NULL;
		req->rl_reply = NULL;
	}
}

/* rpcrdma_unmap_one() was already done during deregistration.
 * Redo only the ib_post_send().
 */
static void
rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct ib_send_wr invalidate_wr, *bad_wr;
	int rc;

	dprintk("RPC: %s: FRMR %p is stale\n", __func__, r);

	/* When this FRMR is re-inserted into rb_mws, it is no longer stale */
	r->r.frmr.fr_state = FRMR_IS_INVALID;

	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
	invalidate_wr.wr_id = (unsigned long)(void *)r;
	invalidate_wr.opcode = IB_WR_LOCAL_INV;
	invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	dprintk("RPC: %s: frmr %p invalidating rkey %08x\n",
		__func__, r, r->r.frmr.fr_mr->rkey);

	read_lock(&ia->ri_qplock);
	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
	read_unlock(&ia->ri_qplock);
	if (rc) {
		/* Force rpcrdma_buffer_get() to retry */
		r->r.frmr.fr_state = FRMR_IS_STALE;
		dprintk("RPC: %s: ib_post_send failed, %i\n",
			__func__, rc);
	}
}

static void
rpcrdma_retry_flushed_linv(struct list_head *stale,
			   struct rpcrdma_buffer *buf)
{
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	struct list_head *pos;
	struct rpcrdma_mw *r;
	unsigned long flags;

	list_for_each(pos, stale) {
		r = list_entry(pos, struct rpcrdma_mw, mw_list);
		rpcrdma_retry_local_inv(r, ia);
	}

	spin_lock_irqsave(&buf->rb_lock, flags);
	list_splice_tail(stale, &buf->rb_mws);
	spin_unlock_irqrestore(&buf->rb_lock, flags);
}

static struct rpcrdma_req *
rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
			 struct list_head *stale)
{
	struct rpcrdma_mw *r;
	int i;

	i = RPCRDMA_MAX_SEGS - 1;
	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			       struct rpcrdma_mw, mw_list);
		list_del(&r->mw_list);
		if (r->r.frmr.fr_state == FRMR_IS_STALE) {
			list_add(&r->mw_list, stale);
			continue;
		}
		req->rl_segments[i].rl_mw = r;
		if (unlikely(i-- == 0))
			return req;	/* Success */
	}

	/* Not enough entries on rb_mws for this req */
	rpcrdma_buffer_put_sendbuf(req, buf);
	rpcrdma_buffer_put_mrs(req, buf);
	return NULL;
}

static struct rpcrdma_req *
rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;
	int i;

	i = RPCRDMA_MAX_SEGS - 1;
	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			       struct rpcrdma_mw, mw_list);
		list_del(&r->mw_list);
		req->rl_segments[i].rl_mw = r;
		if (unlikely(i-- == 0))
			return req;	/* Success */
	}

	/* Not enough entries on rb_mws for this req */
	rpcrdma_buffer_put_sendbuf(req, buf);
	rpcrdma_buffer_put_mrs(req, buf);
	return NULL;
}

/*
 * Get a set of request/reply buffers.
 *
 * Reply buffer (if needed) is attached to send buffer upon return.
 * Rule:
 *    rb_send_index and rb_recv_index MUST always be pointing to the
 *    *next* available buffer (non-NULL). They are incremented after
 *    removing buffers, and decremented *before* returning them.
 */
struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
	struct list_head stale;
	struct rpcrdma_req *req;
	unsigned long flags;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	if (buffers->rb_send_index == buffers->rb_max_requests) {
		spin_unlock_irqrestore(&buffers->rb_lock, flags);
		dprintk("RPC: %s: out of request buffers\n", __func__);
		return ((struct rpcrdma_req *)NULL);
	}

	req = buffers->rb_send_bufs[buffers->rb_send_index];
	if (buffers->rb_send_index < buffers->rb_recv_index) {
		dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
			__func__,
			buffers->rb_recv_index - buffers->rb_send_index);
		req->rl_reply = NULL;
	} else {
		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
	}
	buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;

	INIT_LIST_HEAD(&stale);
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
		break;
	case RPCRDMA_MTHCAFMR:
		req = rpcrdma_buffer_get_fmrs(req, buffers);
		break;
	default:
		break;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
	if (!list_empty(&stale))
		rpcrdma_retry_flushed_linv(&stale, buffers);
	return req;
}
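
/*
 * The index rule above, worked through: with rb_send_index ==
 * rb_recv_index == 0, a get takes send_bufs[0] and recv_bufs[0] and
 * advances both indexes to 1; the matching put decrements both back
 * to 0 before restoring the pointers. Each index therefore always
 * names the next *available* slot, and rb_send_index ==
 * rb_max_requests means the pool is exhausted.
 */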

/*
 * Put request/reply buffers back into pool.
 * Pre-decrement counter/array index.
 */
void
rpcrdma_buffer_put(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
	unsigned long flags;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	rpcrdma_buffer_put_sendbuf(req, buffers);
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
	case RPCRDMA_MTHCAFMR:
		rpcrdma_buffer_put_mrs(req, buffers);
		break;
	default:
		break;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Recover reply buffers from pool.
 * This happens when recovering from error conditions.
 * Post-increment counter/array index.
 */
void
rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	unsigned long flags;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	if (buffers->rb_recv_index < buffers->rb_max_requests) {
		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Put reply buffers back into pool when not attached to
 * request. This happens in error conditions.
 */
void
rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
{
	struct rpcrdma_buffer *buffers = rep->rr_buffer;
	unsigned long flags;

	rep->rr_func = NULL;
	spin_lock_irqsave(&buffers->rb_lock, flags);
	buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}
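
/*
 * Illustrative recovery pairing (a sketch; assumed flow rather than a
 * verbatim call site): after a transport error, a caller may re-attach
 * a reply to a request, or return an orphaned reply to the pool:
 *
 *	rpcrdma_recv_buffer_get(req);	... re-attach a reply buffer
 *	rpcrdma_recv_buffer_put(rep);	... or give one back directly
 */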

/*
 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
 */

static int
rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
			  struct ib_mr **mrp, struct ib_sge *iov)
{
	struct ib_phys_buf ipb;
	struct ib_mr *mr;
	int rc;

	/*
	 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
	 */
	iov->addr = ib_dma_map_single(ia->ri_id->device,
			va, len, DMA_BIDIRECTIONAL);
	if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
		return -ENOMEM;

	iov->length = len;

	if (ia->ri_have_dma_lkey) {
		*mrp = NULL;
		iov->lkey = ia->ri_dma_lkey;
		return 0;
	} else if (ia->ri_bind_mem != NULL) {
		*mrp = NULL;
		iov->lkey = ia->ri_bind_mem->lkey;
		return 0;
	}

	ipb.addr = iov->addr;
	ipb.size = iov->length;
	mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
			IB_ACCESS_LOCAL_WRITE, &iov->addr);

	dprintk("RPC:       %s: phys convert: 0x%llx "
		"registered 0x%llx length %d\n",
		__func__, (unsigned long long)ipb.addr,
		(unsigned long long)iov->addr, len);

	if (IS_ERR(mr)) {
		*mrp = NULL;
		rc = PTR_ERR(mr);
		dprintk("RPC:       %s: failed with %i\n", __func__, rc);
	} else {
		*mrp = mr;
		iov->lkey = mr->lkey;
		rc = 0;
	}

	return rc;
}

static int
rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
			    struct ib_mr *mr, struct ib_sge *iov)
{
	int rc;

	ib_dma_unmap_single(ia->ri_id->device,
			iov->addr, iov->length, DMA_BIDIRECTIONAL);

	if (mr == NULL)
		return 0;

	rc = ib_dereg_mr(mr);
	if (rc)
		dprintk("RPC:       %s: ib_dereg_mr failed %i\n", __func__, rc);
	return rc;
}

/**
 * rpcrdma_alloc_regbuf - kmalloc and register memory for SEND/RECV buffers
 * @ia: controlling rpcrdma_ia
 * @size: size of buffer to be allocated, in bytes
 * @flags: GFP flags
 *
 * Returns pointer to private header of an area of internally
 * registered memory, or an ERR_PTR. The registered buffer follows
 * the end of the private header.
 *
 * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
 * receiving the payload of RDMA RECV operations. regbufs are not
 * used for RDMA READ/WRITE operations, thus are registered only for
 * LOCAL access.
 */
struct rpcrdma_regbuf *
rpcrdma_alloc_regbuf(struct rpcrdma_ia *ia, size_t size, gfp_t flags)
{
	struct rpcrdma_regbuf *rb;
	int rc;

	rc = -ENOMEM;
	rb = kmalloc(sizeof(*rb) + size, flags);
	if (rb == NULL)
		goto out;

	rb->rg_size = size;
	rb->rg_owner = NULL;
	rc = rpcrdma_register_internal(ia, rb->rg_base, size,
				       &rb->rg_mr, &rb->rg_iov);
	if (rc)
		goto out_free;

	return rb;

out_free:
	kfree(rb);
out:
	return ERR_PTR(rc);
}

/**
 * rpcrdma_free_regbuf - deregister and free registered buffer
 * @ia: controlling rpcrdma_ia
 * @rb: regbuf to be deregistered and freed
 */
void
rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
{
	if (rb) {
		rpcrdma_deregister_internal(ia, rb->rg_mr, &rb->rg_iov);
		kfree(rb);
	}
}
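
/*
 * Illustrative regbuf life cycle (a sketch; the 1024-byte size is
 * assumed for the example):
 *
 *	struct rpcrdma_regbuf *rb;
 *
 *	rb = rpcrdma_alloc_regbuf(ia, 1024, GFP_KERNEL);
 *	if (IS_ERR(rb))
 *		return PTR_ERR(rb);
 *	... post SENDs or RECVs using rb->rg_base and rb->rg_iov ...
 *	rpcrdma_free_regbuf(ia, rb);
 */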

/*
 * Wrappers for chunk registration, shared by read/write chunk code.
 */

void
rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, bool writing)
{
	seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
	seg->mr_dmalen = seg->mr_len;
	if (seg->mr_page)
		seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
				seg->mr_page, offset_in_page(seg->mr_offset),
				seg->mr_dmalen, seg->mr_dir);
	else
		seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
				seg->mr_offset,
				seg->mr_dmalen, seg->mr_dir);
	if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
		dprintk("RPC:       %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
			__func__,
			(unsigned long long)seg->mr_dma,
			seg->mr_offset, seg->mr_dmalen);
	}
}

void
rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
{
	if (seg->mr_page)
		ib_dma_unmap_page(ia->ri_id->device,
				  seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
	else
		ib_dma_unmap_single(ia->ri_id->device,
				    seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
}
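
/*
 * Illustrative pairing (a sketch): each segment is DMA-mapped before a
 * memory registration strategy registers it with the HCA, and unmapped
 * after the registration is invalidated:
 *
 *	rpcrdma_map_one(ia, seg, writing);
 *	... register seg->mr_dma for seg->mr_dmalen bytes ...
 *	rpcrdma_unmap_one(ia, seg);
 */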

/*
 * Prepost any receive buffer, then post send.
 *
 * Receive buffer is donated to hardware, reclaimed upon recv completion.
 */
int
rpcrdma_ep_post(struct rpcrdma_ia *ia,
		struct rpcrdma_ep *ep,
		struct rpcrdma_req *req)
{
	struct ib_send_wr send_wr, *send_wr_fail;
	struct rpcrdma_rep *rep = req->rl_reply;
	int rc;

	if (rep) {
		rc = rpcrdma_ep_post_recv(ia, ep, rep);
		if (rc)
			goto out;
		req->rl_reply = NULL;
	}

	send_wr.next = NULL;
	send_wr.wr_id = 0ULL;	/* no send cookie */
	send_wr.sg_list = req->rl_send_iov;
	send_wr.num_sge = req->rl_niovs;
	send_wr.opcode = IB_WR_SEND;
	if (send_wr.num_sge == 4)	/* no need to sync any pad (constant) */
		ib_dma_sync_single_for_device(ia->ri_id->device,
			req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
			DMA_TO_DEVICE);
	ib_dma_sync_single_for_device(ia->ri_id->device,
		req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
		DMA_TO_DEVICE);
	ib_dma_sync_single_for_device(ia->ri_id->device,
		req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
		DMA_TO_DEVICE);

	if (DECR_CQCOUNT(ep) > 0)
		send_wr.send_flags = 0;
	else {	/* Provider must take a send completion every now and then */
		INIT_CQCOUNT(ep);
		send_wr.send_flags = IB_SEND_SIGNALED;
	}

	rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
	if (rc)
		dprintk("RPC:       %s: ib_post_send returned %i\n", __func__,
			rc);
out:
	return rc;
}
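
/*
 * Illustrative call (a sketch; the rx_ia and rx_ep field names are
 * assumed for the example). The reply buffer attached by
 * rpcrdma_buffer_get() is preposted here, so a caller only posts
 * the send side:
 *
 *	rc = rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req);
 *	if (rc)
 *		... recover req's buffers via rpcrdma_buffer_put() ...
 */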

/*
 * (Re)post a receive buffer.
 */
int
rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
		     struct rpcrdma_ep *ep,
		     struct rpcrdma_rep *rep)
{
	struct ib_recv_wr recv_wr, *recv_wr_fail;
	int rc;

	recv_wr.next = NULL;
	recv_wr.wr_id = (u64) (unsigned long) rep;
	recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
	recv_wr.num_sge = 1;

	ib_dma_sync_single_for_cpu(ia->ri_id->device,
				   rdmab_addr(rep->rr_rdmabuf),
				   rdmab_length(rep->rr_rdmabuf),
				   DMA_BIDIRECTIONAL);

	rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);

	if (rc)
		dprintk("RPC:       %s: ib_post_recv returned %i\n", __func__,
			rc);
	return rc;
}
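
/*
 * Illustrative repost (a sketch; assumed flow): once a reply has been
 * consumed, its rpcrdma_rep can be posted again to keep the receive
 * queue primed:
 *
 *	rc = rpcrdma_ep_post_recv(ia, ep, rep);
 */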

/* How many chunk list items fit within our inline buffers?
 */
unsigned int
rpcrdma_max_segments(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	int bytes, segments;

	bytes = min_t(unsigned int, cdata->inline_wsize, cdata->inline_rsize);
	bytes -= RPCRDMA_HDRLEN_MIN;
	if (bytes < sizeof(struct rpcrdma_segment) * 2) {
		pr_warn("RPC:       %s: inline threshold too small\n",
			__func__);
		return 0;
	}

	segments = 1 << (fls(bytes / sizeof(struct rpcrdma_segment)) - 1);
	dprintk("RPC:       %s: max chunk list size = %d segments\n",
		__func__, segments);
	return segments;
}
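
/*
 * Worked example (all values assumed for illustration): with 1024-byte
 * inline thresholds, a 28-byte minimal header, and 16-byte segments:
 *
 *	bytes = min(1024, 1024) - 28 = 996
 *	996 / 16 = 62 candidate segments
 *	1 << (fls(62) - 1) = 32, the largest power of two that fits
 */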