\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -04001/*
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040038 */
39
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040040/*
41 * verbs.c
42 *
43 * Encapsulates the major functions managing:
44 * o adapters
45 * o endpoints
46 * o connections
47 * o buffer memory
48 */

#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/prefetch.h>
#include <linux/sunrpc/addr.h>
#include <asm/bitops.h>

#include "xprt_rdma.h"

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040058/*
59 * Globals/Macros
60 */
61
Jeff Laytonf895b252014-11-17 16:58:04 -050062#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040063# define RPCDBG_FACILITY RPCDBG_TRANS
64#endif
65
/*
 * internal functions
 */

/*
 * Handle replies in tasklet context, using a single, global list.
 * The tasklet function simply calls the reply handler for each
 * reply on the list.
 */

static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
static LIST_HEAD(rpcrdma_tasklets_g);

static void
rpcrdma_run_tasklet(unsigned long data)
{
	struct rpcrdma_rep *rep;
	unsigned long flags;

	data = data;	/* the tasklet argument is unused */
	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	while (!list_empty(&rpcrdma_tasklets_g)) {
		rep = list_entry(rpcrdma_tasklets_g.next,
				 struct rpcrdma_rep, rr_list);
		list_del(&rep->rr_list);
		spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);

		rpcrdma_reply_handler(rep);

		spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	}
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
}

static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
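
/* Completion upcalls run in a context where sleeping is not allowed,
 * so replies are queued on rpcrdma_tasklets_g and handed to
 * rpcrdma_reply_handler() later, from this tasklet.
 */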

static const char * const async_event[] = {
	"CQ error",
	"QP fatal error",
	"QP request error",
	"QP access error",
	"communication established",
	"send queue drained",
	"path migration successful",
	"path mig error",
	"device fatal error",
	"port active",
	"port error",
	"LID change",
	"P_key change",
	"SM change",
	"SRQ error",
	"SRQ limit reached",
	"last WQE reached",
	"client reregister",
	"GID change",
};

#define ASYNC_MSG(status)					\
	((status) < ARRAY_SIZE(async_event) ?			\
		async_event[(status)] : "unknown async error")

static void
rpcrdma_schedule_tasklet(struct list_head *sched_list)
{
	unsigned long flags;

	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	list_splice_tail(sched_list, &rpcrdma_tasklets_g);
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
	tasklet_schedule(&rpcrdma_tasklet_g);
}
138
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400139static void
140rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
141{
142 struct rpcrdma_ep *ep = context;
143
Chuck Lever7ff11de2014-11-08 20:15:01 -0500144 pr_err("RPC: %s: %s on device %s ep %p\n",
145 __func__, ASYNC_MSG(event->event),
146 event->device->name, context);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400147 if (ep->rep_connected == 1) {
148 ep->rep_connected = -EIO;
Chuck Leverafadc462015-01-21 11:03:11 -0500149 rpcrdma_conn_func(ep);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400150 wake_up_all(&ep->rep_connect_wait);
151 }
152}
153
154static void
155rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
156{
157 struct rpcrdma_ep *ep = context;
158
Chuck Lever7ff11de2014-11-08 20:15:01 -0500159 pr_err("RPC: %s: %s on device %s ep %p\n",
160 __func__, ASYNC_MSG(event->event),
161 event->device->name, context);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400162 if (ep->rep_connected == 1) {
163 ep->rep_connected = -EIO;
Chuck Leverafadc462015-01-21 11:03:11 -0500164 rpcrdma_conn_func(ep);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400165 wake_up_all(&ep->rep_connect_wait);
166 }
167}

static const char * const wc_status[] = {
	"success",
	"local length error",
	"local QP operation error",
	"local EE context operation error",
	"local protection error",
	"WR flushed",
	"memory management operation error",
	"bad response error",
	"local access error",
	"remote invalid request error",
	"remote access error",
	"remote operation error",
	"transport retry counter exceeded",
	"RNR retry counter exceeded",
	"local RDD violation error",
	"remote invalid RD request",
	"operation aborted",
	"invalid EE context number",
	"invalid EE context state",
	"fatal error",
	"response timeout error",
	"general error",
};

#define COMPLETION_MSG(status)					\
	((status) < ARRAY_SIZE(wc_status) ?			\
		wc_status[(status)] : "unexpected completion error")
static void
rpcrdma_sendcq_process_wc(struct ib_wc *wc)
{
	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->wr_id == RPCRDMA_IGNORE_COMPLETION) {
		if (wc->status != IB_WC_SUCCESS &&
		    wc->status != IB_WC_WR_FLUSH_ERR)
			pr_err("RPC:       %s: SEND: %s\n",
			       __func__, COMPLETION_MSG(wc->status));
	} else {
		struct rpcrdma_mw *r;

		r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
		r->mw_sendcompletion(wc);
	}
}

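/* Poll the send CQ in batches of RPCRDMA_POLLSIZE completions,
 * handling at most RPCRDMA_WC_BUDGET of them per upcall so that
 * a busy CQ cannot monopolize the CPU.
 */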
static int
rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct ib_wc *wcs;
	int budget, count, rc;

	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_send_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			return rc;

		count = rc;
		while (count-- > 0)
			rpcrdma_sendcq_process_wc(wcs++);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	return 0;
}

/*
 * Handle send, fast_reg_mr, and local_inv completions.
 *
 * Send events are typically suppressed and thus do not result
 * in an upcall. Occasionally one is signaled, however. This
 * prevents the provider's completion queue from wrapping and
 * losing a completion.
 */
static void
rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_sendcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC:       %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

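	/* Re-arm the CQ, then poll once more: with
	 * IB_CQ_REPORT_MISSED_EVENTS, ib_req_notify_cq() returns a
	 * positive value when completions may have arrived before
	 * notification was re-armed, so falling through to another
	 * poll closes that race.
	 */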
	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_sendcq_poll(cq, ep);
}

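/* A receive WR's wr_id carries the rpcrdma_rep that owns the posted
 * buffer. Failed receives are still queued for the reply tasklet,
 * with rr_len set to ~0U so the reply handler can recognize them.
 */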
static void
rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
{
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long)wc->wr_id;

	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS)
		goto out_fail;

	/* status == SUCCESS means all fields in wc are trustworthy */
	if (wc->opcode != IB_WC_RECV)
		return;

	dprintk("RPC:       %s: rep %p opcode 'recv', length %u: success\n",
		__func__, rep, wc->byte_len);

	rep->rr_len = wc->byte_len;
	ib_dma_sync_single_for_cpu(rep->rr_device,
				   rdmab_addr(rep->rr_rdmabuf),
				   rep->rr_len, DMA_FROM_DEVICE);
	prefetch(rdmab_to_msg(rep->rr_rdmabuf));

out_schedule:
	list_add_tail(&rep->rr_list, sched_list);
	return;
out_fail:
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("RPC:       %s: rep %p: %s\n",
		       __func__, rep, COMPLETION_MSG(wc->status));
	rep->rr_len = ~0U;
	goto out_schedule;
}

static int
rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct list_head sched_list;
	struct ib_wc *wcs;
	int budget, count, rc;

	INIT_LIST_HEAD(&sched_list);
	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_recv_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			goto out_schedule;

		count = rc;
		while (count-- > 0)
			rpcrdma_recvcq_process_wc(wcs++, &sched_list);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	rc = 0;

out_schedule:
	rpcrdma_schedule_tasklet(&sched_list);
	return rc;
}

/*
 * Handle receive completions.
 *
 * It is reentrant but processes single events in order to maintain
 * ordering of receives to keep server credits.
 *
 * It is the responsibility of the scheduled tasklet to return
 * recv buffers to the pool. NOTE: this affects synchronization of
 * connection shutdown. That is, the structures required for
 * the completion of the reply handler must remain intact until
 * all memory has been reclaimed.
 */
static void
rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_recvcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC:       %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_recvcq_poll(cq, ep);
}

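/* Drain both completion queues so that no stale completions can
 * fire against a QP that is about to be reconnected or destroyed.
 */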
static void
rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
{
	struct ib_wc wc;
	LIST_HEAD(sched_list);

	while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
		rpcrdma_recvcq_process_wc(&wc, &sched_list);
	if (!list_empty(&sched_list))
		rpcrdma_schedule_tasklet(&sched_list);
	while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
		rpcrdma_sendcq_process_wc(&wc);
}

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
static const char * const conn[] = {
	"address resolved",
	"address error",
	"route resolved",
	"route error",
	"connect request",
	"connect response",
	"connect error",
	"unreachable",
	"rejected",
	"established",
	"disconnected",
	"device removal",
	"multicast join",
	"multicast error",
	"address change",
	"timewait exit",
};

#define CONNECTION_MSG(status)						\
	((status) < ARRAY_SIZE(conn) ?					\
		conn[(status)] : "unrecognized connection error")
#endif

static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
#endif
	struct ib_qp_attr *attr = &ia->ri_qp_attr;
	struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr;
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ia->ri_async_rc = 0;
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC:       %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC:       %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		ib_query_qp(ia->ri_id->qp, attr,
			    IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
			    iattr);
		dprintk("RPC:       %s: %d responder resources"
			" (%d initiator)\n",
			__func__, attr->max_dest_rd_atomic,
			attr->max_rd_atomic);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		connstate = -ECONNREFUSED;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
		goto connected;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		connstate = -ENODEV;
connected:
		dprintk("RPC:       %s: %sconnected\n",
			__func__, connstate > 0 ? "" : "dis");
		ep->rep_connected = connstate;
		rpcrdma_conn_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		/*FALLTHROUGH*/
	default:
		dprintk("RPC:       %s: %pIS:%u (ep 0x%p): %s\n",
			__func__, sap, rpc_get_port(sap), ep,
			CONNECTION_MSG(event->event));
		break;
	}

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	if (connstate == 1) {
		int ird = attr->max_dest_rd_atomic;
		int tird = ep->rep_remote_cma.responder_resources;

		pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n",
			sap, rpc_get_port(sap),
			ia->ri_device->name,
			ia->ri_ops->ro_displayname,
			xprt->rx_buf.rb_max_requests,
			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
	} else if (connstate < 0) {
		pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n",
			sap, rpc_get_port(sap), connstate);
	}
#endif

	return 0;
}

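/* Create a fresh rdma_cm_id and synchronously resolve the server's
 * address and route. ri_async_rc is preset to -ETIMEDOUT before each
 * step so that a timed-out wait is reported as a failure even if the
 * CM event never arrives.
 */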
static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
			struct rpcrdma_ia *ia, struct sockaddr *addr)
{
	struct rdma_cm_id *id;
	int rc;

	init_completion(&ia->ri_done);

	id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		dprintk("RPC:       %s: rdma_create_id() failed %i\n",
			__func__, rc);
		return id;
	}

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC:       %s: rdma_resolve_addr() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC:       %s: rdma_resolve_route() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}

/*
 * Drain any cq, prior to teardown.
 */
static void
rpcrdma_clean_cq(struct ib_cq *cq)
{
	struct ib_wc wc;
	int count = 0;

	while (1 == ib_poll_cq(cq, 1, &wc))
		++count;

	if (count)
		dprintk("RPC:       %s: flushed %d events (last 0x%x)\n",
			__func__, count, wc.opcode);
}

/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc, mem_priv;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct ib_device_attr *devattr = &ia->ri_devattr;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}
	ia->ri_device = ia->ri_id->device;

	ia->ri_pd = ib_alloc_pd(ia->ri_device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC:       %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_query_device(ia->ri_device, devattr);
	if (rc) {
		dprintk("RPC:       %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out3;
	}

	if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_device->local_dma_lkey;
	}

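	/* Verify that the requested memory registration mode is
	 * supported by this device, stepping down from FRMR to FMR to
	 * ALLPHYSICAL as required capabilities are found missing.
	 */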
	if (memreg == RPCRDMA_FRMR) {
		/* Requires both frmr reg and local dma lkey */
		if (((devattr->device_cap_flags &
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) ||
		      (devattr->max_fast_reg_page_list_len == 0)) {
			dprintk("RPC:       %s: FRMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_MTHCAFMR;
		}
	}
	if (memreg == RPCRDMA_MTHCAFMR) {
		if (!ia->ri_device->alloc_fmr) {
			dprintk("RPC:       %s: MTHCAFMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_ALLPHYSICAL;
		}
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_FRMR:
		ia->ri_ops = &rpcrdma_frwr_memreg_ops;
		break;
	case RPCRDMA_ALLPHYSICAL:
		ia->ri_ops = &rpcrdma_physical_memreg_ops;
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
	case RPCRDMA_MTHCAFMR:
		ia->ri_ops = &rpcrdma_fmr_memreg_ops;
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
	register_setup:
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			rc = -ENOMEM;
			goto out3;
		}
		break;
	default:
		printk(KERN_ERR "RPC: Unsupported memory "
				"registration mode: %d\n", memreg);
		rc = -ENOMEM;
		goto out3;
	}
	dprintk("RPC:       %s: memory registration strategy is '%s'\n",
		__func__, ia->ri_ops->ro_displayname);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	rwlock_init(&ia->ri_qplock);
	return 0;

out3:
	ib_dealloc_pd(ia->ri_pd);
	ia->ri_pd = NULL;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}

/*
 * Clean up/close an IA.
 *  o if event handles and PD have been initialized, free them.
 *  o close the IA
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC:       %s: entering\n", __func__);
	if (ia->ri_bind_mem != NULL) {
		rc = ib_dereg_mr(ia->ri_bind_mem);
		dprintk("RPC:       %s: ib_dereg_mr returned %i\n",
			__func__, rc);
	}

	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
		if (ia->ri_id->qp)
			rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = NULL;
	}

	/* If the pd is still busy, xprtrdma missed freeing a resource */
	if (ia->ri_pd && !IS_ERR(ia->ri_pd))
		WARN_ON(ib_dealloc_pd(ia->ri_pd));
}

/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
				struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr *devattr = &ia->ri_devattr;
	struct ib_cq *sendcq, *recvcq;
	int rc, err;

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > devattr->max_qp_wr)
		cdata->max_requests = devattr->max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	rc = ia->ri_ops->ro_open(ia, ep, cdata);
	if (rc)
		return rc;
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	if (cdata->padding) {
		ep->rep_padbuf = rpcrdma_alloc_regbuf(ia, cdata->padding,
						      GFP_KERNEL);
		if (IS_ERR(ep->rep_padbuf))
			return PTR_ERR(ep->rep_padbuf);
	} else
		ep->rep_padbuf = NULL;

	dprintk("RPC:       %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

	/* set trigger for requesting send completion */
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
	if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
		ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
	else if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;
	INIT_CQCOUNT(ep);
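	/* With sq_sig_type set to IB_SIGNAL_REQ_WR above, only sends
	 * that explicitly request it are signaled. rep_cqinit caps how
	 * many sends may go unsignaled before one must generate a
	 * completion (the INIT_CQCOUNT/DECR_CQCOUNT helpers maintain
	 * this counter), which keeps the send CQ from overflowing.
	 */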
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400766 init_waitqueue_head(&ep->rep_connect_wait);
Chuck Lever254f91e2014-05-28 10:32:17 -0400767 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400768
	sendcq = ib_create_cq(ia->ri_device, rpcrdma_sendcq_upcall,
			      rpcrdma_cq_async_error_upcall, ep,
			      ep->rep_attr.cap.max_send_wr + 1, 0);
	if (IS_ERR(sendcq)) {
		rc = PTR_ERR(sendcq);
		dprintk("RPC:       %s: failed to create send CQ: %i\n",
			__func__, rc);
		goto out1;
	}

	rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		goto out2;
	}

	recvcq = ib_create_cq(ia->ri_device, rpcrdma_recvcq_upcall,
			      rpcrdma_cq_async_error_upcall, ep,
			      ep->rep_attr.cap.max_recv_wr + 1, 0);
	if (IS_ERR(recvcq)) {
		rc = PTR_ERR(recvcq);
		dprintk("RPC:       %s: failed to create recv CQ: %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		ib_destroy_cq(recvcq);
		goto out2;
	}

	ep->rep_attr.send_cq = sendcq;
	ep->rep_attr.recv_cq = recvcq;

	/* Initialize cma parameters */

	/* RPC/RDMA does not use private data */
	ep->rep_remote_cma.private_data = NULL;
	ep->rep_remote_cma.private_data_len = 0;

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	if (devattr->max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
		ep->rep_remote_cma.responder_resources = 32;
	else
		ep->rep_remote_cma.responder_resources =
						devattr->max_qp_rd_atom;

	ep->rep_remote_cma.retry_count = 7;
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	err = ib_destroy_cq(sendcq);
	if (err)
		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
			__func__, err);
out1:
	rpcrdma_free_regbuf(ia, ep->rep_padbuf);
	return rc;
}

/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC:       %s: entering, connected is %d\n",
		__func__, ep->rep_connected);

	cancel_delayed_work_sync(&ep->rep_connect_worker);

	if (ia->ri_id->qp) {
		rpcrdma_ep_disconnect(ep, ia);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	rpcrdma_free_regbuf(ia, ep->rep_padbuf);

	rpcrdma_clean_cq(ep->rep_attr.recv_cq);
	rc = ib_destroy_cq(ep->rep_attr.recv_cq);
	if (rc)
		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
			__func__, rc);

	rpcrdma_clean_cq(ep->rep_attr.send_cq);
	rc = ib_destroy_cq(ep->rep_attr.send_cq);
	if (rc)
		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
			__func__, rc);
}

/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id, *old;
	int rc = 0;
	int retry_count = 0;

	if (ep->rep_connected != 0) {
		struct rpcrdma_xprt *xprt;
retry:
		dprintk("RPC:       %s: reconnecting...\n", __func__);

		rpcrdma_ep_disconnect(ep, ia);
		rpcrdma_flush_cqs(ep);

		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		ia->ri_ops->ro_reset(xprt);

		id = rpcrdma_create_id(xprt, ia,
				(struct sockaddr *)&xprt->rx_data.addr);
		if (IS_ERR(id)) {
			rc = -EHOSTUNREACH;
			goto out;
		}
		/* TEMP TEMP TEMP - fail if new device:
		 * Deregister/remarshal *all* requests!
		 * Close and recreate adapter, pd, etc!
		 * Re-determine all attributes still sane!
		 * More stuff I haven't thought of!
		 * Rrrgh!
		 */
		if (ia->ri_device != id->device) {
			printk("RPC:       %s: can't reconnect on "
				"different device!\n", __func__);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}
		/* END TEMP */
		rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC:       %s: rdma_create_qp failed %i\n",
				__func__, rc);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}

		write_lock(&ia->ri_qplock);
		old = ia->ri_id;
		ia->ri_id = id;
		write_unlock(&ia->ri_qplock);

		rdma_destroy_qp(old);
		rdma_destroy_id(old);
	} else {
		dprintk("RPC:       %s: connecting...\n", __func__);
		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC:       %s: rdma_create_qp failed %i\n",
				__func__, rc);
			/* do not update ep->rep_connected */
			return -ENETUNREACH;
		}
	}

	ep->rep_connected = 0;

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC:       %s: rdma_connect() failed with %i\n",
				__func__, rc);
		goto out;
	}

	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

	/*
	 * Check state. A non-peer reject indicates no listener
	 * (ECONNREFUSED), which may be a transient state. All
	 * others indicate a transport condition which has already
	 * undergone a best-effort retry.
	 */
	if (ep->rep_connected == -ECONNREFUSED &&
	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
		dprintk("RPC:       %s: non-peer_reject, retry\n", __func__);
		goto retry;
	}
	if (ep->rep_connected <= 0) {
		/* Sometimes, the only way to reliably connect to remote
		 * CMs is to use same nonzero values for ORD and IRD. */
		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
		    (ep->rep_remote_cma.responder_resources == 0 ||
		     ep->rep_remote_cma.initiator_depth !=
				ep->rep_remote_cma.responder_resources)) {
			if (ep->rep_remote_cma.responder_resources == 0)
				ep->rep_remote_cma.responder_resources = 1;
			ep->rep_remote_cma.initiator_depth =
				ep->rep_remote_cma.responder_resources;
			goto retry;
		}
		rc = ep->rep_connected;
	} else {
		dprintk("RPC:       %s: connected\n", __func__);
	}

out:
	if (rc)
		ep->rep_connected = rc;
	return rc;
}

/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
void
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	rpcrdma_flush_cqs(ep);
	rc = rdma_disconnect(ia->ri_id);
	if (!rc) {
		/* returns without wait if not connected */
		wait_event_interruptible(ep->rep_connect_wait,
							ep->rep_connected != 1);
		dprintk("RPC:       %s: after wait, %sconnected\n", __func__,
			(ep->rep_connected == 1) ? "still " : "dis");
	} else {
		dprintk("RPC:       %s: rdma_disconnect %i\n", __func__, rc);
		ep->rep_connected = rc;
	}
}

static struct rpcrdma_req *
rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_req *req;

	req = kzalloc(sizeof(*req), GFP_KERNEL);
	if (req == NULL)
		return ERR_PTR(-ENOMEM);

	req->rl_buffer = &r_xprt->rx_buf;
	return req;
}

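/* Each rep owns a DMA-mapped buffer (rr_rdmabuf) sized to the
 * negotiated inline receive limit; incoming RPC/RDMA replies land
 * in this buffer.
 */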
static struct rpcrdma_rep *
rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_rep *rep;
	int rc;

	rc = -ENOMEM;
	rep = kzalloc(sizeof(*rep), GFP_KERNEL);
	if (rep == NULL)
		goto out;

	rep->rr_rdmabuf = rpcrdma_alloc_regbuf(ia, cdata->inline_rsize,
					       GFP_KERNEL);
	if (IS_ERR(rep->rr_rdmabuf)) {
		rc = PTR_ERR(rep->rr_rdmabuf);
		goto out_free;
	}

	rep->rr_device = ia->ri_device;
	rep->rr_rxprt = r_xprt;
	return rep;

out_free:
	kfree(rep);
out:
	return ERR_PTR(rc);
}

int
rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	char *p;
	size_t len;
	int i, rc;

	buf->rb_max_requests = cdata->max_requests;
	spin_lock_init(&buf->rb_lock);

	/* Need to allocate:
	 *   1.  arrays for send and recv pointers
	 *   2.  arrays of struct rpcrdma_req to fill in pointers
	 *   3.  array of struct rpcrdma_rep for replies
	 * Send/recv buffers in req/rep need to be registered
	 */
	len = buf->rb_max_requests *
		(sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));

	p = kzalloc(len, GFP_KERNEL);
	if (p == NULL) {
		dprintk("RPC:       %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
			__func__, len);
		rc = -ENOMEM;
		goto out;
	}
	buf->rb_pool = p;	/* for freeing it later */

	buf->rb_send_bufs = (struct rpcrdma_req **) p;
	p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
	buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
	p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];

	rc = ia->ri_ops->ro_init(r_xprt);
	if (rc)
		goto out;

	for (i = 0; i < buf->rb_max_requests; i++) {
		struct rpcrdma_req *req;
		struct rpcrdma_rep *rep;

		req = rpcrdma_create_req(r_xprt);
		if (IS_ERR(req)) {
			dprintk("RPC:       %s: request buffer %d alloc"
				" failed\n", __func__, i);
			rc = PTR_ERR(req);
			goto out;
		}
		buf->rb_send_bufs[i] = req;

		rep = rpcrdma_create_rep(r_xprt);
		if (IS_ERR(rep)) {
			dprintk("RPC:       %s: reply buffer %d alloc failed\n",
				__func__, i);
			rc = PTR_ERR(rep);
			goto out;
		}
		buf->rb_recv_bufs[i] = rep;
	}

	return 0;
out:
	rpcrdma_buffer_destroy(buf);
	return rc;
}

static void
rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep)
{
	if (!rep)
		return;

	rpcrdma_free_regbuf(ia, rep->rr_rdmabuf);
	kfree(rep);
}

static void
rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
{
	if (!req)
		return;

	rpcrdma_free_regbuf(ia, req->rl_sendbuf);
	rpcrdma_free_regbuf(ia, req->rl_rdmabuf);
	kfree(req);
}

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001151void
1152rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1153{
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001154 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
Chuck Lever2e845222014-07-29 17:25:38 -04001155 int i;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001156
1157 /* clean up in reverse order from create
1158 * 1. recv mr memory (mr free, then kfree)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001159 * 2. send mr memory (mr free, then kfree)
Chuck Lever2e845222014-07-29 17:25:38 -04001160 * 3. MWs
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001161 */
1162 dprintk("RPC: %s: entering\n", __func__);
1163
1164 for (i = 0; i < buf->rb_max_requests; i++) {
Chuck Lever13924022015-01-21 11:03:52 -05001165 if (buf->rb_recv_bufs)
1166 rpcrdma_destroy_rep(ia, buf->rb_recv_bufs[i]);
1167 if (buf->rb_send_bufs)
1168 rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001169 }
1170
Chuck Lever4561f342015-03-30 14:35:17 -04001171 ia->ri_ops->ro_destroy(buf);
Allen Andrews4034ba02014-05-28 10:32:09 -04001172
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001173 kfree(buf->rb_pool);
1174}
1175
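/* rpcrdma_get_mw() and rpcrdma_put_mw() hand out and reclaim memory
 * windows from the buffer pool's rb_mws list under rb_lock. Pool
 * exhaustion is only logged here; callers must handle a NULL return.
 */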
struct rpcrdma_mw *
rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_mw *mw = NULL;
	unsigned long flags;

	spin_lock_irqsave(&buf->rb_lock, flags);
	if (!list_empty(&buf->rb_mws)) {
		mw = list_first_entry(&buf->rb_mws,
				      struct rpcrdma_mw, mw_list);
		list_del_init(&mw->mw_list);
	}
	spin_unlock_irqrestore(&buf->rb_lock, flags);

	if (!mw)
		pr_err("RPC:       %s: no MWs available\n", __func__);
	return mw;
}

void
rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	unsigned long flags;

	spin_lock_irqsave(&buf->rb_lock, flags);
	list_add_tail(&mw->mw_list, &buf->rb_mws);
	spin_unlock_irqrestore(&buf->rb_lock, flags);
}

/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
 * some req segments uninitialized.
 */
static void
rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
{
	if (*mw) {
		list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
		*mw = NULL;
	}
}

/* Cycle mw's back in reverse order, and "spin" them.
 * This delays and scrambles reuse as much as possible.
 */
static void
rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mr_seg *seg = req->rl_segments;
	struct rpcrdma_mr_seg *seg1 = seg;
	int i;

	for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
		rpcrdma_buffer_put_mr(&seg->rl_mw, buf);
	rpcrdma_buffer_put_mr(&seg1->rl_mw, buf);
}

static void
rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	buf->rb_send_bufs[--buf->rb_send_index] = req;
	req->rl_niovs = 0;
	if (req->rl_reply) {
		buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
		req->rl_reply = NULL;
	}
}

/* rpcrdma_unmap_one() was already done during deregistration.
 * Redo only the ib_post_send().
 */
static void
rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct ib_send_wr invalidate_wr, *bad_wr;
	int rc;

	dprintk("RPC:       %s: FRMR %p is stale\n", __func__, r);

	/* When this FRMR is re-inserted into rb_mws, it is no longer stale */
	r->r.frmr.fr_state = FRMR_IS_INVALID;

	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
	invalidate_wr.wr_id = (unsigned long)(void *)r;
	invalidate_wr.opcode = IB_WR_LOCAL_INV;
	invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	dprintk("RPC:       %s: frmr %p invalidating rkey %08x\n",
		__func__, r, r->r.frmr.fr_mr->rkey);

	read_lock(&ia->ri_qplock);
	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
	read_unlock(&ia->ri_qplock);
	if (rc) {
		/* Force rpcrdma_buffer_get() to retry */
		r->r.frmr.fr_state = FRMR_IS_STALE;
		dprintk("RPC:       %s: ib_post_send failed, %i\n",
			__func__, rc);
	}
}

static void
rpcrdma_retry_flushed_linv(struct list_head *stale,
			   struct rpcrdma_buffer *buf)
{
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	struct list_head *pos;
	struct rpcrdma_mw *r;
	unsigned long flags;

	list_for_each(pos, stale) {
		r = list_entry(pos, struct rpcrdma_mw, mw_list);
		rpcrdma_retry_local_inv(r, ia);
	}

	spin_lock_irqsave(&buf->rb_lock, flags);
	list_splice_tail(stale, &buf->rb_mws);
	spin_unlock_irqrestore(&buf->rb_lock, flags);
}

static struct rpcrdma_req *
rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
			 struct list_head *stale)
{
	struct rpcrdma_mw *r;
	int i;

	i = RPCRDMA_MAX_SEGS - 1;
	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			       struct rpcrdma_mw, mw_list);
		list_del(&r->mw_list);
		if (r->r.frmr.fr_state == FRMR_IS_STALE) {
			list_add(&r->mw_list, stale);
			continue;
		}
		req->rl_segments[i].rl_mw = r;
		if (unlikely(i-- == 0))
			return req;	/* Success */
	}

	/* Not enough entries on rb_mws for this req */
	rpcrdma_buffer_put_sendbuf(req, buf);
	rpcrdma_buffer_put_mrs(req, buf);
	return NULL;
}

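/* Like rpcrdma_buffer_get_frmrs(), but FMRs carry no state flag,
 * so there is no stale-MW check to make.
 */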
static struct rpcrdma_req *
rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;
	int i;

	i = RPCRDMA_MAX_SEGS - 1;
	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			       struct rpcrdma_mw, mw_list);
		list_del(&r->mw_list);
		req->rl_segments[i].rl_mw = r;
		if (unlikely(i-- == 0))
			return req;	/* Success */
	}

	/* Not enough entries on rb_mws for this req */
	rpcrdma_buffer_put_sendbuf(req, buf);
	rpcrdma_buffer_put_mrs(req, buf);
	return NULL;
}

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001349/*
1350 * Get a set of request/reply buffers.
1351 *
1352 * Reply buffer (if needed) is attached to send buffer upon return.
1353 * Rule:
1354 * rb_send_index and rb_recv_index MUST always be pointing to the
1355 * *next* available buffer (non-NULL). They are incremented after
1356 * removing buffers, and decremented *before* returning them.
1357 */
1358struct rpcrdma_req *
1359rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1360{
Chuck Leverc2922c02014-07-29 17:24:36 -04001361 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
Chuck Leverddb6beb2014-07-29 17:24:54 -04001362 struct list_head stale;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001363 struct rpcrdma_req *req;
1364 unsigned long flags;
1365
1366 spin_lock_irqsave(&buffers->rb_lock, flags);
1367 if (buffers->rb_send_index == buffers->rb_max_requests) {
1368 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1369 dprintk("RPC: %s: out of request buffers\n", __func__);
1370 return ((struct rpcrdma_req *)NULL);
1371 }
1372
1373 req = buffers->rb_send_bufs[buffers->rb_send_index];
1374 if (buffers->rb_send_index < buffers->rb_recv_index) {
1375 dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
1376 __func__,
1377 buffers->rb_recv_index - buffers->rb_send_index);
1378 req->rl_reply = NULL;
1379 } else {
1380 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1381 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1382 }
1383 buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001384
1385 INIT_LIST_HEAD(&stale);
Chuck Leverc2922c02014-07-29 17:24:36 -04001386 switch (ia->ri_memreg_strategy) {
1387 case RPCRDMA_FRMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001388 req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
1389 break;
Chuck Leverc2922c02014-07-29 17:24:36 -04001390 case RPCRDMA_MTHCAFMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001391 req = rpcrdma_buffer_get_fmrs(req, buffers);
Chuck Leverc2922c02014-07-29 17:24:36 -04001392 break;
1393 default:
1394 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001395 }
1396 spin_unlock_irqrestore(&buffers->rb_lock, flags);
Chuck Leverddb6beb2014-07-29 17:24:54 -04001397 if (!list_empty(&stale))
1398 rpcrdma_retry_flushed_linv(&stale, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001399 return req;
1400}
1401
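/*
 * Illustrative sketch only, not part of the original file: how a
 * caller might pair rpcrdma_buffer_get() with rpcrdma_buffer_put().
 * The helper below is hypothetical; the transport's real call sites
 * live in transport.c.
 */
static int rpcrdma_example_use_req(struct rpcrdma_buffer *buffers)
{
        struct rpcrdma_req *req;

        req = rpcrdma_buffer_get(buffers);
        if (req == NULL)
                return -ENOMEM; /* pool exhausted, or no usable MWs */

        /* ... marshal the RPC and post it with rpcrdma_ep_post() ... */

        /* Returns req, its reply buffer, and its MWs to the pool. */
        rpcrdma_buffer_put(req);
        return 0;
}
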
/*
 * Put request/reply buffers back into pool.
 * Pre-decrement counter/array index.
 */
void
rpcrdma_buffer_put(struct rpcrdma_req *req)
{
        struct rpcrdma_buffer *buffers = req->rl_buffer;
        struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
        unsigned long flags;

        spin_lock_irqsave(&buffers->rb_lock, flags);
        rpcrdma_buffer_put_sendbuf(req, buffers);
        switch (ia->ri_memreg_strategy) {
        case RPCRDMA_FRMR:
        case RPCRDMA_MTHCAFMR:
                rpcrdma_buffer_put_mrs(req, buffers);
                break;
        default:
                break;
        }
        spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Recover reply buffers from pool.
 * This happens when recovering from error conditions.
 * Post-increment counter/array index.
 */
void
rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
{
        struct rpcrdma_buffer *buffers = req->rl_buffer;
        unsigned long flags;

        spin_lock_irqsave(&buffers->rb_lock, flags);
        if (buffers->rb_recv_index < buffers->rb_max_requests) {
                req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
                buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
        }
        spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Put reply buffers back into pool when not attached to
 * a request. This happens in error conditions.
 */
void
rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
{
        struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf;
        unsigned long flags;

        spin_lock_irqsave(&buffers->rb_lock, flags);
        buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
        spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
 */

void
rpcrdma_mapping_error(struct rpcrdma_mr_seg *seg)
{
        dprintk("RPC: map_one: offset %p iova %llx len %zu\n",
                seg->mr_offset,
                (unsigned long long)seg->mr_dma, seg->mr_dmalen);
}

static int
rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
                          struct ib_mr **mrp, struct ib_sge *iov)
{
        struct ib_phys_buf ipb;
        struct ib_mr *mr;
        int rc;

        /*
         * All memory passed here was kmalloc'ed, therefore phys-contiguous.
         */
        iov->addr = ib_dma_map_single(ia->ri_device,
                                      va, len, DMA_BIDIRECTIONAL);
        if (ib_dma_mapping_error(ia->ri_device, iov->addr))
                return -ENOMEM;

        iov->length = len;

        if (ia->ri_have_dma_lkey) {
                *mrp = NULL;
                iov->lkey = ia->ri_dma_lkey;
                return 0;
        } else if (ia->ri_bind_mem != NULL) {
                *mrp = NULL;
                iov->lkey = ia->ri_bind_mem->lkey;
                return 0;
        }

        ipb.addr = iov->addr;
        ipb.size = iov->length;
        mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
                            IB_ACCESS_LOCAL_WRITE, &iov->addr);

        dprintk("RPC: %s: phys convert: 0x%llx "
                "registered 0x%llx length %d\n",
                __func__, (unsigned long long)ipb.addr,
                (unsigned long long)iov->addr, len);

        if (IS_ERR(mr)) {
                *mrp = NULL;
                rc = PTR_ERR(mr);
                dprintk("RPC: %s: failed with %i\n", __func__, rc);
        } else {
                *mrp = mr;
                iov->lkey = mr->lkey;
                rc = 0;
        }

        return rc;
}

static int
rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
                            struct ib_mr *mr, struct ib_sge *iov)
{
        int rc;

        ib_dma_unmap_single(ia->ri_device,
                            iov->addr, iov->length, DMA_BIDIRECTIONAL);

        if (mr == NULL)
                return 0;

        rc = ib_dereg_mr(mr);
        if (rc)
                dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
        return rc;
}

/**
 * rpcrdma_alloc_regbuf - kmalloc and register memory for SEND/RECV buffers
 * @ia: controlling rpcrdma_ia
 * @size: size of buffer to be allocated, in bytes
 * @flags: GFP flags
 *
 * Returns pointer to private header of an area of internally
 * registered memory, or an ERR_PTR. The registered buffer follows
 * the end of the private header.
 *
 * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
 * receiving the payload of RDMA RECV operations. regbufs are not
 * used for RDMA READ/WRITE operations, thus are registered only for
 * LOCAL access.
 */
struct rpcrdma_regbuf *
rpcrdma_alloc_regbuf(struct rpcrdma_ia *ia, size_t size, gfp_t flags)
{
        struct rpcrdma_regbuf *rb;
        int rc;

        rc = -ENOMEM;
        rb = kmalloc(sizeof(*rb) + size, flags);
        if (rb == NULL)
                goto out;

        rb->rg_size = size;
        rb->rg_owner = NULL;
        rc = rpcrdma_register_internal(ia, rb->rg_base, size,
                                       &rb->rg_mr, &rb->rg_iov);
        if (rc)
                goto out_free;

        return rb;

out_free:
        kfree(rb);
out:
        return ERR_PTR(rc);
}

/**
 * rpcrdma_free_regbuf - deregister and free registered buffer
 * @ia: controlling rpcrdma_ia
 * @rb: regbuf to be deregistered and freed
 */
void
rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
{
        if (rb) {
                rpcrdma_deregister_internal(ia, rb->rg_mr, &rb->rg_iov);
                kfree(rb);
        }
}

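/*
 * Illustrative sketch only, not part of the original file: a typical
 * regbuf lifecycle. The helper name and the 1024-byte size are
 * hypothetical; rb->rg_iov supplies the {addr, length, lkey} triple
 * used when building an ib_sge for SEND or RECV.
 */
static int rpcrdma_example_regbuf(struct rpcrdma_ia *ia)
{
        struct rpcrdma_regbuf *rb;

        rb = rpcrdma_alloc_regbuf(ia, 1024, GFP_KERNEL);
        if (IS_ERR(rb))
                return PTR_ERR(rb);

        /* rb->rg_base is the start of the DMA-mapped payload area. */

        rpcrdma_free_regbuf(ia, rb);
        return 0;
}
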
/*
 * Prepost any receive buffer, then post send.
 *
 * Receive buffer is donated to hardware, reclaimed upon recv completion.
 */
int
rpcrdma_ep_post(struct rpcrdma_ia *ia,
                struct rpcrdma_ep *ep,
                struct rpcrdma_req *req)
{
        struct ib_send_wr send_wr, *send_wr_fail;
        struct rpcrdma_rep *rep = req->rl_reply;
        int rc;

        if (rep) {
                rc = rpcrdma_ep_post_recv(ia, ep, rep);
                if (rc)
                        goto out;
                req->rl_reply = NULL;
        }

        send_wr.next = NULL;
        send_wr.wr_id = RPCRDMA_IGNORE_COMPLETION;
        send_wr.sg_list = req->rl_send_iov;
        send_wr.num_sge = req->rl_niovs;
        send_wr.opcode = IB_WR_SEND;

        /* When there are four SGEs, sge[2] holds the constant zero pad,
         * which never needs a DMA sync; only the tail in sge[3] does. */
        if (send_wr.num_sge == 4)
                ib_dma_sync_single_for_device(ia->ri_device,
                                              req->rl_send_iov[3].addr,
                                              req->rl_send_iov[3].length,
                                              DMA_TO_DEVICE);
        ib_dma_sync_single_for_device(ia->ri_device,
                                      req->rl_send_iov[1].addr,
                                      req->rl_send_iov[1].length,
                                      DMA_TO_DEVICE);
        ib_dma_sync_single_for_device(ia->ri_device,
                                      req->rl_send_iov[0].addr,
                                      req->rl_send_iov[0].length,
                                      DMA_TO_DEVICE);

        if (DECR_CQCOUNT(ep) > 0)
                send_wr.send_flags = 0;
        else { /* Provider must take a send completion every now and then */
                INIT_CQCOUNT(ep);
                send_wr.send_flags = IB_SEND_SIGNALED;
        }

        rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
        if (rc)
                dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
                        rc);
out:
        return rc;
}

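/*
 * Illustrative model only, not part of the original file: the
 * signaling policy above, assuming INIT_CQCOUNT and DECR_CQCOUNT
 * (from xprt_rdma.h) refill and decrement an atomic budget. With a
 * budget of "cqinit", only every cqinit-th SEND is signaled, so most
 * work requests retire without raising a completion. The helper name
 * is hypothetical.
 */
static int rpcrdma_example_send_flags(atomic_t *cqcount, int cqinit)
{
        if (atomic_sub_return(1, cqcount) > 0)
                return 0;                       /* unsignaled send */
        atomic_set(cqcount, cqinit);            /* refill the budget */
        return IB_SEND_SIGNALED;                /* force a completion */
}
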
/*
 * (Re)post a receive buffer.
 */
int
rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
                     struct rpcrdma_ep *ep,
                     struct rpcrdma_rep *rep)
{
        struct ib_recv_wr recv_wr, *recv_wr_fail;
        int rc;

        recv_wr.next = NULL;
        recv_wr.wr_id = (u64) (unsigned long) rep;
        recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
        recv_wr.num_sge = 1;

        ib_dma_sync_single_for_cpu(ia->ri_device,
                                   rdmab_addr(rep->rr_rdmabuf),
                                   rdmab_length(rep->rr_rdmabuf),
                                   DMA_BIDIRECTIONAL);

        rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);

        if (rc)
                dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
                        rc);
        return rc;
}

/* How many chunk list items fit within our inline buffers?
 */
unsigned int
rpcrdma_max_segments(struct rpcrdma_xprt *r_xprt)
{
        struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
        int bytes, segments;

        bytes = min_t(unsigned int, cdata->inline_wsize, cdata->inline_rsize);
        bytes -= RPCRDMA_HDRLEN_MIN;
        if (bytes < sizeof(struct rpcrdma_segment) * 2) {
                pr_warn("RPC: %s: inline threshold too small\n",
                        __func__);
                return 0;
        }

        segments = 1 << (fls(bytes / sizeof(struct rpcrdma_segment)) - 1);
        dprintk("RPC: %s: max chunk list size = %d segments\n",
                __func__, segments);
        return segments;
}
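/*
 * Worked example for the sizing above, with assumed values: if both
 * inline buffers are 1024 bytes, RPCRDMA_HDRLEN_MIN is 28, and
 * struct rpcrdma_segment is 16 bytes, then bytes = 1024 - 28 = 996,
 * which holds 996 / 16 = 62 segments; rounding down to a power of
 * two gives 1 << (fls(62) - 1) = 1 << 5 = 32 segments.
 */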