\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -04001/*
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040038 */
39
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040040/*
41 * verbs.c
42 *
43 * Encapsulates the major functions managing:
44 * o adapters
45 * o endpoints
46 * o connections
47 * o buffer memory
48 */
49
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <asm/bitops.h>

#include "xprt_rdma.h"

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040056/*
57 * Globals/Macros
58 */
59
60#ifdef RPC_DEBUG
61# define RPCDBG_FACILITY RPCDBG_TRANS
62#endif
63
Chuck Lever9f9d8022014-07-29 17:24:45 -040064static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);
Chuck Lever467c9672014-11-08 20:14:29 -050065static void rpcrdma_reset_fmrs(struct rpcrdma_ia *);
Chuck Lever9f9d8022014-07-29 17:24:45 -040066
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040067/*
68 * internal functions
69 */
70
71/*
72 * handle replies in tasklet context, using a single, global list
73 * rdma tasklet function -- just turn around and call the func
74 * for all replies on the list
75 */
76
77static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
78static LIST_HEAD(rpcrdma_tasklets_g);
79
static void
rpcrdma_run_tasklet(unsigned long data)
{
	struct rpcrdma_rep *rep;
	void (*func)(struct rpcrdma_rep *);
	unsigned long flags;

	data = data;
	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	while (!list_empty(&rpcrdma_tasklets_g)) {
		rep = list_entry(rpcrdma_tasklets_g.next,
				 struct rpcrdma_rep, rr_list);
		list_del(&rep->rr_list);
		func = rep->rr_func;
		rep->rr_func = NULL;
		spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);

		if (func)
			func(rep);
		else
			rpcrdma_recv_buffer_put(rep);

		spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	}
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
}

static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);

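/* Splice a batch of completed replies onto the global tasklet list and
 * kick rpcrdma_tasklet_g, which drains the list in rpcrdma_run_tasklet().
 */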
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400109static void
Chuck Leverf1a03b72014-11-08 20:14:37 -0500110rpcrdma_schedule_tasklet(struct list_head *sched_list)
111{
112 unsigned long flags;
113
114 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
115 list_splice_tail(sched_list, &rpcrdma_tasklets_g);
116 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
117 tasklet_schedule(&rpcrdma_tasklet_g);
118}
119
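/* Asynchronous error upcalls, registered below as the QP event handler
 * and as the CQ event handler. A fatal QP or CQ error on a live
 * connection (rep_connected == 1) marks the endpoint failed and wakes
 * anyone waiting on rep_connect_wait.
 */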
static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	dprintk("RPC: %s: QP error %X on device %s ep %p\n",
		__func__, event->event, event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

static void
rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	dprintk("RPC: %s: CQ error %X on device %s ep %p\n",
		__func__, event->event, event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

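/* Process one send-side completion. The wr_id, when non-zero, carries
 * the rpcrdma_mw associated with a registration or invalidation work
 * request; an unsuccessful completion marks that FRMR stale so it can
 * be recovered before it is reused.
 */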
static void
rpcrdma_sendcq_process_wc(struct ib_wc *wc)
{
	struct rpcrdma_mw *frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;

	dprintk("RPC: %s: frmr %p status %X opcode %d\n",
		__func__, frmr, wc->status, wc->opcode);

	if (wc->wr_id == 0ULL)
		return;
	if (wc->status != IB_WC_SUCCESS)
		frmr->r.frmr.fr_state = FRMR_IS_STALE;
}

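/* Poll the send CQ in batches of RPCRDMA_POLLSIZE completions, up to a
 * total of roughly RPCRDMA_WC_BUDGET completions per upcall, bounding
 * the work done in a single completion handler invocation.
 */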
static int
rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct ib_wc *wcs;
	int budget, count, rc;

	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_send_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			return rc;

		count = rc;
		while (count-- > 0)
			rpcrdma_sendcq_process_wc(wcs++);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	return 0;
}

/*
 * Handle send, fast_reg_mr, and local_inv completions.
 *
 * Send events are typically suppressed and thus do not result
 * in an upcall. Occasionally one is signaled, however. This
 * prevents the provider's completion queue from wrapping and
 * losing a completion.
 */
static void
rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_sendcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC: %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_sendcq_poll(cq, ep);
}

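/* Process one receive completion: record the received length, sync the
 * reply buffer for the CPU, refresh the credit count advertised in the
 * RPC/RDMA header, and queue the rpcrdma_rep on sched_list for reply
 * processing in tasklet context.
 */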
static void
rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
{
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long)wc->wr_id;

	dprintk("RPC: %s: rep %p status %X opcode %X length %u\n",
		__func__, rep, wc->status, wc->opcode, wc->byte_len);

	if (wc->status != IB_WC_SUCCESS) {
		rep->rr_len = ~0U;
		goto out_schedule;
	}
	if (wc->opcode != IB_WC_RECV)
		return;

	rep->rr_len = wc->byte_len;
	ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
			rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);

	if (rep->rr_len >= 16) {
		struct rpcrdma_msg *p = (struct rpcrdma_msg *)rep->rr_base;
		unsigned int credits = ntohl(p->rm_credit);

		if (credits == 0)
			credits = 1;	/* don't deadlock */
		else if (credits > rep->rr_buffer->rb_max_requests)
			credits = rep->rr_buffer->rb_max_requests;
		atomic_set(&rep->rr_buffer->rb_credits, credits);
	}

out_schedule:
	list_add_tail(&rep->rr_list, sched_list);
}

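/* Poll the receive CQ under the same RPCRDMA_WC_BUDGET cap as the send
 * path, then hand the accumulated replies to the reply tasklet in one
 * batch.
 */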
static int
rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct list_head sched_list;
	struct ib_wc *wcs;
	int budget, count, rc;

	INIT_LIST_HEAD(&sched_list);
	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_recv_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			goto out_schedule;

		count = rc;
		while (count-- > 0)
			rpcrdma_recvcq_process_wc(wcs++, &sched_list);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	rc = 0;

out_schedule:
	rpcrdma_schedule_tasklet(&sched_list);
	return rc;
}

/*
 * Handle receive completions.
 *
 * It is reentrant but processes single events in order to maintain
 * ordering of receives to keep server credits.
 *
 * It is the responsibility of the scheduled tasklet to return
 * recv buffers to the pool. NOTE: this affects synchronization of
 * connection shutdown. That is, the structures required for
 * the completion of the reply handler must remain intact until
 * all memory has been reclaimed.
 */
static void
rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_recvcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC: %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_recvcq_poll(cq, ep);
}

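/* Drain both completion queues, processing any completions that are
 * still outstanding. Used on the disconnect and reconnect paths.
 */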
static void
rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
{
	struct ib_wc wc;
	LIST_HEAD(sched_list);

	while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
		rpcrdma_recvcq_process_wc(&wc, &sched_list);
	if (!list_empty(&sched_list))
		rpcrdma_schedule_tasklet(&sched_list);
	while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
		rpcrdma_sendcq_process_wc(&wc);
}

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400331#ifdef RPC_DEBUG
332static const char * const conn[] = {
333 "address resolved",
334 "address error",
335 "route resolved",
336 "route error",
337 "connect request",
338 "connect response",
339 "connect error",
340 "unreachable",
341 "rejected",
342 "established",
343 "disconnected",
Chuck Lever8079fb72014-07-29 17:26:12 -0400344 "device removal",
345 "multicast join",
346 "multicast error",
347 "address change",
348 "timewait exit",
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400349};
Chuck Lever8079fb72014-07-29 17:26:12 -0400350
351#define CONNECTION_MSG(status) \
352 ((status) < ARRAY_SIZE(conn) ? \
353 conn[(status)] : "unrecognized connection error")
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400354#endif
355
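/* Connection manager event handler. Address and route resolution
 * results are reported back to rpcrdma_create_id() through ia->ri_done;
 * connection state changes update ep->rep_connected and wake up
 * waiters on ep->rep_connect_wait.
 */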
static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
#ifdef RPC_DEBUG
	struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
#endif
	struct ib_qp_attr attr;
	struct ib_qp_init_attr iattr;
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ia->ri_async_rc = 0;
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		ib_query_qp(ia->ri_id->qp, &attr,
			IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
			&iattr);
		dprintk("RPC: %s: %d responder resources"
			" (%d initiator)\n",
			__func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		connstate = -ECONNREFUSED;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
		goto connected;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		connstate = -ENODEV;
connected:
		atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
		dprintk("RPC: %s: %sconnected\n",
			__func__, connstate > 0 ? "" : "dis");
		ep->rep_connected = connstate;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		/*FALLTHROUGH*/
	default:
		dprintk("RPC: %s: %pI4:%u (ep 0x%p): %s\n",
			__func__, &addr->sin_addr.s_addr,
			ntohs(addr->sin_port), ep,
			CONNECTION_MSG(event->event));
		break;
	}

#ifdef RPC_DEBUG
	if (connstate == 1) {
		int ird = attr.max_dest_rd_atomic;
		int tird = ep->rep_remote_cma.responder_resources;
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
			"on %s, memreg %d slots %d ird %d%s\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			ia->ri_id->device->name,
			ia->ri_memreg_strategy,
			xprt->rx_buf.rb_max_requests,
			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
	} else if (connstate < 0) {
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			connstate);
	}
#endif

	return 0;
}

static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
			struct rpcrdma_ia *ia, struct sockaddr *addr)
{
	struct rdma_cm_id *id;
	int rc;

	init_completion(&ia->ri_done);

	id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		dprintk("RPC: %s: rdma_create_id() failed %i\n",
			__func__, rc);
		return id;
	}

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}

/*
 * Drain any cq, prior to teardown.
 */
static void
rpcrdma_clean_cq(struct ib_cq *cq)
{
	struct ib_wc wc;
	int count = 0;

	while (1 == ib_poll_cq(cq, 1, &wc))
		++count;

	if (count)
		dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
			__func__, count, wc.opcode);
}

/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc, mem_priv;
	struct ib_device_attr devattr;
	struct rpcrdma_ia *ia = &xprt->rx_ia;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	/*
	 * Query the device to determine if the requested memory
	 * registration strategy is supported. If it isn't, set the
	 * strategy to a globally supported model.
	 */
	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out2;
	}

	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
	}

	if (memreg == RPCRDMA_FRMR) {
		/* Requires both frmr reg and local dma lkey */
		if ((devattr.device_cap_flags &
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
			dprintk("RPC: %s: FRMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_MTHCAFMR;
		} else {
			/* Mind the ia limit on FRMR page list depth */
			ia->ri_max_frmr_depth = min_t(unsigned int,
				RPCRDMA_MAX_DATA_SEGS,
				devattr.max_fast_reg_page_list_len);
		}
	}
	if (memreg == RPCRDMA_MTHCAFMR) {
		if (!ia->ri_id->device->alloc_fmr) {
			dprintk("RPC: %s: MTHCAFMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_ALLPHYSICAL;
		}
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_FRMR:
		break;
	case RPCRDMA_ALLPHYSICAL:
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
	case RPCRDMA_MTHCAFMR:
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
	register_setup:
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			rc = -ENOMEM;
			goto out2;
		}
		break;
	default:
		printk(KERN_ERR "RPC: Unsupported memory "
				"registration mode: %d\n", memreg);
		rc = -ENOMEM;
		goto out2;
	}
	dprintk("RPC: %s: memory registration strategy is %d\n",
		__func__, memreg);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	rwlock_init(&ia->ri_qplock);
	return 0;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}

/*
 * Clean up/close an IA.
 *  o if event handles and PD have been initialized, free them.
 *  o close the IA
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering\n", __func__);
	if (ia->ri_bind_mem != NULL) {
		rc = ib_dereg_mr(ia->ri_bind_mem);
		dprintk("RPC: %s: ib_dereg_mr returned %i\n",
			__func__, rc);
	}
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
		if (ia->ri_id->qp)
			rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = NULL;
	}
	if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
		rc = ib_dealloc_pd(ia->ri_pd);
		dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
			__func__, rc);
	}
}

/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
				struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr devattr;
	struct ib_cq *sendcq, *recvcq;
	int rc, err;

	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		return rc;
	}

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > devattr.max_qp_wr)
		cdata->max_requests = devattr.max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	/* send_cq and recv_cq initialized below */
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR: {
		int depth = 7;

		/* Add room for frmr register and invalidate WRs.
		 * 1. FRMR reg WR for head
		 * 2. FRMR invalidate WR for head
		 * 3. N FRMR reg WRs for pagelist
		 * 4. N FRMR invalidate WRs for pagelist
		 * 5. FRMR reg WR for tail
		 * 6. FRMR invalidate WR for tail
		 * 7. The RDMA_SEND WR
		 */

		/* Calculate N if the device max FRMR depth is smaller than
		 * RPCRDMA_MAX_DATA_SEGS.
		 */
		if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
			int delta = RPCRDMA_MAX_DATA_SEGS -
				    ia->ri_max_frmr_depth;

			do {
				depth += 2; /* FRMR reg + invalidate */
				delta -= ia->ri_max_frmr_depth;
			} while (delta > 0);

		}
		ep->rep_attr.cap.max_send_wr *= depth;
		if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
			cdata->max_requests = devattr.max_qp_wr / depth;
			if (!cdata->max_requests)
				return -EINVAL;
			ep->rep_attr.cap.max_send_wr = cdata->max_requests *
						       depth;
		}
		break;
	}
	default:
		break;
	}
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

	/* set trigger for requesting send completion */
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
	if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
		ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
	else if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;
	INIT_CQCOUNT(ep);
	ep->rep_ia = ia;
	init_waitqueue_head(&ep->rep_connect_wait);
	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

	sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
				  rpcrdma_cq_async_error_upcall, ep,
				  ep->rep_attr.cap.max_send_wr + 1, 0);
	if (IS_ERR(sendcq)) {
		rc = PTR_ERR(sendcq);
		dprintk("RPC: %s: failed to create send CQ: %i\n",
			__func__, rc);
		goto out1;
	}

	rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		goto out2;
	}

	recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
				  rpcrdma_cq_async_error_upcall, ep,
				  ep->rep_attr.cap.max_recv_wr + 1, 0);
	if (IS_ERR(recvcq)) {
		rc = PTR_ERR(recvcq);
		dprintk("RPC: %s: failed to create recv CQ: %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		ib_destroy_cq(recvcq);
		goto out2;
	}

	ep->rep_attr.send_cq = sendcq;
	ep->rep_attr.recv_cq = recvcq;

	/* Initialize cma parameters */

	/* RPC/RDMA does not use private data */
	ep->rep_remote_cma.private_data = NULL;
	ep->rep_remote_cma.private_data_len = 0;

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	if (devattr.max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
		ep->rep_remote_cma.responder_resources = 32;
	else
		ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;

	ep->rep_remote_cma.retry_count = 7;
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	err = ib_destroy_cq(sendcq);
	if (err)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, err);
out1:
	return rc;
}

/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering, connected is %d\n",
		__func__, ep->rep_connected);

	cancel_delayed_work_sync(&ep->rep_connect_worker);

	if (ia->ri_id->qp) {
		rpcrdma_ep_disconnect(ep, ia);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	/* padding - could be done in rpcrdma_buffer_destroy... */
	if (ep->rep_pad_mr) {
		rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
		ep->rep_pad_mr = NULL;
	}

	rpcrdma_clean_cq(ep->rep_attr.recv_cq);
	rc = ib_destroy_cq(ep->rep_attr.recv_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);

	rpcrdma_clean_cq(ep->rep_attr.send_cq);
	rc = ib_destroy_cq(ep->rep_attr.send_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);
}

/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id, *old;
	int rc = 0;
	int retry_count = 0;

	if (ep->rep_connected != 0) {
		struct rpcrdma_xprt *xprt;
retry:
		dprintk("RPC: %s: reconnecting...\n", __func__);

		rpcrdma_ep_disconnect(ep, ia);
		rpcrdma_flush_cqs(ep);

		switch (ia->ri_memreg_strategy) {
		case RPCRDMA_FRMR:
			rpcrdma_reset_frmrs(ia);
			break;
		case RPCRDMA_MTHCAFMR:
			rpcrdma_reset_fmrs(ia);
			break;
		case RPCRDMA_ALLPHYSICAL:
			break;
		default:
			rc = -EIO;
			goto out;
		}

		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		id = rpcrdma_create_id(xprt, ia,
				(struct sockaddr *)&xprt->rx_data.addr);
		if (IS_ERR(id)) {
			rc = -EHOSTUNREACH;
			goto out;
		}
		/* TEMP TEMP TEMP - fail if new device:
		 * Deregister/remarshal *all* requests!
		 * Close and recreate adapter, pd, etc!
		 * Re-determine all attributes still sane!
		 * More stuff I haven't thought of!
		 * Rrrgh!
		 */
		if (ia->ri_id->device != id->device) {
			printk("RPC: %s: can't reconnect on "
				"different device!\n", __func__);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}
		/* END TEMP */
		rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
				__func__, rc);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}

		write_lock(&ia->ri_qplock);
		old = ia->ri_id;
		ia->ri_id = id;
		write_unlock(&ia->ri_qplock);

		rdma_destroy_qp(old);
		rdma_destroy_id(old);
	} else {
		dprintk("RPC: %s: connecting...\n", __func__);
		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
				__func__, rc);
			/* do not update ep->rep_connected */
			return -ENETUNREACH;
		}
	}

	ep->rep_connected = 0;

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC: %s: rdma_connect() failed with %i\n",
			__func__, rc);
		goto out;
	}

	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

	/*
	 * Check state. A non-peer reject indicates no listener
	 * (ECONNREFUSED), which may be a transient state. All
	 * others indicate a transport condition which has already
	 * undergone a best-effort.
	 */
	if (ep->rep_connected == -ECONNREFUSED &&
	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
		dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
		goto retry;
	}
	if (ep->rep_connected <= 0) {
		/* Sometimes, the only way to reliably connect to remote
		 * CMs is to use same nonzero values for ORD and IRD. */
		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
		    (ep->rep_remote_cma.responder_resources == 0 ||
		     ep->rep_remote_cma.initiator_depth !=
				ep->rep_remote_cma.responder_resources)) {
			if (ep->rep_remote_cma.responder_resources == 0)
				ep->rep_remote_cma.responder_resources = 1;
			ep->rep_remote_cma.initiator_depth =
				ep->rep_remote_cma.responder_resources;
			goto retry;
		}
		rc = ep->rep_connected;
	} else {
		dprintk("RPC: %s: connected\n", __func__);
	}

out:
	if (rc)
		ep->rep_connected = rc;
	return rc;
}

/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
void
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	rpcrdma_flush_cqs(ep);
	rc = rdma_disconnect(ia->ri_id);
	if (!rc) {
		/* returns without wait if not connected */
		wait_event_interruptible(ep->rep_connect_wait,
					 ep->rep_connected != 1);
		dprintk("RPC: %s: after wait, %sconnected\n", __func__,
			(ep->rep_connected == 1) ? "still " : "dis");
	} else {
		dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
		ep->rep_connected = rc;
	}
}

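/* Allocate the pool of FMRs used for registering RPC data segments:
 * RPCRDMA_MAX_SEGS rpcrdma_mw entries for each of rb_max_requests + 1
 * requests, all placed on the rb_mws free list and the rb_all list.
 */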
static int
rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
{
	int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
	struct ib_fmr_attr fmr_attr = {
		.max_pages	= RPCRDMA_MAX_DATA_SEGS,
		.max_maps	= 1,
		.page_shift	= PAGE_SHIFT
	};
	struct rpcrdma_mw *r;
	int i, rc;

	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
	dprintk("RPC: %s: initializing %d FMRs\n", __func__, i);

	while (i--) {
		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (r == NULL)
			return -ENOMEM;

		r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr);
		if (IS_ERR(r->r.fmr)) {
			rc = PTR_ERR(r->r.fmr);
			dprintk("RPC: %s: ib_alloc_fmr failed %i\n",
				__func__, rc);
			goto out_free;
		}

		list_add(&r->mw_list, &buf->rb_mws);
		list_add(&r->mw_all, &buf->rb_all);
	}
	return 0;

out_free:
	kfree(r);
	return rc;
}

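/* Allocate the pool of FRMRs, sized the same way as the FMR pool above.
 * Each rpcrdma_mw gets a fast_reg MR and a page list bounded by the
 * adapter's ri_max_frmr_depth.
 */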
static int
rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_frmr *f;
	struct rpcrdma_mw *r;
	int i, rc;

	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
	dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i);

	while (i--) {
		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (r == NULL)
			return -ENOMEM;
		f = &r->r.frmr;

		f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
						ia->ri_max_frmr_depth);
		if (IS_ERR(f->fr_mr)) {
			rc = PTR_ERR(f->fr_mr);
			dprintk("RPC: %s: ib_alloc_fast_reg_mr "
				"failed %i\n", __func__, rc);
			goto out_free;
		}

		f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device,
							ia->ri_max_frmr_depth);
		if (IS_ERR(f->fr_pgl)) {
			rc = PTR_ERR(f->fr_pgl);
			dprintk("RPC: %s: ib_alloc_fast_reg_page_list "
				"failed %i\n", __func__, rc);

			ib_dereg_mr(f->fr_mr);
			goto out_free;
		}

		list_add(&r->mw_list, &buf->rb_mws);
		list_add(&r->mw_all, &buf->rb_all);
	}

	return 0;

out_free:
	kfree(r);
	return rc;
}

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001108int
1109rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
1110 struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
1111{
1112 char *p;
Chuck Lever65866f82014-05-28 10:33:59 -04001113 size_t len, rlen, wlen;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001114 int i, rc;
1115
1116 buf->rb_max_requests = cdata->max_requests;
1117 spin_lock_init(&buf->rb_lock);
1118 atomic_set(&buf->rb_credits, 1);
1119
1120 /* Need to allocate:
1121 * 1. arrays for send and recv pointers
1122 * 2. arrays of struct rpcrdma_req to fill in pointers
1123 * 3. array of struct rpcrdma_rep for replies
1124 * 4. padding, if any
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001125 * Send/recv buffers in req/rep need to be registered
1126 */
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001127 len = buf->rb_max_requests *
1128 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
1129 len += cdata->padding;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001130
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001131 p = kzalloc(len, GFP_KERNEL);
1132 if (p == NULL) {
1133 dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
1134 __func__, len);
1135 rc = -ENOMEM;
1136 goto out;
1137 }
1138 buf->rb_pool = p; /* for freeing it later */
1139
1140 buf->rb_send_bufs = (struct rpcrdma_req **) p;
1141 p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
1142 buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
1143 p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];
1144
1145 /*
1146 * Register the zeroed pad buffer, if any.
1147 */
1148 if (cdata->padding) {
1149 rc = rpcrdma_register_internal(ia, p, cdata->padding,
1150 &ep->rep_pad_mr, &ep->rep_pad);
1151 if (rc)
1152 goto out;
1153 }
1154 p += cdata->padding;
1155
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001156 INIT_LIST_HEAD(&buf->rb_mws);
Chuck Lever3111d722014-07-29 17:24:28 -04001157 INIT_LIST_HEAD(&buf->rb_all);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001158 switch (ia->ri_memreg_strategy) {
Tom Talpey3197d3092008-10-09 15:00:20 -04001159 case RPCRDMA_FRMR:
Chuck Lever2e845222014-07-29 17:25:38 -04001160 rc = rpcrdma_init_frmrs(ia, buf);
1161 if (rc)
1162 goto out;
Tom Talpey3197d3092008-10-09 15:00:20 -04001163 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001164 case RPCRDMA_MTHCAFMR:
Chuck Lever2e845222014-07-29 17:25:38 -04001165 rc = rpcrdma_init_fmrs(ia, buf);
1166 if (rc)
1167 goto out;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001168 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001169 default:
1170 break;
1171 }
1172
1173 /*
1174 * Allocate/init the request/reply buffers. Doing this
1175 * using kmalloc for now -- one for each buf.
1176 */
Chuck Lever65866f82014-05-28 10:33:59 -04001177 wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req));
1178 rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep));
1179 dprintk("RPC: %s: wlen = %zu, rlen = %zu\n",
1180 __func__, wlen, rlen);
1181
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001182 for (i = 0; i < buf->rb_max_requests; i++) {
1183 struct rpcrdma_req *req;
1184 struct rpcrdma_rep *rep;
1185
Chuck Lever65866f82014-05-28 10:33:59 -04001186 req = kmalloc(wlen, GFP_KERNEL);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001187 if (req == NULL) {
1188 dprintk("RPC: %s: request buffer %d alloc"
1189 " failed\n", __func__, i);
1190 rc = -ENOMEM;
1191 goto out;
1192 }
1193 memset(req, 0, sizeof(struct rpcrdma_req));
1194 buf->rb_send_bufs[i] = req;
1195 buf->rb_send_bufs[i]->rl_buffer = buf;
1196
1197 rc = rpcrdma_register_internal(ia, req->rl_base,
Chuck Lever65866f82014-05-28 10:33:59 -04001198 wlen - offsetof(struct rpcrdma_req, rl_base),
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001199 &buf->rb_send_bufs[i]->rl_handle,
1200 &buf->rb_send_bufs[i]->rl_iov);
1201 if (rc)
1202 goto out;
1203
Chuck Lever65866f82014-05-28 10:33:59 -04001204 buf->rb_send_bufs[i]->rl_size = wlen -
1205 sizeof(struct rpcrdma_req);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001206
Chuck Lever65866f82014-05-28 10:33:59 -04001207 rep = kmalloc(rlen, GFP_KERNEL);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001208 if (rep == NULL) {
1209 dprintk("RPC: %s: reply buffer %d alloc failed\n",
1210 __func__, i);
1211 rc = -ENOMEM;
1212 goto out;
1213 }
1214 memset(rep, 0, sizeof(struct rpcrdma_rep));
1215 buf->rb_recv_bufs[i] = rep;
1216 buf->rb_recv_bufs[i]->rr_buffer = buf;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001217
1218 rc = rpcrdma_register_internal(ia, rep->rr_base,
Chuck Lever65866f82014-05-28 10:33:59 -04001219 rlen - offsetof(struct rpcrdma_rep, rr_base),
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001220 &buf->rb_recv_bufs[i]->rr_handle,
1221 &buf->rb_recv_bufs[i]->rr_iov);
1222 if (rc)
1223 goto out;
1224
1225 }
1226 dprintk("RPC: %s: max_requests %d\n",
1227 __func__, buf->rb_max_requests);
1228 /* done */
1229 return 0;
1230out:
1231 rpcrdma_buffer_destroy(buf);
1232 return rc;
1233}
1234
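/* The two helpers below release every MW left on rb_all, undoing
 * rpcrdma_init_fmrs() and rpcrdma_init_frmrs() respectively.
 */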
static void
rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;
	int rc;

	while (!list_empty(&buf->rb_all)) {
		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
		list_del(&r->mw_all);
		list_del(&r->mw_list);

		rc = ib_dealloc_fmr(r->r.fmr);
		if (rc)
			dprintk("RPC: %s: ib_dealloc_fmr failed %i\n",
				__func__, rc);

		kfree(r);
	}
}

static void
rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;
	int rc;

	while (!list_empty(&buf->rb_all)) {
		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
		list_del(&r->mw_all);
		list_del(&r->mw_list);

		rc = ib_dereg_mr(r->r.frmr.fr_mr);
		if (rc)
			dprintk("RPC: %s: ib_dereg_mr failed %i\n",
				__func__, rc);
		ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);

		kfree(r);
	}
}

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001276void
1277rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1278{
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001279 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
Chuck Lever2e845222014-07-29 17:25:38 -04001280 int i;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001281
1282 /* clean up in reverse order from create
1283 * 1. recv mr memory (mr free, then kfree)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001284 * 2. send mr memory (mr free, then kfree)
Chuck Lever2e845222014-07-29 17:25:38 -04001285 * 3. MWs
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001286 */
1287 dprintk("RPC: %s: entering\n", __func__);
1288
1289 for (i = 0; i < buf->rb_max_requests; i++) {
1290 if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) {
1291 rpcrdma_deregister_internal(ia,
1292 buf->rb_recv_bufs[i]->rr_handle,
1293 &buf->rb_recv_bufs[i]->rr_iov);
1294 kfree(buf->rb_recv_bufs[i]);
1295 }
1296 if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001297 rpcrdma_deregister_internal(ia,
1298 buf->rb_send_bufs[i]->rl_handle,
1299 &buf->rb_send_bufs[i]->rl_iov);
1300 kfree(buf->rb_send_bufs[i]);
1301 }
1302 }
1303
Chuck Lever2e845222014-07-29 17:25:38 -04001304 switch (ia->ri_memreg_strategy) {
1305 case RPCRDMA_FRMR:
1306 rpcrdma_destroy_frmrs(buf);
1307 break;
1308 case RPCRDMA_MTHCAFMR:
1309 rpcrdma_destroy_fmrs(buf);
1310 break;
1311 default:
1312 break;
Allen Andrews4034ba02014-05-28 10:32:09 -04001313 }
1314
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001315 kfree(buf->rb_pool);
1316}
1317
/* After a disconnect, unmap all FMRs.
 *
 * This is invoked only in the transport connect worker in order
 * to serialize with rpcrdma_register_fmr_external().
 */
static void
rpcrdma_reset_fmrs(struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct list_head *pos;
	struct rpcrdma_mw *r;
	LIST_HEAD(l);
	int rc;

	list_for_each(pos, &buf->rb_all) {
		r = list_entry(pos, struct rpcrdma_mw, mw_all);

		INIT_LIST_HEAD(&l);
		list_add(&r->r.fmr->list, &l);
		rc = ib_unmap_fmr(&l);
		if (rc)
			dprintk("RPC: %s: ib_unmap_fmr failed %i\n",
				__func__, rc);
	}
}

/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
 * an unusable state. Find FRMRs in this state and dereg / reg
 * each. FRMRs that are VALID and attached to an rpcrdma_req are
 * also torn down.
 *
 * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
 *
 * This is invoked only in the transport connect worker in order
 * to serialize with rpcrdma_register_frmr_external().
 */
static void
rpcrdma_reset_frmrs(struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct list_head *pos;
	struct rpcrdma_mw *r;
	int rc;

	list_for_each(pos, &buf->rb_all) {
		r = list_entry(pos, struct rpcrdma_mw, mw_all);

		if (r->r.frmr.fr_state == FRMR_IS_INVALID)
			continue;

		rc = ib_dereg_mr(r->r.frmr.fr_mr);
		if (rc)
			dprintk("RPC: %s: ib_dereg_mr failed %i\n",
				__func__, rc);
		ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);

		r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
					ia->ri_max_frmr_depth);
		if (IS_ERR(r->r.frmr.fr_mr)) {
			rc = PTR_ERR(r->r.frmr.fr_mr);
			dprintk("RPC: %s: ib_alloc_fast_reg_mr"
				" failed %i\n", __func__, rc);
			continue;
		}
		r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
					ia->ri_id->device,
					ia->ri_max_frmr_depth);
		if (IS_ERR(r->r.frmr.fr_pgl)) {
			rc = PTR_ERR(r->r.frmr.fr_pgl);
			dprintk("RPC: %s: "
				"ib_alloc_fast_reg_page_list "
				"failed %i\n", __func__, rc);

			ib_dereg_mr(r->r.frmr.fr_mr);
			continue;
		}
		r->r.frmr.fr_state = FRMR_IS_INVALID;
	}
}

/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
 * some req segments uninitialized.
 */
static void
rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
{
	if (*mw) {
		list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
		*mw = NULL;
	}
}

/* Cycle mw's back in reverse order, and "spin" them.
 * This delays and scrambles reuse as much as possible.
 */
static void
rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mr_seg *seg = req->rl_segments;
	struct rpcrdma_mr_seg *seg1 = seg;
	int i;

	for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
		rpcrdma_buffer_put_mr(&seg->mr_chunk.rl_mw, buf);
	rpcrdma_buffer_put_mr(&seg1->mr_chunk.rl_mw, buf);
}

static void
rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	buf->rb_send_bufs[--buf->rb_send_index] = req;
	req->rl_niovs = 0;
	if (req->rl_reply) {
		buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
		req->rl_reply->rr_func = NULL;
		req->rl_reply = NULL;
	}
}

/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
 * Redo only the ib_post_send().
 */
static void
rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct ib_send_wr invalidate_wr, *bad_wr;
	int rc;

	dprintk("RPC: %s: FRMR %p is stale\n", __func__, r);

	/* When this FRMR is re-inserted into rb_mws, it is no longer stale */
	r->r.frmr.fr_state = FRMR_IS_INVALID;

	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
	invalidate_wr.wr_id = (unsigned long)(void *)r;
	invalidate_wr.opcode = IB_WR_LOCAL_INV;
	invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	dprintk("RPC: %s: frmr %p invalidating rkey %08x\n",
		__func__, r, r->r.frmr.fr_mr->rkey);

	read_lock(&ia->ri_qplock);
	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
	read_unlock(&ia->ri_qplock);
	if (rc) {
		/* Force rpcrdma_buffer_get() to retry */
		r->r.frmr.fr_state = FRMR_IS_STALE;
		dprintk("RPC: %s: ib_post_send failed, %i\n",
			__func__, rc);
	}
}

static void
rpcrdma_retry_flushed_linv(struct list_head *stale,
			   struct rpcrdma_buffer *buf)
{
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	struct list_head *pos;
	struct rpcrdma_mw *r;
	unsigned long flags;

	list_for_each(pos, stale) {
		r = list_entry(pos, struct rpcrdma_mw, mw_list);
		rpcrdma_retry_local_inv(r, ia);
	}

	spin_lock_irqsave(&buf->rb_lock, flags);
	list_splice_tail(stale, &buf->rb_mws);
	spin_unlock_irqrestore(&buf->rb_lock, flags);
}

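/* Refill req->rl_segments with MWs taken from the rb_mws free list.
 * FRMRs found in the STALE state are set aside on the caller's stale
 * list so they can be recovered with a LOCAL_INV before being returned
 * to the pool.
 */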
static struct rpcrdma_req *
rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
			 struct list_head *stale)
{
	struct rpcrdma_mw *r;
	int i;

	i = RPCRDMA_MAX_SEGS - 1;
	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			       struct rpcrdma_mw, mw_list);
		list_del(&r->mw_list);
		if (r->r.frmr.fr_state == FRMR_IS_STALE) {
			list_add(&r->mw_list, stale);
			continue;
		}
		req->rl_segments[i].mr_chunk.rl_mw = r;
		if (unlikely(i-- == 0))
			return req;	/* Success */
	}

	/* Not enough entries on rb_mws for this req */
	rpcrdma_buffer_put_sendbuf(req, buf);
	rpcrdma_buffer_put_mrs(req, buf);
	return NULL;
}

static struct rpcrdma_req *
rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;
	int i;

	i = RPCRDMA_MAX_SEGS - 1;
	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			       struct rpcrdma_mw, mw_list);
		list_del(&r->mw_list);
		req->rl_segments[i].mr_chunk.rl_mw = r;
		if (unlikely(i-- == 0))
			return req;	/* Success */
	}

	/* Not enough entries on rb_mws for this req */
	rpcrdma_buffer_put_sendbuf(req, buf);
	rpcrdma_buffer_put_mrs(req, buf);
	return NULL;
}

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001545/*
1546 * Get a set of request/reply buffers.
1547 *
1548 * Reply buffer (if needed) is attached to send buffer upon return.
1549 * Rule:
1550 * rb_send_index and rb_recv_index MUST always be pointing to the
1551 * *next* available buffer (non-NULL). They are incremented after
1552 * removing buffers, and decremented *before* returning them.
1553 */
1554struct rpcrdma_req *
1555rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1556{
Chuck Leverc2922c02014-07-29 17:24:36 -04001557 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
Chuck Leverddb6beb2014-07-29 17:24:54 -04001558 struct list_head stale;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001559 struct rpcrdma_req *req;
1560 unsigned long flags;
1561
1562 spin_lock_irqsave(&buffers->rb_lock, flags);
1563 if (buffers->rb_send_index == buffers->rb_max_requests) {
1564 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1565 dprintk("RPC: %s: out of request buffers\n", __func__);
1566 return ((struct rpcrdma_req *)NULL);
1567 }
1568
1569 req = buffers->rb_send_bufs[buffers->rb_send_index];
1570 if (buffers->rb_send_index < buffers->rb_recv_index) {
1571 dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
1572 __func__,
1573 buffers->rb_recv_index - buffers->rb_send_index);
1574 req->rl_reply = NULL;
1575 } else {
1576 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1577 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1578 }
1579 buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001580
1581 INIT_LIST_HEAD(&stale);
Chuck Leverc2922c02014-07-29 17:24:36 -04001582 switch (ia->ri_memreg_strategy) {
1583 case RPCRDMA_FRMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001584 req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
1585 break;
Chuck Leverc2922c02014-07-29 17:24:36 -04001586 case RPCRDMA_MTHCAFMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001587 req = rpcrdma_buffer_get_fmrs(req, buffers);
Chuck Leverc2922c02014-07-29 17:24:36 -04001588 break;
1589 default:
1590 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001591 }
1592 spin_unlock_irqrestore(&buffers->rb_lock, flags);
Chuck Leverddb6beb2014-07-29 17:24:54 -04001593 if (!list_empty(&stale))
1594 rpcrdma_retry_flushed_linv(&stale, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001595 return req;
1596}
1597
/*
 * Put request/reply buffers back into pool.
 * Pre-decrement counter/array index.
 */
void
rpcrdma_buffer_put(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
	unsigned long flags;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	rpcrdma_buffer_put_sendbuf(req, buffers);
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
	case RPCRDMA_MTHCAFMR:
		rpcrdma_buffer_put_mrs(req, buffers);
		break;
	default:
		break;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Recover reply buffers from pool.
 * This happens when recovering from error conditions.
 * Post-increment counter/array index.
 */
void
rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	unsigned long flags;

	if (req->rl_iov.length == 0)	/* special case xprt_rdma_allocate() */
		buffers = ((struct rpcrdma_req *) buffers)->rl_buffer;
	spin_lock_irqsave(&buffers->rb_lock, flags);
	if (buffers->rb_recv_index < buffers->rb_max_requests) {
		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Put reply buffers back into pool when not attached to
 * request. This happens in error conditions.
 */
void
rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
{
	struct rpcrdma_buffer *buffers = rep->rr_buffer;
	unsigned long flags;

	rep->rr_func = NULL;
	spin_lock_irqsave(&buffers->rb_lock, flags);
	buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
 */

int
rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
			  struct ib_mr **mrp, struct ib_sge *iov)
{
	struct ib_phys_buf ipb;
	struct ib_mr *mr;
	int rc;

	/*
	 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
	 */
	iov->addr = ib_dma_map_single(ia->ri_id->device,
			va, len, DMA_BIDIRECTIONAL);
	if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
		return -ENOMEM;

	iov->length = len;

	if (ia->ri_have_dma_lkey) {
		*mrp = NULL;
		iov->lkey = ia->ri_dma_lkey;
		return 0;
	} else if (ia->ri_bind_mem != NULL) {
		*mrp = NULL;
		iov->lkey = ia->ri_bind_mem->lkey;
		return 0;
	}

	ipb.addr = iov->addr;
	ipb.size = iov->length;
	mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
			IB_ACCESS_LOCAL_WRITE, &iov->addr);

	dprintk("RPC: %s: phys convert: 0x%llx "
			"registered 0x%llx length %d\n",
			__func__, (unsigned long long)ipb.addr,
			(unsigned long long)iov->addr, len);

	if (IS_ERR(mr)) {
		*mrp = NULL;
		rc = PTR_ERR(mr);
		dprintk("RPC: %s: failed with %i\n", __func__, rc);
	} else {
		*mrp = mr;
		iov->lkey = mr->lkey;
		rc = 0;
	}

	return rc;
}

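/* Tear down a mapping set up by rpcrdma_register_internal(): DMA-unmap
 * the buffer and, if an MR was created for it, deregister that MR.
 */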
int
rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
			    struct ib_mr *mr, struct ib_sge *iov)
{
	int rc;

	ib_dma_unmap_single(ia->ri_id->device,
			iov->addr, iov->length, DMA_BIDIRECTIONAL);

	if (NULL == mr)
		return 0;

	rc = ib_dereg_mr(mr);
	if (rc)
		dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
	return rc;
}

/*
 * Wrappers for chunk registration, shared by read/write chunk code.
 */

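/* DMA-map one segment, using ib_dma_map_page() when the segment carries
 * a page and ib_dma_map_single() when it carries a kernel virtual
 * address.
 */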
static void
rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
{
	seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
	seg->mr_dmalen = seg->mr_len;
	if (seg->mr_page)
		seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
				seg->mr_page, offset_in_page(seg->mr_offset),
				seg->mr_dmalen, seg->mr_dir);
	else
		seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
				seg->mr_offset,
				seg->mr_dmalen, seg->mr_dir);
	if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
		dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
			__func__,
			(unsigned long long)seg->mr_dma,
			seg->mr_offset, seg->mr_dmalen);
	}
}

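/* Undo rpcrdma_map_one(). */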
static void
rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
{
	if (seg->mr_page)
		ib_dma_unmap_page(ia->ri_id->device,
				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
	else
		ib_dma_unmap_single(ia->ri_id->device,
				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
}

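/* Register a chunk with a FAST_REG_MR work request: DMA-map up to
 * ri_max_frmr_depth contiguous segments into the FRMR's page list,
 * bump the MR's key, and post a single fast-register WR on the
 * connection's send queue.
 */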
static int
rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
			int *nsegs, int writing, struct rpcrdma_ia *ia,
			struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	struct rpcrdma_mw *mw = seg1->mr_chunk.rl_mw;
	struct rpcrdma_frmr *frmr = &mw->r.frmr;
	struct ib_mr *mr = frmr->fr_mr;
	struct ib_send_wr fastreg_wr, *bad_wr;
	u8 key;
	int len, pageoff;
	int i, rc;
	int seg_len;
	u64 pa;
	int page_no;

	pageoff = offset_in_page(seg1->mr_offset);
	seg1->mr_offset -= pageoff;	/* start of page */
	seg1->mr_len += pageoff;
	len = -pageoff;
	if (*nsegs > ia->ri_max_frmr_depth)
		*nsegs = ia->ri_max_frmr_depth;
	for (page_no = i = 0; i < *nsegs;) {
		rpcrdma_map_one(ia, seg, writing);
		pa = seg->mr_dma;
		for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
			frmr->fr_pgl->page_list[page_no++] = pa;
			pa += PAGE_SIZE;
		}
		len += seg->mr_len;
		++seg;
		++i;
		/* Check for holes */
		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	dprintk("RPC: %s: Using frmr %p to map %d segments\n",
		__func__, mw, i);

	frmr->fr_state = FRMR_IS_VALID;

	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
	fastreg_wr.wr_id = (unsigned long)(void *)mw;
	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
	fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma;
	fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
	fastreg_wr.wr.fast_reg.page_list_len = page_no;
	fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
	fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
	if (fastreg_wr.wr.fast_reg.length < len) {
		rc = -EIO;
		goto out_err;
	}

	/* Bump the key */
	key = (u8)(mr->rkey & 0x000000FF);
	ib_update_fast_reg_key(mr, ++key);

	fastreg_wr.wr.fast_reg.access_flags = (writing ?
				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
				IB_ACCESS_REMOTE_READ);
	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
	if (rc) {
		dprintk("RPC: %s: failed ib_post_send for register,"
			" status %i\n", __func__, rc);
		ib_update_fast_reg_key(mr, --key);
		goto out_err;
	} else {
		seg1->mr_rkey = mr->rkey;
		seg1->mr_base = seg1->mr_dma + pageoff;
		seg1->mr_nsegs = i;
		seg1->mr_len = len;
	}
	*nsegs = i;
	return 0;
out_err:
	frmr->fr_state = FRMR_IS_INVALID;
	while (i--)
		rpcrdma_unmap_one(ia, --seg);
	return rc;
}

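/* Invalidate a chunk registered by rpcrdma_register_frmr_external():
 * unmap its segments and post a LOCAL_INV work request. If the post
 * fails, the FRMR is marked stale so that rpcrdma_buffer_get() will
 * retry the invalidation later.
 */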
static int
rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
			struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	struct ib_send_wr invalidate_wr, *bad_wr;
	int rc;

	seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;

	memset(&invalidate_wr, 0, sizeof invalidate_wr);
	invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
	invalidate_wr.opcode = IB_WR_LOCAL_INV;
	invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	read_lock(&ia->ri_qplock);
	while (seg1->mr_nsegs--)
		rpcrdma_unmap_one(ia, seg++);
	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
	read_unlock(&ia->ri_qplock);
	if (rc) {
		/* Force rpcrdma_buffer_get() to retry */
		seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
		dprintk("RPC: %s: failed ib_post_send for invalidate,"
			" status %i\n", __func__, rc);
	}
	return rc;
}

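/* Register a chunk via ib_map_phys_fmr(): DMA-map each segment, gather
 * the resulting addresses, and map them all through the request's FMR.
 */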
static int
rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
			int *nsegs, int writing, struct rpcrdma_ia *ia)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
	int len, pageoff, i, rc;

	pageoff = offset_in_page(seg1->mr_offset);
	seg1->mr_offset -= pageoff;	/* start of page */
	seg1->mr_len += pageoff;
	len = -pageoff;
	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
		*nsegs = RPCRDMA_MAX_DATA_SEGS;
	for (i = 0; i < *nsegs;) {
		rpcrdma_map_one(ia, seg, writing);
		physaddrs[i] = seg->mr_dma;
		len += seg->mr_len;
		++seg;
		++i;
		/* Check for holes */
		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
				physaddrs, i, seg1->mr_dma);
	if (rc) {
		dprintk("RPC: %s: failed ib_map_phys_fmr "
			"%u@0x%llx+%i (%d)... status %i\n", __func__,
			len, (unsigned long long)seg1->mr_dma,
			pageoff, i, rc);
		while (i--)
			rpcrdma_unmap_one(ia, --seg);
	} else {
		seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
		seg1->mr_base = seg1->mr_dma + pageoff;
		seg1->mr_nsegs = i;
		seg1->mr_len = len;
	}
	*nsegs = i;
	return rc;
}

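/* Unmap the FMR used for this chunk, then DMA-unmap its segments. */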
static int
rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
			struct rpcrdma_ia *ia)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	LIST_HEAD(l);
	int rc;

	list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l);
	rc = ib_unmap_fmr(&l);
	read_lock(&ia->ri_qplock);
	while (seg1->mr_nsegs--)
		rpcrdma_unmap_one(ia, seg++);
	read_unlock(&ia->ri_qplock);
	if (rc)
		dprintk("RPC: %s: failed ib_unmap_fmr,"
			" status %i\n", __func__, rc);
	return rc;
}

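/* Register an external chunk using whichever memory registration
 * strategy this transport is configured for. Returns the number of
 * segments registered, or a negative errno.
 */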
int
rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
			int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	int rc = 0;

	switch (ia->ri_memreg_strategy) {

	case RPCRDMA_ALLPHYSICAL:
		rpcrdma_map_one(ia, seg, writing);
		seg->mr_rkey = ia->ri_bind_mem->rkey;
		seg->mr_base = seg->mr_dma;
		seg->mr_nsegs = 1;
		nsegs = 1;
		break;

	/* Registration using frmr registration */
	case RPCRDMA_FRMR:
		rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
		break;

	/* Registration using fmr memory registration */
	case RPCRDMA_MTHCAFMR:
		rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
		break;

	default:
		return -EIO;
	}
	if (rc)
		return rc;

	return nsegs;
}

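/* Reverse rpcrdma_register_external(). Returns the number of segments
 * that were unmapped.
 */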
int
rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
		struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	int nsegs = seg->mr_nsegs, rc;

	switch (ia->ri_memreg_strategy) {

	case RPCRDMA_ALLPHYSICAL:
		read_lock(&ia->ri_qplock);
		rpcrdma_unmap_one(ia, seg);
		read_unlock(&ia->ri_qplock);
		break;

	case RPCRDMA_FRMR:
		rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
		break;

	case RPCRDMA_MTHCAFMR:
		rc = rpcrdma_deregister_fmr_external(seg, ia);
		break;

	default:
		break;
	}
	return nsegs;
}

/*
 * Prepost any receive buffer, then post send.
 *
 * Receive buffer is donated to hardware, reclaimed upon recv completion.
 */
int
rpcrdma_ep_post(struct rpcrdma_ia *ia,
		struct rpcrdma_ep *ep,
		struct rpcrdma_req *req)
{
	struct ib_send_wr send_wr, *send_wr_fail;
	struct rpcrdma_rep *rep = req->rl_reply;
	int rc;

	if (rep) {
		rc = rpcrdma_ep_post_recv(ia, ep, rep);
		if (rc)
			goto out;
		req->rl_reply = NULL;
	}

	send_wr.next = NULL;
	send_wr.wr_id = 0ULL;	/* no send cookie */
	send_wr.sg_list = req->rl_send_iov;
	send_wr.num_sge = req->rl_niovs;
	send_wr.opcode = IB_WR_SEND;
	if (send_wr.num_sge == 4)	/* no need to sync any pad (constant) */
		ib_dma_sync_single_for_device(ia->ri_id->device,
			req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
			DMA_TO_DEVICE);
	ib_dma_sync_single_for_device(ia->ri_id->device,
		req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
		DMA_TO_DEVICE);
	ib_dma_sync_single_for_device(ia->ri_id->device,
		req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
		DMA_TO_DEVICE);

	if (DECR_CQCOUNT(ep) > 0)
		send_wr.send_flags = 0;
	else { /* Provider must take a send completion every now and then */
		INIT_CQCOUNT(ep);
		send_wr.send_flags = IB_SEND_SIGNALED;
	}

	rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
	if (rc)
		dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
			rc);
out:
	return rc;
}

/*
 * (Re)post a receive buffer.
 */
int
rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
		     struct rpcrdma_ep *ep,
		     struct rpcrdma_rep *rep)
{
	struct ib_recv_wr recv_wr, *recv_wr_fail;
	int rc;

	recv_wr.next = NULL;
	recv_wr.wr_id = (u64) (unsigned long) rep;
	recv_wr.sg_list = &rep->rr_iov;
	recv_wr.num_sge = 1;

	ib_dma_sync_single_for_cpu(ia->ri_id->device,
		rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);

	rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);

	if (rc)
		dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
			rc);
	return rc;
}

/* Physical mapping means one Read/Write list entry per-page.
 * All list entries must fit within an inline buffer
 *
 * NB: The server must return a Write list for NFS READ,
 *     which has the same constraint. Factor in the inline
 *     rsize as well.
 */
static size_t
rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	unsigned int inline_size, pages;

	inline_size = min_t(unsigned int,
			    cdata->inline_wsize, cdata->inline_rsize);
	inline_size -= RPCRDMA_HDRLEN_MIN;
	pages = inline_size / sizeof(struct rpcrdma_segment);
	return pages << PAGE_SHIFT;
}

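/* With FRMR or FMR registration, the maximum payload is bounded by the
 * number of data segments allowed per chunk list.
 */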
static size_t
rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt)
{
	return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
}

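/* Return the largest RPC payload the transport can carry under the
 * current memory registration strategy.
 */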
size_t
rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt)
{
	size_t result;

	switch (r_xprt->rx_ia.ri_memreg_strategy) {
	case RPCRDMA_ALLPHYSICAL:
		result = rpcrdma_physical_max_payload(r_xprt);
		break;
	default:
		result = rpcrdma_mr_max_payload(r_xprt);
	}
	return result;
}