\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -04001/*
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040038 */
39
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040040/*
41 * verbs.c
42 *
43 * Encapsulates the major functions managing:
44 * o adapters
45 * o endpoints
46 * o connections
47 * o buffer memory
48 */
49
Alexey Dobriyana6b7a402011-06-06 10:43:46 +000050#include <linux/interrupt.h>
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040051#include <linux/pci.h> /* for Tavor hack below */
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090052#include <linux/slab.h>
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040053
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040054#include "xprt_rdma.h"
55
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040056/*
57 * Globals/Macros
58 */
59
60#ifdef RPC_DEBUG
61# define RPCDBG_FACILITY RPCDBG_TRANS
62#endif
63
64/*
65 * internal functions
66 */
67
/*
 * Handle replies in tasklet context, using a single, global list.
 * The tasklet function just turns around and calls the handler
 * function for each reply on the list.
 */
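
/*
 * Flow, for illustration: the receive completion upcall queues each
 * rpcrdma_rep on rpcrdma_tasklets_g via rpcrdma_schedule_tasklet();
 * rpcrdma_run_tasklet() later drains the list in softirq context and
 * invokes rep->rr_func, or returns the buffer when no handler is set.
 */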

static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
static LIST_HEAD(rpcrdma_tasklets_g);

static void
rpcrdma_run_tasklet(unsigned long data)
{
	struct rpcrdma_rep *rep;
	void (*func)(struct rpcrdma_rep *);
	unsigned long flags;

	data = data;	/* unused; quiets "unused parameter" warnings */
	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	while (!list_empty(&rpcrdma_tasklets_g)) {
		rep = list_entry(rpcrdma_tasklets_g.next,
				 struct rpcrdma_rep, rr_list);
		list_del(&rep->rr_list);
		func = rep->rr_func;
		rep->rr_func = NULL;
		spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);

		if (func)
			func(rep);
		else
			rpcrdma_recv_buffer_put(rep);

		spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	}
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
}

static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);

static inline void
rpcrdma_schedule_tasklet(struct rpcrdma_rep *rep)
{
	unsigned long flags;

	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	list_add_tail(&rep->rr_list, &rpcrdma_tasklets_g);
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
	tasklet_schedule(&rpcrdma_tasklet_g);
}

static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	dprintk("RPC: %s: QP error %X on device %s ep %p\n",
		__func__, event->event, event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

static void
rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	dprintk("RPC: %s: CQ error %X on device %s ep %p\n",
		__func__, event->event, event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

static inline
void rpcrdma_event_process(struct ib_wc *wc)
{
	struct rpcrdma_mw *frmr;
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long) wc->wr_id;

	dprintk("RPC: %s: event rep %p status %X opcode %X length %u\n",
		__func__, rep, wc->status, wc->opcode, wc->byte_len);

	if (!rep) /* send completion that we don't care about */
		return;

	if (IB_WC_SUCCESS != wc->status) {
		dprintk("RPC: %s: WC opcode %d status %X, connection lost\n",
			__func__, wc->opcode, wc->status);
		rep->rr_len = ~0U;
		if (wc->opcode != IB_WC_FAST_REG_MR && wc->opcode != IB_WC_LOCAL_INV)
			rpcrdma_schedule_tasklet(rep);
		return;
	}

	switch (wc->opcode) {
	case IB_WC_FAST_REG_MR:
		frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
		frmr->r.frmr.state = FRMR_IS_VALID;
		break;
	case IB_WC_LOCAL_INV:
		frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
		frmr->r.frmr.state = FRMR_IS_INVALID;
		break;
	case IB_WC_RECV:
		rep->rr_len = wc->byte_len;
		ib_dma_sync_single_for_cpu(
			rdmab_to_ia(rep->rr_buffer)->ri_id->device,
			rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);
		/* Keep only the most recent credits, after checking validity */
		if (rep->rr_len >= 16) {
			struct rpcrdma_msg *p =
					(struct rpcrdma_msg *) rep->rr_base;
			unsigned int credits = ntohl(p->rm_credit);
			if (credits == 0) {
				dprintk("RPC: %s: server"
					" dropped credits to 0!\n", __func__);
				/* don't deadlock */
				credits = 1;
			} else if (credits > rep->rr_buffer->rb_max_requests) {
				dprintk("RPC: %s: server"
					" over-crediting: %d (%d)\n",
					__func__, credits,
					rep->rr_buffer->rb_max_requests);
				credits = rep->rr_buffer->rb_max_requests;
			}
			atomic_set(&rep->rr_buffer->rb_credits, credits);
		}
		rpcrdma_schedule_tasklet(rep);
		break;
	default:
		dprintk("RPC: %s: unexpected WC event %X\n",
			__func__, wc->opcode);
		break;
	}
}
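
/*
 * Credit clamp example (illustrative): if the server advertises 128
 * credits but rb_max_requests is 32, rb_credits is capped at 32; if it
 * advertises 0, rb_credits is forced to 1 so the client cannot
 * deadlock waiting for a credit that will never arrive.
 */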

static inline int
rpcrdma_cq_poll(struct ib_cq *cq)
{
	struct ib_wc wc;
	int rc;

	for (;;) {
		rc = ib_poll_cq(cq, 1, &wc);
		if (rc < 0) {
			dprintk("RPC: %s: ib_poll_cq failed %i\n",
				__func__, rc);
			return rc;
		}
		if (rc == 0)
			break;

		rpcrdma_event_process(&wc);
	}

	return 0;
}

/*
 * rpcrdma_cq_event_upcall
 *
 * This upcall handles recv and send events.
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400235 * It is reentrant but processes single events in order to maintain
236 * ordering of receives to keep server credits.
 *
 * It is the responsibility of the scheduled tasklet to return
 * recv buffers to the pool. NOTE: this affects synchronization of
 * connection shutdown. That is, the structures required for
 * the completion of the reply handler must remain intact until
 * all memory has been reclaimed.
 *
 * Note that send events are suppressed and do not result in an upcall.
 */
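
/*
 * A note on the poll/notify/poll pattern below (illustrative): a
 * completion that arrives after the first rpcrdma_cq_poll() drains the
 * CQ but before ib_req_notify_cq() re-arms it would generate no event,
 * so the upcall polls once more after re-arming to close that window.
 */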
static void
rpcrdma_cq_event_upcall(struct ib_cq *cq, void *context)
{
	int rc;

	rc = rpcrdma_cq_poll(cq);
	if (rc)
		return;

	rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_cq_poll(cq);
}

#ifdef RPC_DEBUG
static const char * const conn[] = {
	"address resolved",
	"address error",
	"route resolved",
	"route error",
	"connect request",
	"connect response",
	"connect error",
	"unreachable",
	"rejected",
	"established",
	"disconnected",
	"device removal"
};
#endif

static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
#ifdef RPC_DEBUG
	struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
#endif
	struct ib_qp_attr attr;
	struct ib_qp_init_attr iattr;
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ia->ri_async_rc = 0;
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		ib_query_qp(ia->ri_id->qp, &attr,
			IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
			&iattr);
		dprintk("RPC: %s: %d responder resources"
			" (%d initiator)\n",
			__func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		connstate = -ECONNREFUSED;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
		goto connected;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		connstate = -ENODEV;
connected:
		dprintk("RPC: %s: %s: %pI4:%u (ep 0x%p event 0x%x)\n",
			__func__,
			(event->event <= 11) ? conn[event->event] :
						"unknown connection error",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			ep, event->event);
		atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
		dprintk("RPC: %s: %sconnected\n",
			__func__, connstate > 0 ? "" : "dis");
		ep->rep_connected = connstate;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		break;
	default:
		dprintk("RPC: %s: unexpected CM event %d\n",
			__func__, event->event);
		break;
	}

#ifdef RPC_DEBUG
	if (connstate == 1) {
		int ird = attr.max_dest_rd_atomic;
		int tird = ep->rep_remote_cma.responder_resources;
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
			"on %s, memreg %d slots %d ird %d%s\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			ia->ri_id->device->name,
			ia->ri_memreg_strategy,
			xprt->rx_buf.rb_max_requests,
			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
	} else if (connstate < 0) {
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			connstate);
	}
#endif

	return 0;
}

static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
			struct rpcrdma_ia *ia, struct sockaddr *addr)
{
	struct rdma_cm_id *id;
	int rc;

	init_completion(&ia->ri_done);

	id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		dprintk("RPC: %s: rdma_create_id() failed %i\n",
			__func__, rc);
		return id;
	}

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}
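
/*
 * For illustration, the synchronization above: each rdma_resolve_*()
 * call returns immediately, and the result arrives later in
 * rpcrdma_conn_upcall(), which stores a status in ia->ri_async_rc and
 * fires ia->ri_done. The -ETIMEDOUT preset is what the caller sees if
 * the wait expires before the CM ever delivers an event.
 */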

/*
 * Drain any cq, prior to teardown.
 */
static void
rpcrdma_clean_cq(struct ib_cq *cq)
{
	struct ib_wc wc;
	int count = 0;

	while (1 == ib_poll_cq(cq, 1, &wc))
		++count;

	if (count)
		dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
			__func__, count, wc.opcode);
}

/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc, mem_priv;
	struct ib_device_attr devattr;
	struct rpcrdma_ia *ia = &xprt->rx_ia;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	/*
	 * Query the device to determine if the requested memory
	 * registration strategy is supported. If it isn't, set the
	 * strategy to a globally supported model.
	 */
	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out2;
	}

	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
	}

	if (memreg == RPCRDMA_FRMR) {
		/* Requires both frmr reg and local dma lkey */
		if ((devattr.device_cap_flags &
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
			dprintk("RPC: %s: FRMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_MTHCAFMR;
		} else {
			/* Mind the ia limit on FRMR page list depth */
			ia->ri_max_frmr_depth = min_t(unsigned int,
				RPCRDMA_MAX_DATA_SEGS,
				devattr.max_fast_reg_page_list_len);
		}
	}
	if (memreg == RPCRDMA_MTHCAFMR) {
		if (!ia->ri_id->device->alloc_fmr) {
			dprintk("RPC: %s: MTHCAFMR registration "
				"not supported by HCA\n", __func__);
#if RPCRDMA_PERSISTENT_REGISTRATION
			memreg = RPCRDMA_ALLPHYSICAL;
#else
			rc = -ENOMEM;
			goto out2;
#endif
		}
	}
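
	/*
	 * To summarize the fallback chain above (illustrative): FRMR is
	 * downgraded to MTHCAFMR when the HCA lacks fast-register
	 * support, and MTHCAFMR is downgraded to ALLPHYSICAL when the
	 * HCA lacks FMRs -- but only if persistent registration was
	 * compiled in; otherwise the open fails.
	 */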

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_FRMR:
		break;
#if RPCRDMA_PERSISTENT_REGISTRATION
	case RPCRDMA_ALLPHYSICAL:
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
#endif
	case RPCRDMA_MTHCAFMR:
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
#if RPCRDMA_PERSISTENT_REGISTRATION
	register_setup:
#endif
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			rc = -ENOMEM;
			goto out2;
		}
		break;
	default:
		printk(KERN_ERR "RPC: Unsupported memory "
				"registration mode: %d\n", memreg);
		rc = -ENOMEM;
		goto out2;
	}
	dprintk("RPC: %s: memory registration strategy is %d\n",
		__func__, memreg);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	return 0;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}

/*
 * Clean up/close an IA.
 *  o if event handles and PD have been initialized, free them.
 *  o close the IA
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering\n", __func__);
	if (ia->ri_bind_mem != NULL) {
		rc = ib_dereg_mr(ia->ri_bind_mem);
		dprintk("RPC: %s: ib_dereg_mr returned %i\n",
			__func__, rc);
	}
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
		if (ia->ri_id->qp)
			rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = NULL;
	}
	if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
		rc = ib_dealloc_pd(ia->ri_pd);
		dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
			__func__, rc);
	}
}

/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
				struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr devattr;
	int rc, err;

	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		return rc;
	}

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > devattr.max_qp_wr)
		cdata->max_requests = devattr.max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	/* send_cq and recv_cq initialized below */
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR: {
		int depth = 7;

		/* Add room for frmr register and invalidate WRs.
		 * 1. FRMR reg WR for head
		 * 2. FRMR invalidate WR for head
		 * 3. N FRMR reg WRs for pagelist
		 * 4. N FRMR invalidate WRs for pagelist
		 * 5. FRMR reg WR for tail
		 * 6. FRMR invalidate WR for tail
		 * 7. The RDMA_SEND WR
		 */

		/* Calculate N if the device max FRMR depth is smaller than
		 * RPCRDMA_MAX_DATA_SEGS.
		 */
		if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
			int delta = RPCRDMA_MAX_DATA_SEGS -
				    ia->ri_max_frmr_depth;

			do {
				depth += 2; /* FRMR reg + invalidate */
				delta -= ia->ri_max_frmr_depth;
			} while (delta > 0);

		}
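
		/* Worked example (illustrative): with RPCRDMA_MAX_DATA_SEGS
		 * of 64 and a device FRMR depth limit of 32, delta starts
		 * at 32, the loop runs once, and depth becomes 9 -- one
		 * extra reg/invalidate pair to cover the rest of the
		 * pagelist.
		 */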
		ep->rep_attr.cap.max_send_wr *= depth;
		if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
			cdata->max_requests = devattr.max_qp_wr / depth;
			if (!cdata->max_requests)
				return -EINVAL;
			ep->rep_attr.cap.max_send_wr = cdata->max_requests *
						       depth;
		}
		break;
	}
	default:
		break;
	}
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

	/* set trigger for requesting send completion */
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /* - 1*/;
	if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;
	INIT_CQCOUNT(ep);
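
	/*
	 * Signaling cadence example (illustrative): with max_send_wr of
	 * 128, rep_cqinit is 64, so roughly every 64th send WR is posted
	 * with IB_SEND_SIGNALED (see rpcrdma_ep_post); tiny queues set
	 * rep_cqinit to 0, which signals every send.
	 */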
	ep->rep_ia = ia;
	init_waitqueue_head(&ep->rep_connect_wait);
	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

	ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall,
				  rpcrdma_cq_async_error_upcall, NULL,
				  ep->rep_attr.cap.max_recv_wr +
				  ep->rep_attr.cap.max_send_wr + 1, 0);
	if (IS_ERR(ep->rep_cq)) {
		rc = PTR_ERR(ep->rep_cq);
		dprintk("RPC: %s: ib_create_cq failed: %i\n",
			__func__, rc);
		goto out1;
	}

	rc = ib_req_notify_cq(ep->rep_cq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		goto out2;
	}

	ep->rep_attr.send_cq = ep->rep_cq;
	ep->rep_attr.recv_cq = ep->rep_cq;

	/* Initialize cma parameters */

	/* RPC/RDMA does not use private data */
	ep->rep_remote_cma.private_data = NULL;
	ep->rep_remote_cma.private_data_len = 0;

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	if (devattr.max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
		ep->rep_remote_cma.responder_resources = 32;
	else
		ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;

	ep->rep_remote_cma.retry_count = 7;
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	err = ib_destroy_cq(ep->rep_cq);
	if (err)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, err);
out1:
	return rc;
}

/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering, connected is %d\n",
		__func__, ep->rep_connected);

	cancel_delayed_work_sync(&ep->rep_connect_worker);

	if (ia->ri_id->qp) {
		rc = rpcrdma_ep_disconnect(ep, ia);
		if (rc)
			dprintk("RPC: %s: rpcrdma_ep_disconnect"
				" returned %i\n", __func__, rc);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	/* padding - could be done in rpcrdma_buffer_destroy... */
	if (ep->rep_pad_mr) {
		rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
		ep->rep_pad_mr = NULL;
	}

	rpcrdma_clean_cq(ep->rep_cq);
	rc = ib_destroy_cq(ep->rep_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);
}

/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id;
	int rc = 0;
	int retry_count = 0;

	if (ep->rep_connected != 0) {
		struct rpcrdma_xprt *xprt;
retry:
		rc = rpcrdma_ep_disconnect(ep, ia);
		if (rc && rc != -ENOTCONN)
			dprintk("RPC: %s: rpcrdma_ep_disconnect"
				" status %i\n", __func__, rc);
		rpcrdma_clean_cq(ep->rep_cq);

		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		id = rpcrdma_create_id(xprt, ia,
				(struct sockaddr *)&xprt->rx_data.addr);
		if (IS_ERR(id)) {
			rc = PTR_ERR(id);
			goto out;
		}
		/* TEMP TEMP TEMP - fail if new device:
		 * Deregister/remarshal *all* requests!
		 * Close and recreate adapter, pd, etc!
		 * Re-determine all attributes still sane!
		 * More stuff I haven't thought of!
		 * Rrrgh!
		 */
		if (ia->ri_id->device != id->device) {
			printk("RPC: %s: can't reconnect on "
				"different device!\n", __func__);
			rdma_destroy_id(id);
			rc = -ENETDOWN;
			goto out;
		}
		/* END TEMP */
		rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = id;
	}

	rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
	if (rc) {
		dprintk("RPC: %s: rdma_create_qp failed %i\n",
			__func__, rc);
		goto out;
	}

/* XXX Tavor device performs badly with 2K MTU! */
if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
	struct pci_dev *pcid = to_pci_dev(ia->ri_id->device->dma_device);
	if (pcid->device == PCI_DEVICE_ID_MELLANOX_TAVOR &&
	    (pcid->vendor == PCI_VENDOR_ID_MELLANOX ||
	     pcid->vendor == PCI_VENDOR_ID_TOPSPIN)) {
		struct ib_qp_attr attr = {
			.path_mtu = IB_MTU_1024
		};
		rc = ib_modify_qp(ia->ri_id->qp, &attr, IB_QP_PATH_MTU);
	}
}

	ep->rep_connected = 0;

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC: %s: rdma_connect() failed with %i\n",
			__func__, rc);
		goto out;
	}

	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

	/*
	 * Check state. A non-peer reject indicates no listener
	 * (ECONNREFUSED), which may be a transient state. All
	 * other failures indicate a transport condition that has
	 * already received best-effort handling.
	 */
	if (ep->rep_connected == -ECONNREFUSED &&
	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
		dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
		goto retry;
	}
	if (ep->rep_connected <= 0) {
		/* Sometimes, the only way to reliably connect to remote
		 * CMs is to use the same nonzero values for ORD and IRD. */
		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
		    (ep->rep_remote_cma.responder_resources == 0 ||
		     ep->rep_remote_cma.initiator_depth !=
				ep->rep_remote_cma.responder_resources)) {
			if (ep->rep_remote_cma.responder_resources == 0)
				ep->rep_remote_cma.responder_resources = 1;
			ep->rep_remote_cma.initiator_depth =
				ep->rep_remote_cma.responder_resources;
			goto retry;
		}
		rc = ep->rep_connected;
	} else {
		dprintk("RPC: %s: connected\n", __func__);
	}

out:
	if (rc)
		ep->rep_connected = rc;
	return rc;
}

/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
int
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	rpcrdma_clean_cq(ep->rep_cq);
	rc = rdma_disconnect(ia->ri_id);
	if (!rc) {
		/* returns without wait if not connected */
		wait_event_interruptible(ep->rep_connect_wait,
							ep->rep_connected != 1);
		dprintk("RPC: %s: after wait, %sconnected\n", __func__,
			(ep->rep_connected == 1) ? "still " : "dis");
	} else {
		dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
		ep->rep_connected = rc;
	}
	return rc;
}

/*
 * Initialize buffer memory
 */
int
rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
	struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
{
	char *p;
	size_t len;
	int i, rc;
	struct rpcrdma_mw *r;

	buf->rb_max_requests = cdata->max_requests;
	spin_lock_init(&buf->rb_lock);
	atomic_set(&buf->rb_credits, 1);

	/* Need to allocate:
	 * 1. arrays for send and recv pointers
	 * 2. arrays of struct rpcrdma_req to fill in pointers
	 * 3. array of struct rpcrdma_rep for replies
	 * 4. padding, if any
	 * 5. mw's, fmr's or frmr's, if any
	 * Send/recv buffers in req/rep need to be registered
	 */
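
	/* Resulting layout of the single rb_pool allocation, for
	 * illustration:
	 *
	 *	[req pointer array][rep pointer array][pad buffer][mw array]
	 *
	 * The req and rep structures themselves are kmalloc'ed one at a
	 * time further below; only the pointer arrays, the padding, and
	 * the mw's share this allocation.
	 */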

	len = buf->rb_max_requests *
		(sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
	len += cdata->padding;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		len += buf->rb_max_requests * RPCRDMA_MAX_SEGS *
				sizeof(struct rpcrdma_mw);
		break;
	case RPCRDMA_MTHCAFMR:
		/* TBD we are perhaps overallocating here */
		len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
				sizeof(struct rpcrdma_mw);
		break;
	default:
		break;
	}

	/* allocate 1, 4 and 5 in one shot */
	p = kzalloc(len, GFP_KERNEL);
	if (p == NULL) {
		dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
			__func__, len);
		rc = -ENOMEM;
		goto out;
	}
	buf->rb_pool = p;	/* for freeing it later */

	buf->rb_send_bufs = (struct rpcrdma_req **) p;
	p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
	buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
	p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];

	/*
	 * Register the zeroed pad buffer, if any.
	 */
	if (cdata->padding) {
		rc = rpcrdma_register_internal(ia, p, cdata->padding,
					    &ep->rep_pad_mr, &ep->rep_pad);
		if (rc)
			goto out;
	}
	p += cdata->padding;

	INIT_LIST_HEAD(&buf->rb_mws);
	r = (struct rpcrdma_mw *)p;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
			r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
						ia->ri_max_frmr_depth);
			if (IS_ERR(r->r.frmr.fr_mr)) {
				rc = PTR_ERR(r->r.frmr.fr_mr);
				dprintk("RPC: %s: ib_alloc_fast_reg_mr"
					" failed %i\n", __func__, rc);
				goto out;
			}
			r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
						ia->ri_id->device,
						ia->ri_max_frmr_depth);
			if (IS_ERR(r->r.frmr.fr_pgl)) {
				rc = PTR_ERR(r->r.frmr.fr_pgl);
				dprintk("RPC: %s: "
					"ib_alloc_fast_reg_page_list "
					"failed %i\n", __func__, rc);

				ib_dereg_mr(r->r.frmr.fr_mr);
				goto out;
			}
			list_add(&r->mw_list, &buf->rb_mws);
			++r;
		}
		break;
	case RPCRDMA_MTHCAFMR:
		/* TBD we are perhaps overallocating here */
		for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
			static struct ib_fmr_attr fa =
				{ RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT };
			r->r.fmr = ib_alloc_fmr(ia->ri_pd,
				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
				&fa);
			if (IS_ERR(r->r.fmr)) {
				rc = PTR_ERR(r->r.fmr);
				dprintk("RPC: %s: ib_alloc_fmr"
					" failed %i\n", __func__, rc);
				goto out;
			}
			list_add(&r->mw_list, &buf->rb_mws);
			++r;
		}
		break;
	default:
		break;
	}

	/*
	 * Allocate/init the request/reply buffers. Doing this
	 * using kmalloc for now -- one for each buf.
	 */
	for (i = 0; i < buf->rb_max_requests; i++) {
		struct rpcrdma_req *req;
		struct rpcrdma_rep *rep;

		len = cdata->inline_wsize + sizeof(struct rpcrdma_req);
		/* RPC layer requests *double* size + 1K RPC_SLACK_SPACE! */
		/* Typical ~2400b, so rounding up saves work later */
		if (len < 4096)
			len = 4096;
		req = kmalloc(len, GFP_KERNEL);
		if (req == NULL) {
			dprintk("RPC: %s: request buffer %d alloc"
				" failed\n", __func__, i);
			rc = -ENOMEM;
			goto out;
		}
		memset(req, 0, sizeof(struct rpcrdma_req));
		buf->rb_send_bufs[i] = req;
		buf->rb_send_bufs[i]->rl_buffer = buf;

		rc = rpcrdma_register_internal(ia, req->rl_base,
				len - offsetof(struct rpcrdma_req, rl_base),
				&buf->rb_send_bufs[i]->rl_handle,
				&buf->rb_send_bufs[i]->rl_iov);
		if (rc)
			goto out;

		buf->rb_send_bufs[i]->rl_size = len-sizeof(struct rpcrdma_req);

		len = cdata->inline_rsize + sizeof(struct rpcrdma_rep);
		rep = kmalloc(len, GFP_KERNEL);
		if (rep == NULL) {
			dprintk("RPC: %s: reply buffer %d alloc failed\n",
				__func__, i);
			rc = -ENOMEM;
			goto out;
		}
		memset(rep, 0, sizeof(struct rpcrdma_rep));
		buf->rb_recv_bufs[i] = rep;
		buf->rb_recv_bufs[i]->rr_buffer = buf;

		rc = rpcrdma_register_internal(ia, rep->rr_base,
				len - offsetof(struct rpcrdma_rep, rr_base),
				&buf->rb_recv_bufs[i]->rr_handle,
				&buf->rb_recv_bufs[i]->rr_iov);
		if (rc)
			goto out;

	}
	dprintk("RPC: %s: max_requests %d\n",
		__func__, buf->rb_max_requests);
	/* done */
	return 0;
out:
	rpcrdma_buffer_destroy(buf);
	return rc;
}

/*
 * Unregister and destroy buffer memory. Need to deal with
 * partial initialization, so it's callable from failed create.
 * Must be called before destroying endpoint, as registrations
 * reference it.
 */
void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
	int rc, i;
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	struct rpcrdma_mw *r;

	/* clean up in reverse order from create
	 *   1. recv mr memory (mr free, then kfree)
	 *   2. send mr memory (mr free, then kfree)
	 *   3. padding (if any) [moved to rpcrdma_ep_destroy]
	 *   4. arrays
	 */
	dprintk("RPC: %s: entering\n", __func__);

	for (i = 0; i < buf->rb_max_requests; i++) {
		if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) {
			rpcrdma_deregister_internal(ia,
					buf->rb_recv_bufs[i]->rr_handle,
					&buf->rb_recv_bufs[i]->rr_iov);
			kfree(buf->rb_recv_bufs[i]);
		}
		if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
			rpcrdma_deregister_internal(ia,
					buf->rb_send_bufs[i]->rl_handle,
					&buf->rb_send_bufs[i]->rl_iov);
			kfree(buf->rb_send_bufs[i]);
		}
	}

	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			struct rpcrdma_mw, mw_list);
		list_del(&r->mw_list);
		switch (ia->ri_memreg_strategy) {
		case RPCRDMA_FRMR:
			rc = ib_dereg_mr(r->r.frmr.fr_mr);
			if (rc)
				dprintk("RPC: %s:"
					" ib_dereg_mr"
					" failed %i\n",
					__func__, rc);
			ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
			break;
		case RPCRDMA_MTHCAFMR:
			rc = ib_dealloc_fmr(r->r.fmr);
			if (rc)
				dprintk("RPC: %s:"
					" ib_dealloc_fmr"
					" failed %i\n",
					__func__, rc);
			break;
		default:
			break;
		}
	}

	kfree(buf->rb_pool);
}

/*
 * Get a set of request/reply buffers.
 *
 * Reply buffer (if needed) is attached to send buffer upon return.
 * Rule:
 *    rb_send_index and rb_recv_index MUST always be pointing to the
 *    *next* available buffer (non-NULL). They are incremented after
 *    removing buffers, and decremented *before* returning them.
 */
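
/*
 * Illustrative walk-through of the rule above: with rb_send_index and
 * rb_recv_index both 0, one call to rpcrdma_buffer_get() hands out
 * rb_send_bufs[0] with rb_recv_bufs[0] attached as its reply, NULLs
 * both slots, and advances both indices to 1; rpcrdma_buffer_put()
 * reverses those steps exactly.
 */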
struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
	struct rpcrdma_req *req;
	unsigned long flags;
	int i;
	struct rpcrdma_mw *r;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	if (buffers->rb_send_index == buffers->rb_max_requests) {
		spin_unlock_irqrestore(&buffers->rb_lock, flags);
		dprintk("RPC: %s: out of request buffers\n", __func__);
		return ((struct rpcrdma_req *)NULL);
	}

	req = buffers->rb_send_bufs[buffers->rb_send_index];
	if (buffers->rb_send_index < buffers->rb_recv_index) {
		dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
			__func__,
			buffers->rb_recv_index - buffers->rb_send_index);
		req->rl_reply = NULL;
	} else {
		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
	}
	buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
	if (!list_empty(&buffers->rb_mws)) {
		i = RPCRDMA_MAX_SEGS - 1;
		do {
			r = list_entry(buffers->rb_mws.next,
					struct rpcrdma_mw, mw_list);
			list_del(&r->mw_list);
			req->rl_segments[i].mr_chunk.rl_mw = r;
		} while (--i >= 0);
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
	return req;
}

/*
 * Put request/reply buffers back into pool.
 * Pre-decrement counter/array index.
 */
void
rpcrdma_buffer_put(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
	int i;
	unsigned long flags;

	BUG_ON(req->rl_nchunks != 0);
	spin_lock_irqsave(&buffers->rb_lock, flags);
	buffers->rb_send_bufs[--buffers->rb_send_index] = req;
	req->rl_niovs = 0;
	if (req->rl_reply) {
		buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply;
		req->rl_reply->rr_func = NULL;
		req->rl_reply = NULL;
	}
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
	case RPCRDMA_MTHCAFMR:
		/*
		 * Cycle mw's back in reverse order, and "spin" them.
		 * This delays and scrambles reuse as much as possible.
		 */
		i = 1;
		do {
			struct rpcrdma_mw **mw;
			mw = &req->rl_segments[i].mr_chunk.rl_mw;
			list_add_tail(&(*mw)->mw_list, &buffers->rb_mws);
			*mw = NULL;
		} while (++i < RPCRDMA_MAX_SEGS);
		list_add_tail(&req->rl_segments[0].mr_chunk.rl_mw->mw_list,
					&buffers->rb_mws);
		req->rl_segments[0].mr_chunk.rl_mw = NULL;
		break;
	default:
		break;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Recover reply buffers from pool.
 * This happens when recovering from error conditions.
 * Post-increment counter/array index.
 */
void
rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	unsigned long flags;

	if (req->rl_iov.length == 0)	/* special case xprt_rdma_allocate() */
		buffers = ((struct rpcrdma_req *) buffers)->rl_buffer;
	spin_lock_irqsave(&buffers->rb_lock, flags);
	if (buffers->rb_recv_index < buffers->rb_max_requests) {
		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Put reply buffers back into pool when not attached to
 * request. This happens in error conditions.
 */
void
rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
{
	struct rpcrdma_buffer *buffers = rep->rr_buffer;
	unsigned long flags;

	rep->rr_func = NULL;
	spin_lock_irqsave(&buffers->rb_lock, flags);
	buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
 */

int
rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
				struct ib_mr **mrp, struct ib_sge *iov)
{
	struct ib_phys_buf ipb;
	struct ib_mr *mr;
	int rc;

	/*
	 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
	 */
	iov->addr = ib_dma_map_single(ia->ri_id->device,
			va, len, DMA_BIDIRECTIONAL);
	iov->length = len;

	if (ia->ri_have_dma_lkey) {
		*mrp = NULL;
		iov->lkey = ia->ri_dma_lkey;
		return 0;
	} else if (ia->ri_bind_mem != NULL) {
		*mrp = NULL;
		iov->lkey = ia->ri_bind_mem->lkey;
		return 0;
	}

	ipb.addr = iov->addr;
	ipb.size = iov->length;
	mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
			IB_ACCESS_LOCAL_WRITE, &iov->addr);

	dprintk("RPC: %s: phys convert: 0x%llx "
			"registered 0x%llx length %d\n",
			__func__, (unsigned long long)ipb.addr,
			(unsigned long long)iov->addr, len);

	if (IS_ERR(mr)) {
		*mrp = NULL;
		rc = PTR_ERR(mr);
		dprintk("RPC: %s: failed with %i\n", __func__, rc);
	} else {
		*mrp = mr;
		iov->lkey = mr->lkey;
		rc = 0;
	}

	return rc;
}

int
rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
				struct ib_mr *mr, struct ib_sge *iov)
{
	int rc;

	ib_dma_unmap_single(ia->ri_id->device,
			iov->addr, iov->length, DMA_BIDIRECTIONAL);

	if (NULL == mr)
		return 0;

	rc = ib_dereg_mr(mr);
	if (rc)
		dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
	return rc;
}

/*
 * Wrappers for chunk registration, shared by read/write chunk code.
 */
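
/*
 * For orientation (illustrative): rpcrdma_register_external() below
 * dispatches on the memreg strategy -- ALLPHYSICAL maps a single
 * segment through the pre-registered DMA MR, FRMR posts a fast-register
 * work request, and MTHCAFMR maps physical addresses through an FMR.
 * The matching deregister paths unwind each of these.
 */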

static void
rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
{
	seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
	seg->mr_dmalen = seg->mr_len;
	if (seg->mr_page)
		seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
				seg->mr_page, offset_in_page(seg->mr_offset),
				seg->mr_dmalen, seg->mr_dir);
	else
		seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
				seg->mr_offset,
				seg->mr_dmalen, seg->mr_dir);
	if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
		dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
			__func__,
			(unsigned long long)seg->mr_dma,
			seg->mr_offset, seg->mr_dmalen);
	}
}

static void
rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
{
	if (seg->mr_page)
		ib_dma_unmap_page(ia->ri_id->device,
				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
	else
		ib_dma_unmap_single(ia->ri_id->device,
				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
}

static int
rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
			int *nsegs, int writing, struct rpcrdma_ia *ia,
			struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	struct ib_send_wr invalidate_wr, frmr_wr, *bad_wr, *post_wr;

	u8 key;
	int len, pageoff;
	int i, rc;
	int seg_len;
	u64 pa;
	int page_no;

	pageoff = offset_in_page(seg1->mr_offset);
	seg1->mr_offset -= pageoff;	/* start of page */
	seg1->mr_len += pageoff;
	len = -pageoff;
	if (*nsegs > ia->ri_max_frmr_depth)
		*nsegs = ia->ri_max_frmr_depth;
	for (page_no = i = 0; i < *nsegs;) {
		rpcrdma_map_one(ia, seg, writing);
		pa = seg->mr_dma;
		for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
			seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->
				page_list[page_no++] = pa;
			pa += PAGE_SIZE;
		}
		len += seg->mr_len;
		++seg;
		++i;
		/* Check for holes */
		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	dprintk("RPC: %s: Using frmr %p to map %d segments\n",
		__func__, seg1->mr_chunk.rl_mw, i);

	if (unlikely(seg1->mr_chunk.rl_mw->r.frmr.state == FRMR_IS_VALID)) {
		dprintk("RPC: %s: frmr %x left valid, posting invalidate.\n",
			__func__,
			seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey);
		/* Invalidate before using. */
		memset(&invalidate_wr, 0, sizeof invalidate_wr);
		invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
		invalidate_wr.next = &frmr_wr;
		invalidate_wr.opcode = IB_WR_LOCAL_INV;
		invalidate_wr.send_flags = IB_SEND_SIGNALED;
		invalidate_wr.ex.invalidate_rkey =
			seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
		DECR_CQCOUNT(&r_xprt->rx_ep);
		post_wr = &invalidate_wr;
	} else
		post_wr = &frmr_wr;

	/* Bump the key */
	key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
	ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);

	/* Prepare FRMR WR */
	memset(&frmr_wr, 0, sizeof frmr_wr);
	frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
	frmr_wr.opcode = IB_WR_FAST_REG_MR;
	frmr_wr.send_flags = IB_SEND_SIGNALED;
	frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma;
	frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
	frmr_wr.wr.fast_reg.page_list_len = page_no;
	frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
	frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
	BUG_ON(frmr_wr.wr.fast_reg.length < len);
	frmr_wr.wr.fast_reg.access_flags = (writing ?
				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
				IB_ACCESS_REMOTE_READ);
	frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	rc = ib_post_send(ia->ri_id->qp, post_wr, &bad_wr);

	if (rc) {
		dprintk("RPC: %s: failed ib_post_send for register,"
			" status %i\n", __func__, rc);
		while (i--)
			rpcrdma_unmap_one(ia, --seg);
	} else {
		seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
		seg1->mr_base = seg1->mr_dma + pageoff;
		seg1->mr_nsegs = i;
		seg1->mr_len = len;
	}
	*nsegs = i;
	return rc;
}

static int
rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
			struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	struct ib_send_wr invalidate_wr, *bad_wr;
	int rc;

	while (seg1->mr_nsegs--)
		rpcrdma_unmap_one(ia, seg++);

	memset(&invalidate_wr, 0, sizeof invalidate_wr);
	invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
	invalidate_wr.opcode = IB_WR_LOCAL_INV;
	invalidate_wr.send_flags = IB_SEND_SIGNALED;
	invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
	if (rc)
		dprintk("RPC: %s: failed ib_post_send for invalidate,"
			" status %i\n", __func__, rc);
	return rc;
}

static int
rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
			int *nsegs, int writing, struct rpcrdma_ia *ia)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
	int len, pageoff, i, rc;

	pageoff = offset_in_page(seg1->mr_offset);
	seg1->mr_offset -= pageoff;	/* start of page */
	seg1->mr_len += pageoff;
	len = -pageoff;
	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
		*nsegs = RPCRDMA_MAX_DATA_SEGS;
	for (i = 0; i < *nsegs;) {
		rpcrdma_map_one(ia, seg, writing);
		physaddrs[i] = seg->mr_dma;
		len += seg->mr_len;
		++seg;
		++i;
		/* Check for holes */
		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
				physaddrs, i, seg1->mr_dma);
	if (rc) {
		dprintk("RPC: %s: failed ib_map_phys_fmr "
			"%u@0x%llx+%i (%d)... status %i\n", __func__,
			len, (unsigned long long)seg1->mr_dma,
			pageoff, i, rc);
		while (i--)
			rpcrdma_unmap_one(ia, --seg);
	} else {
		seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
		seg1->mr_base = seg1->mr_dma + pageoff;
		seg1->mr_nsegs = i;
		seg1->mr_len = len;
	}
	*nsegs = i;
	return rc;
}

static int
rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
			struct rpcrdma_ia *ia)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	LIST_HEAD(l);
	int rc;

	list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l);
	rc = ib_unmap_fmr(&l);
	while (seg1->mr_nsegs--)
		rpcrdma_unmap_one(ia, seg++);
	if (rc)
		dprintk("RPC: %s: failed ib_unmap_fmr,"
			" status %i\n", __func__, rc);
	return rc;
}

int
rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
			int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	int rc = 0;

	switch (ia->ri_memreg_strategy) {

#if RPCRDMA_PERSISTENT_REGISTRATION
	case RPCRDMA_ALLPHYSICAL:
		rpcrdma_map_one(ia, seg, writing);
		seg->mr_rkey = ia->ri_bind_mem->rkey;
		seg->mr_base = seg->mr_dma;
		seg->mr_nsegs = 1;
		nsegs = 1;
		break;
#endif

	/* Registration using frmr registration */
	case RPCRDMA_FRMR:
		rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
		break;

	/* Registration using fmr memory registration */
	case RPCRDMA_MTHCAFMR:
		rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
		break;

	default:
		return -1;
	}
	if (rc)
		return -1;

	return nsegs;
}

int
rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
		struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	int nsegs = seg->mr_nsegs, rc;

	switch (ia->ri_memreg_strategy) {

#if RPCRDMA_PERSISTENT_REGISTRATION
	case RPCRDMA_ALLPHYSICAL:
		BUG_ON(nsegs != 1);
		rpcrdma_unmap_one(ia, seg);
		rc = 0;
		break;
#endif

	case RPCRDMA_FRMR:
		rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
		break;

	case RPCRDMA_MTHCAFMR:
		rc = rpcrdma_deregister_fmr_external(seg, ia);
		break;

	default:
		break;
	}
	return nsegs;
}

/*
 * Prepost any receive buffer, then post send.
 *
 * Receive buffer is donated to hardware, reclaimed upon recv completion.
 */
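
/*
 * Ordering note (illustrative): the receive buffer must be posted
 * before the send, because the server's reply can arrive as soon as
 * the send hits the wire. A reply with no receive posted would draw an
 * RNR NAK, and rnr_retry_count is 0 on this QP, so the connection
 * would be lost.
 */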
int
rpcrdma_ep_post(struct rpcrdma_ia *ia,
		struct rpcrdma_ep *ep,
		struct rpcrdma_req *req)
{
	struct ib_send_wr send_wr, *send_wr_fail;
	struct rpcrdma_rep *rep = req->rl_reply;
	int rc;

	if (rep) {
		rc = rpcrdma_ep_post_recv(ia, ep, rep);
		if (rc)
			goto out;
		req->rl_reply = NULL;
	}

	send_wr.next = NULL;
	send_wr.wr_id = 0ULL;	/* no send cookie */
	send_wr.sg_list = req->rl_send_iov;
	send_wr.num_sge = req->rl_niovs;
	send_wr.opcode = IB_WR_SEND;
	if (send_wr.num_sge == 4)	/* no need to sync any pad (constant) */
		ib_dma_sync_single_for_device(ia->ri_id->device,
			req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
			DMA_TO_DEVICE);
	ib_dma_sync_single_for_device(ia->ri_id->device,
		req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
		DMA_TO_DEVICE);
	ib_dma_sync_single_for_device(ia->ri_id->device,
		req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
		DMA_TO_DEVICE);

	if (DECR_CQCOUNT(ep) > 0)
		send_wr.send_flags = 0;
	else { /* Provider must take a send completion every now and then */
		INIT_CQCOUNT(ep);
		send_wr.send_flags = IB_SEND_SIGNALED;
	}

	rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
	if (rc)
		dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
			rc);
out:
	return rc;
}

/*
 * (Re)post a receive buffer.
 */
int
rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
		     struct rpcrdma_ep *ep,
		     struct rpcrdma_rep *rep)
{
	struct ib_recv_wr recv_wr, *recv_wr_fail;
	int rc;

	recv_wr.next = NULL;
	recv_wr.wr_id = (u64) (unsigned long) rep;
	recv_wr.sg_list = &rep->rr_iov;
	recv_wr.num_sge = 1;

	ib_dma_sync_single_for_cpu(ia->ri_id->device,
		rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);

	DECR_CQCOUNT(ep);
	rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);

	if (rc)
		dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
			rc);
	return rc;
}