\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -04001/*
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040038 */
39
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040040/*
41 * verbs.c
42 *
43 * Encapsulates the major functions managing:
44 * o adapters
45 * o endpoints
46 * o connections
47 * o buffer memory
48 */
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <asm/bitops.h>

#include "xprt_rdma.h"

/*
 * Globals/Macros
 */

#ifdef RPC_DEBUG
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040066/*
67 * internal functions
68 */
69
70/*
71 * handle replies in tasklet context, using a single, global list
72 * rdma tasklet function -- just turn around and call the func
73 * for all replies on the list
74 */
75
76static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
77static LIST_HEAD(rpcrdma_tasklets_g);
78
79static void
80rpcrdma_run_tasklet(unsigned long data)
81{
82 struct rpcrdma_rep *rep;
83 void (*func)(struct rpcrdma_rep *);
84 unsigned long flags;
85
86 data = data;
87 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
88 while (!list_empty(&rpcrdma_tasklets_g)) {
89 rep = list_entry(rpcrdma_tasklets_g.next,
90 struct rpcrdma_rep, rr_list);
91 list_del(&rep->rr_list);
92 func = rep->rr_func;
93 rep->rr_func = NULL;
94 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
95
96 if (func)
97 func(rep);
98 else
99 rpcrdma_recv_buffer_put(rep);
100
101 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
102 }
103 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
104}
105
106static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
107
108static inline void
109rpcrdma_schedule_tasklet(struct rpcrdma_rep *rep)
110{
111 unsigned long flags;
112
113 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
114 list_add_tail(&rep->rr_list, &rpcrdma_tasklets_g);
115 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
116 tasklet_schedule(&rpcrdma_tasklet_g);
117}
118
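/*
 * Asynchronous error upcalls, invoked by the provider when the QP or a
 * CQ enters an error state. If the transport was connected, mark it
 * broken and wake anyone waiting on the connection state.
 */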
static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	dprintk("RPC: %s: QP error %X on device %s ep %p\n",
		__func__, event->event, event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

static void
rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	dprintk("RPC: %s: CQ error %X on device %s ep %p\n",
		__func__, event->event, event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

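/*
 * Process a single send completion. A wr_id of zero marks an unsignaled
 * SEND and needs no further action. Otherwise wr_id carries the
 * rpcrdma_mw whose FRMR state is updated: stale on a flushed or failed
 * completion, invalid on success.
 */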
static void
rpcrdma_sendcq_process_wc(struct ib_wc *wc)
{
	struct rpcrdma_mw *frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;

	dprintk("RPC: %s: frmr %p status %X opcode %d\n",
		__func__, frmr, wc->status, wc->opcode);

	if (wc->wr_id == 0ULL)
		return;
	if (wc->status != IB_WC_SUCCESS) {
		frmr->r.frmr.fr_state = FRMR_IS_STALE;
		return;
	}

	frmr->r.frmr.fr_state = FRMR_IS_INVALID;
}

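/*
 * Poll the send completion queue until it is empty or the polling
 * budget is exhausted. Each pass reaps at most RPCRDMA_POLLSIZE
 * completions; RPCRDMA_WC_BUDGET bounds the total work done per upcall.
 */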
static int
rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct ib_wc *wcs;
	int budget, count, rc;

	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_send_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			return rc;

		count = rc;
		while (count-- > 0)
			rpcrdma_sendcq_process_wc(wcs++);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	return 0;
}

/*
 * Handle send, fast_reg_mr, and local_inv completions.
 *
 * Send events are typically suppressed and thus do not result
 * in an upcall. Occasionally one is signaled, however. This
 * prevents the provider's completion queue from wrapping and
 * losing a completion.
 */
static void
rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_sendcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC: %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_sendcq_poll(cq, ep);
}

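/*
 * Process a single receive completion: record the reply length, make
 * the receive buffer visible to the CPU, pick up the credit grant
 * carried in the RPC/RDMA header, and hand the rep to the reply
 * tasklet. A flushed receive is marked with an invalid length.
 */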
static void
rpcrdma_recvcq_process_wc(struct ib_wc *wc)
{
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long)wc->wr_id;

	dprintk("RPC: %s: rep %p status %X opcode %X length %u\n",
		__func__, rep, wc->status, wc->opcode, wc->byte_len);

	if (wc->status != IB_WC_SUCCESS) {
		rep->rr_len = ~0U;
		goto out_schedule;
	}
	if (wc->opcode != IB_WC_RECV)
		return;

	rep->rr_len = wc->byte_len;
	ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
			rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);

	if (rep->rr_len >= 16) {
		struct rpcrdma_msg *p = (struct rpcrdma_msg *)rep->rr_base;
		unsigned int credits = ntohl(p->rm_credit);

		if (credits == 0)
			credits = 1;	/* don't deadlock */
		else if (credits > rep->rr_buffer->rb_max_requests)
			credits = rep->rr_buffer->rb_max_requests;
		atomic_set(&rep->rr_buffer->rb_credits, credits);
	}

out_schedule:
	rpcrdma_schedule_tasklet(rep);
}

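/*
 * Poll the receive completion queue, under the same per-upcall budget
 * as the send side.
 */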
static int
rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct ib_wc *wcs;
	int budget, count, rc;

	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_recv_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			return rc;

		count = rc;
		while (count-- > 0)
			rpcrdma_recvcq_process_wc(wcs++);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	return 0;
}

/*
 * Handle receive completions.
 *
 * It is reentrant but processes single events in order to maintain
 * ordering of receives to keep server credits.
 *
 * It is the responsibility of the scheduled tasklet to return
 * recv buffers to the pool. NOTE: this affects synchronization of
 * connection shutdown. That is, the structures required for
 * the completion of the reply handler must remain intact until
 * all memory has been reclaimed.
 */
static void
rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_recvcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC: %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_recvcq_poll(cq, ep);
}

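/*
 * Drain both completion queues by calling the upcalls directly. Used
 * before disconnecting or reconnecting so completions for flushed work
 * requests are processed and their resources recycled.
 */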
static void
rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
{
	rpcrdma_recvcq_upcall(ep->rep_attr.recv_cq, ep);
	rpcrdma_sendcq_upcall(ep->rep_attr.send_cq, ep);
}

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -0400321#ifdef RPC_DEBUG
322static const char * const conn[] = {
323 "address resolved",
324 "address error",
325 "route resolved",
326 "route error",
327 "connect request",
328 "connect response",
329 "connect error",
330 "unreachable",
331 "rejected",
332 "established",
333 "disconnected",
334 "device removal"
335};
336#endif
337
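/*
 * Connection Manager event handler. Address and route resolution events
 * complete the handshake started in rpcrdma_create_id(); connection
 * events update ep->rep_connected and wake anyone waiting on
 * ep->rep_connect_wait.
 */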
static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
#ifdef RPC_DEBUG
	struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
#endif
	struct ib_qp_attr attr;
	struct ib_qp_init_attr iattr;
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ia->ri_async_rc = 0;
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		ib_query_qp(ia->ri_id->qp, &attr,
			IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
			&iattr);
		dprintk("RPC: %s: %d responder resources"
			" (%d initiator)\n",
			__func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		connstate = -ECONNREFUSED;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
		goto connected;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		connstate = -ENODEV;
connected:
		dprintk("RPC: %s: %s: %pI4:%u (ep 0x%p event 0x%x)\n",
			__func__,
			(event->event <= 11) ? conn[event->event] :
				"unknown connection error",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			ep, event->event);
		atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
		dprintk("RPC: %s: %sconnected\n",
			__func__, connstate > 0 ? "" : "dis");
		ep->rep_connected = connstate;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		break;
	default:
		dprintk("RPC: %s: unexpected CM event %d\n",
			__func__, event->event);
		break;
	}

#ifdef RPC_DEBUG
	if (connstate == 1) {
		int ird = attr.max_dest_rd_atomic;
		int tird = ep->rep_remote_cma.responder_resources;
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
			"on %s, memreg %d slots %d ird %d%s\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			ia->ri_id->device->name,
			ia->ri_memreg_strategy,
			xprt->rx_buf.rb_max_requests,
			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
	} else if (connstate < 0) {
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			connstate);
	}
#endif

	return 0;
}

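/*
 * Create an rdma_cm_id and synchronously resolve the server's address
 * and route. The CM event handler above completes ia->ri_done and
 * leaves the result in ia->ri_async_rc.
 */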
static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
			struct rpcrdma_ia *ia, struct sockaddr *addr)
{
	struct rdma_cm_id *id;
	int rc;

	init_completion(&ia->ri_done);

	id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		dprintk("RPC: %s: rdma_create_id() failed %i\n",
			__func__, rc);
		return id;
	}

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}

/*
 * Drain any cq, prior to teardown.
 */
static void
rpcrdma_clean_cq(struct ib_cq *cq)
{
	struct ib_wc wc;
	int count = 0;

	while (1 == ib_poll_cq(cq, 1, &wc))
		++count;

	if (count)
		dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
			__func__, count, wc.opcode);
}

/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc, mem_priv;
	struct ib_device_attr devattr;
	struct rpcrdma_ia *ia = &xprt->rx_ia;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	/*
	 * Query the device to determine if the requested memory
	 * registration strategy is supported. If it isn't, set the
	 * strategy to a globally supported model.
	 */
	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out2;
	}

	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
	}

	if (memreg == RPCRDMA_FRMR) {
		/* Requires both frmr reg and local dma lkey */
		if ((devattr.device_cap_flags &
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
			dprintk("RPC: %s: FRMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_MTHCAFMR;
		} else {
			/* Mind the ia limit on FRMR page list depth */
			ia->ri_max_frmr_depth = min_t(unsigned int,
				RPCRDMA_MAX_DATA_SEGS,
				devattr.max_fast_reg_page_list_len);
		}
	}
	if (memreg == RPCRDMA_MTHCAFMR) {
		if (!ia->ri_id->device->alloc_fmr) {
			dprintk("RPC: %s: MTHCAFMR registration "
				"not supported by HCA\n", __func__);
#if RPCRDMA_PERSISTENT_REGISTRATION
			memreg = RPCRDMA_ALLPHYSICAL;
#else
			rc = -ENOMEM;
			goto out2;
#endif
		}
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_FRMR:
		break;
#if RPCRDMA_PERSISTENT_REGISTRATION
	case RPCRDMA_ALLPHYSICAL:
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
#endif
	case RPCRDMA_MTHCAFMR:
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
#if RPCRDMA_PERSISTENT_REGISTRATION
	register_setup:
#endif
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			rc = -ENOMEM;
			goto out2;
		}
		break;
	default:
		printk(KERN_ERR "RPC: Unsupported memory "
				"registration mode: %d\n", memreg);
		rc = -ENOMEM;
		goto out2;
	}
	dprintk("RPC: %s: memory registration strategy is %d\n",
		__func__, memreg);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	rwlock_init(&ia->ri_qplock);
	return 0;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}

/*
 * Clean up/close an IA.
 *  o if event handles and PD have been initialized, free them.
 *  o close the IA
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering\n", __func__);
	if (ia->ri_bind_mem != NULL) {
		rc = ib_dereg_mr(ia->ri_bind_mem);
		dprintk("RPC: %s: ib_dereg_mr returned %i\n",
			__func__, rc);
	}
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
		if (ia->ri_id->qp)
			rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = NULL;
	}
	if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
		rc = ib_dealloc_pd(ia->ri_pd);
		dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
			__func__, rc);
	}
}

/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
				struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr devattr;
	struct ib_cq *sendcq, *recvcq;
	int rc, err;

	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		return rc;
	}

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > devattr.max_qp_wr)
		cdata->max_requests = devattr.max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	/* send_cq and recv_cq initialized below */
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR: {
		int depth = 7;

		/* Add room for frmr register and invalidate WRs.
		 * 1. FRMR reg WR for head
		 * 2. FRMR invalidate WR for head
		 * 3. N FRMR reg WRs for pagelist
		 * 4. N FRMR invalidate WRs for pagelist
		 * 5. FRMR reg WR for tail
		 * 6. FRMR invalidate WR for tail
		 * 7. The RDMA_SEND WR
		 */

		/* Calculate N if the device max FRMR depth is smaller than
		 * RPCRDMA_MAX_DATA_SEGS.
		 */
		if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
			int delta = RPCRDMA_MAX_DATA_SEGS -
				    ia->ri_max_frmr_depth;

			do {
				depth += 2; /* FRMR reg + invalidate */
				delta -= ia->ri_max_frmr_depth;
			} while (delta > 0);

		}
		ep->rep_attr.cap.max_send_wr *= depth;
		if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
			cdata->max_requests = devattr.max_qp_wr / depth;
			if (!cdata->max_requests)
				return -EINVAL;
			ep->rep_attr.cap.max_send_wr = cdata->max_requests *
						       depth;
		}
		break;
	}
	default:
		break;
	}
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

	/* set trigger for requesting send completion */
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
	if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;
	INIT_CQCOUNT(ep);
	ep->rep_ia = ia;
	init_waitqueue_head(&ep->rep_connect_wait);
	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

	sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
				  rpcrdma_cq_async_error_upcall, ep,
				  ep->rep_attr.cap.max_send_wr + 1, 0);
	if (IS_ERR(sendcq)) {
		rc = PTR_ERR(sendcq);
		dprintk("RPC: %s: failed to create send CQ: %i\n",
			__func__, rc);
		goto out1;
	}

	rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		goto out2;
	}

	recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
				  rpcrdma_cq_async_error_upcall, ep,
				  ep->rep_attr.cap.max_recv_wr + 1, 0);
	if (IS_ERR(recvcq)) {
		rc = PTR_ERR(recvcq);
		dprintk("RPC: %s: failed to create recv CQ: %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		ib_destroy_cq(recvcq);
		goto out2;
	}

	ep->rep_attr.send_cq = sendcq;
	ep->rep_attr.recv_cq = recvcq;

	/* Initialize cma parameters */

	/* RPC/RDMA does not use private data */
	ep->rep_remote_cma.private_data = NULL;
	ep->rep_remote_cma.private_data_len = 0;

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	if (devattr.max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
		ep->rep_remote_cma.responder_resources = 32;
	else
		ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;

	ep->rep_remote_cma.retry_count = 7;
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	err = ib_destroy_cq(sendcq);
	if (err)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, err);
out1:
	return rc;
}

/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering, connected is %d\n",
		__func__, ep->rep_connected);

	cancel_delayed_work_sync(&ep->rep_connect_worker);

	if (ia->ri_id->qp) {
		rc = rpcrdma_ep_disconnect(ep, ia);
		if (rc)
			dprintk("RPC: %s: rpcrdma_ep_disconnect"
				" returned %i\n", __func__, rc);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	/* padding - could be done in rpcrdma_buffer_destroy... */
	if (ep->rep_pad_mr) {
		rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
		ep->rep_pad_mr = NULL;
	}

	rpcrdma_clean_cq(ep->rep_attr.recv_cq);
	rc = ib_destroy_cq(ep->rep_attr.recv_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);

	rpcrdma_clean_cq(ep->rep_attr.send_cq);
	rc = ib_destroy_cq(ep->rep_attr.send_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);
}

/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id, *old;
	int rc = 0;
	int retry_count = 0;

	if (ep->rep_connected != 0) {
		struct rpcrdma_xprt *xprt;
retry:
		dprintk("RPC: %s: reconnecting...\n", __func__);
		rc = rpcrdma_ep_disconnect(ep, ia);
		if (rc && rc != -ENOTCONN)
			dprintk("RPC: %s: rpcrdma_ep_disconnect"
				" status %i\n", __func__, rc);
		rpcrdma_flush_cqs(ep);

		if (ia->ri_memreg_strategy == RPCRDMA_FRMR)
			rpcrdma_reset_frmrs(ia);

		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		id = rpcrdma_create_id(xprt, ia,
				(struct sockaddr *)&xprt->rx_data.addr);
		if (IS_ERR(id)) {
			rc = -EHOSTUNREACH;
			goto out;
		}
		/* TEMP TEMP TEMP - fail if new device:
		 * Deregister/remarshal *all* requests!
		 * Close and recreate adapter, pd, etc!
		 * Re-determine all attributes still sane!
		 * More stuff I haven't thought of!
		 * Rrrgh!
		 */
		if (ia->ri_id->device != id->device) {
			printk("RPC: %s: can't reconnect on "
				"different device!\n", __func__);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}
		/* END TEMP */
		rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
				__func__, rc);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}

		write_lock(&ia->ri_qplock);
		old = ia->ri_id;
		ia->ri_id = id;
		write_unlock(&ia->ri_qplock);

		rdma_destroy_qp(old);
		rdma_destroy_id(old);
	} else {
		dprintk("RPC: %s: connecting...\n", __func__);
		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
				__func__, rc);
			/* do not update ep->rep_connected */
			return -ENETUNREACH;
		}
	}

	ep->rep_connected = 0;

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC: %s: rdma_connect() failed with %i\n",
				__func__, rc);
		goto out;
	}

	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

	/*
	 * Check state. A non-peer reject indicates no listener
	 * (ECONNREFUSED), which may be a transient state. All
	 * others indicate a transport condition which has already
	 * undergone a best-effort.
	 */
	if (ep->rep_connected == -ECONNREFUSED &&
	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
		dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
		goto retry;
	}
	if (ep->rep_connected <= 0) {
		/* Sometimes, the only way to reliably connect to remote
		 * CMs is to use same nonzero values for ORD and IRD. */
		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
		    (ep->rep_remote_cma.responder_resources == 0 ||
		     ep->rep_remote_cma.initiator_depth !=
				ep->rep_remote_cma.responder_resources)) {
			if (ep->rep_remote_cma.responder_resources == 0)
				ep->rep_remote_cma.responder_resources = 1;
			ep->rep_remote_cma.initiator_depth =
				ep->rep_remote_cma.responder_resources;
			goto retry;
		}
		rc = ep->rep_connected;
	} else {
		dprintk("RPC: %s: connected\n", __func__);
	}

out:
	if (rc)
		ep->rep_connected = rc;
	return rc;
}

/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
int
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	rpcrdma_flush_cqs(ep);
	rc = rdma_disconnect(ia->ri_id);
	if (!rc) {
		/* returns without wait if not connected */
		wait_event_interruptible(ep->rep_connect_wait,
							ep->rep_connected != 1);
		dprintk("RPC: %s: after wait, %sconnected\n", __func__,
			(ep->rep_connected == 1) ? "still " : "dis");
	} else {
		dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
		ep->rep_connected = rc;
	}
	return rc;
}

/*
 * Initialize buffer memory
 */
int
rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
	struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
{
	char *p;
	size_t len, rlen, wlen;
	int i, rc;
	struct rpcrdma_mw *r;

	buf->rb_max_requests = cdata->max_requests;
	spin_lock_init(&buf->rb_lock);
	atomic_set(&buf->rb_credits, 1);

	/* Need to allocate:
	 *   1. arrays for send and recv pointers
	 *   2. arrays of struct rpcrdma_req to fill in pointers
	 *   3. array of struct rpcrdma_rep for replies
	 *   4. padding, if any
	 *   5. mw's, fmr's or frmr's, if any
	 * Send/recv buffers in req/rep need to be registered
	 */

	len = buf->rb_max_requests *
		(sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
	len += cdata->padding;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		len += buf->rb_max_requests * RPCRDMA_MAX_SEGS *
				sizeof(struct rpcrdma_mw);
		break;
	case RPCRDMA_MTHCAFMR:
		/* TBD we are perhaps overallocating here */
		len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
				sizeof(struct rpcrdma_mw);
		break;
	default:
		break;
	}

	/* allocate 1, 4 and 5 in one shot */
	p = kzalloc(len, GFP_KERNEL);
	if (p == NULL) {
		dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
			__func__, len);
		rc = -ENOMEM;
		goto out;
	}
	buf->rb_pool = p;	/* for freeing it later */

	buf->rb_send_bufs = (struct rpcrdma_req **) p;
	p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
	buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
	p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];

	/*
	 * Register the zeroed pad buffer, if any.
	 */
	if (cdata->padding) {
		rc = rpcrdma_register_internal(ia, p, cdata->padding,
					    &ep->rep_pad_mr, &ep->rep_pad);
		if (rc)
			goto out;
	}
	p += cdata->padding;

	INIT_LIST_HEAD(&buf->rb_mws);
	INIT_LIST_HEAD(&buf->rb_all);
	r = (struct rpcrdma_mw *)p;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
			r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
						ia->ri_max_frmr_depth);
			if (IS_ERR(r->r.frmr.fr_mr)) {
				rc = PTR_ERR(r->r.frmr.fr_mr);
				dprintk("RPC: %s: ib_alloc_fast_reg_mr"
					" failed %i\n", __func__, rc);
				goto out;
			}
			r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
						ia->ri_id->device,
						ia->ri_max_frmr_depth);
			if (IS_ERR(r->r.frmr.fr_pgl)) {
				rc = PTR_ERR(r->r.frmr.fr_pgl);
				dprintk("RPC: %s: "
					"ib_alloc_fast_reg_page_list "
					"failed %i\n", __func__, rc);

				ib_dereg_mr(r->r.frmr.fr_mr);
				goto out;
			}
			list_add(&r->mw_all, &buf->rb_all);
			list_add(&r->mw_list, &buf->rb_mws);
			++r;
		}
		break;
	case RPCRDMA_MTHCAFMR:
		/* TBD we are perhaps overallocating here */
		for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
			static struct ib_fmr_attr fa =
				{ RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT };
			r->r.fmr = ib_alloc_fmr(ia->ri_pd,
				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
				&fa);
			if (IS_ERR(r->r.fmr)) {
				rc = PTR_ERR(r->r.fmr);
				dprintk("RPC: %s: ib_alloc_fmr"
					" failed %i\n", __func__, rc);
				goto out;
			}
			list_add(&r->mw_all, &buf->rb_all);
			list_add(&r->mw_list, &buf->rb_mws);
			++r;
		}
		break;
	default:
		break;
	}

	/*
	 * Allocate/init the request/reply buffers. Doing this
	 * using kmalloc for now -- one for each buf.
	 */
	wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req));
	rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep));
	dprintk("RPC: %s: wlen = %zu, rlen = %zu\n",
		__func__, wlen, rlen);

	for (i = 0; i < buf->rb_max_requests; i++) {
		struct rpcrdma_req *req;
		struct rpcrdma_rep *rep;

		req = kmalloc(wlen, GFP_KERNEL);
		if (req == NULL) {
			dprintk("RPC: %s: request buffer %d alloc"
				" failed\n", __func__, i);
			rc = -ENOMEM;
			goto out;
		}
		memset(req, 0, sizeof(struct rpcrdma_req));
		buf->rb_send_bufs[i] = req;
		buf->rb_send_bufs[i]->rl_buffer = buf;

		rc = rpcrdma_register_internal(ia, req->rl_base,
				wlen - offsetof(struct rpcrdma_req, rl_base),
				&buf->rb_send_bufs[i]->rl_handle,
				&buf->rb_send_bufs[i]->rl_iov);
		if (rc)
			goto out;

		buf->rb_send_bufs[i]->rl_size = wlen -
						sizeof(struct rpcrdma_req);

		rep = kmalloc(rlen, GFP_KERNEL);
		if (rep == NULL) {
			dprintk("RPC: %s: reply buffer %d alloc failed\n",
				__func__, i);
			rc = -ENOMEM;
			goto out;
		}
		memset(rep, 0, sizeof(struct rpcrdma_rep));
		buf->rb_recv_bufs[i] = rep;
		buf->rb_recv_bufs[i]->rr_buffer = buf;

		rc = rpcrdma_register_internal(ia, rep->rr_base,
				rlen - offsetof(struct rpcrdma_rep, rr_base),
				&buf->rb_recv_bufs[i]->rr_handle,
				&buf->rb_recv_bufs[i]->rr_iov);
		if (rc)
			goto out;

	}
	dprintk("RPC: %s: max_requests %d\n",
		__func__, buf->rb_max_requests);
	/* done */
	return 0;
out:
	rpcrdma_buffer_destroy(buf);
	return rc;
}

/*
 * Unregister and destroy buffer memory. Need to deal with
 * partial initialization, so it's callable from failed create.
 * Must be called before destroying endpoint, as registrations
 * reference it.
 */
void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
	int rc, i;
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	struct rpcrdma_mw *r;

	/* clean up in reverse order from create
	 *   1. recv mr memory (mr free, then kfree)
	 *   2. send mr memory (mr free, then kfree)
	 *   3. padding (if any) [moved to rpcrdma_ep_destroy]
	 *   4. arrays
	 */
	dprintk("RPC: %s: entering\n", __func__);

	for (i = 0; i < buf->rb_max_requests; i++) {
		if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) {
			rpcrdma_deregister_internal(ia,
					buf->rb_recv_bufs[i]->rr_handle,
					&buf->rb_recv_bufs[i]->rr_iov);
			kfree(buf->rb_recv_bufs[i]);
		}
		if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
			rpcrdma_deregister_internal(ia,
					buf->rb_send_bufs[i]->rl_handle,
					&buf->rb_send_bufs[i]->rl_iov);
			kfree(buf->rb_send_bufs[i]);
		}
	}

	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			struct rpcrdma_mw, mw_list);
		list_del(&r->mw_all);
		list_del(&r->mw_list);
		switch (ia->ri_memreg_strategy) {
		case RPCRDMA_FRMR:
			rc = ib_dereg_mr(r->r.frmr.fr_mr);
			if (rc)
				dprintk("RPC: %s:"
					" ib_dereg_mr"
					" failed %i\n",
					__func__, rc);
			ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
			break;
		case RPCRDMA_MTHCAFMR:
			rc = ib_dealloc_fmr(r->r.fmr);
			if (rc)
				dprintk("RPC: %s:"
					" ib_dealloc_fmr"
					" failed %i\n",
					__func__, rc);
			break;
		default:
			break;
		}
	}

	kfree(buf->rb_pool);
}

/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
 * an unusable state. Find FRMRs in this state and dereg / reg
 * each. FRMRs that are VALID and attached to an rpcrdma_req are
 * also torn down.
 *
 * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
 *
 * This is invoked only in the transport connect worker in order
 * to serialize with rpcrdma_register_frmr_external().
 */
static void
rpcrdma_reset_frmrs(struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct list_head *pos;
	struct rpcrdma_mw *r;
	int rc;

	list_for_each(pos, &buf->rb_all) {
		r = list_entry(pos, struct rpcrdma_mw, mw_all);

		if (r->r.frmr.fr_state == FRMR_IS_INVALID)
			continue;

		rc = ib_dereg_mr(r->r.frmr.fr_mr);
		if (rc)
			dprintk("RPC: %s: ib_dereg_mr failed %i\n",
				__func__, rc);
		ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);

		r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
					ia->ri_max_frmr_depth);
		if (IS_ERR(r->r.frmr.fr_mr)) {
			rc = PTR_ERR(r->r.frmr.fr_mr);
			dprintk("RPC: %s: ib_alloc_fast_reg_mr"
				" failed %i\n", __func__, rc);
			continue;
		}
		r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
					ia->ri_id->device,
					ia->ri_max_frmr_depth);
		if (IS_ERR(r->r.frmr.fr_pgl)) {
			rc = PTR_ERR(r->r.frmr.fr_pgl);
			dprintk("RPC: %s: "
				"ib_alloc_fast_reg_page_list "
				"failed %i\n", __func__, rc);

			ib_dereg_mr(r->r.frmr.fr_mr);
			continue;
		}
		r->r.frmr.fr_state = FRMR_IS_INVALID;
	}
}

/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
 * some req segments uninitialized.
 */
static void
rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
{
	if (*mw) {
		list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
		*mw = NULL;
	}
}

/* Cycle mw's back in reverse order, and "spin" them.
 * This delays and scrambles reuse as much as possible.
 */
static void
rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mr_seg *seg = req->rl_segments;
	struct rpcrdma_mr_seg *seg1 = seg;
	int i;

	for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
		rpcrdma_buffer_put_mr(&seg->mr_chunk.rl_mw, buf);
	rpcrdma_buffer_put_mr(&seg1->mr_chunk.rl_mw, buf);
}

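/*
 * Return the send buffer, and any reply buffer attached to it, to the
 * pool. Callers hold buf->rb_lock.
 */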
static void
rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	buf->rb_send_bufs[--buf->rb_send_index] = req;
	req->rl_niovs = 0;
	if (req->rl_reply) {
		buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
		req->rl_reply->rr_func = NULL;
		req->rl_reply = NULL;
	}
}
1357
Chuck Leverddb6beb2014-07-29 17:24:54 -04001358/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
1359 * Redo only the ib_post_send().
1360 */
1361static void
1362rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
1363{
1364 struct rpcrdma_xprt *r_xprt =
1365 container_of(ia, struct rpcrdma_xprt, rx_ia);
1366 struct ib_send_wr invalidate_wr, *bad_wr;
1367 int rc;
1368
1369 dprintk("RPC: %s: FRMR %p is stale\n", __func__, r);
1370
1371 /* When this FRMR is re-inserted into rb_mws, it is no longer stale */
1372 r->r.frmr.fr_state = FRMR_IS_VALID;
1373
1374 memset(&invalidate_wr, 0, sizeof(invalidate_wr));
1375 invalidate_wr.wr_id = (unsigned long)(void *)r;
1376 invalidate_wr.opcode = IB_WR_LOCAL_INV;
1377 invalidate_wr.send_flags = IB_SEND_SIGNALED;
1378 invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
1379 DECR_CQCOUNT(&r_xprt->rx_ep);
1380
1381 dprintk("RPC: %s: frmr %p invalidating rkey %08x\n",
1382 __func__, r, r->r.frmr.fr_mr->rkey);
1383
1384 read_lock(&ia->ri_qplock);
1385 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
1386 read_unlock(&ia->ri_qplock);
1387 if (rc) {
1388 /* Force rpcrdma_buffer_get() to retry */
1389 r->r.frmr.fr_state = FRMR_IS_STALE;
1390 dprintk("RPC: %s: ib_post_send failed, %i\n",
1391 __func__, rc);
1392 }
1393}
1394
1395static void
1396rpcrdma_retry_flushed_linv(struct list_head *stale,
1397 struct rpcrdma_buffer *buf)
1398{
1399 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
1400 struct list_head *pos;
1401 struct rpcrdma_mw *r;
1402 unsigned long flags;
1403
1404 list_for_each(pos, stale) {
1405 r = list_entry(pos, struct rpcrdma_mw, mw_list);
1406 rpcrdma_retry_local_inv(r, ia);
1407 }
1408
1409 spin_lock_irqsave(&buf->rb_lock, flags);
1410 list_splice_tail(stale, &buf->rb_mws);
1411 spin_unlock_irqrestore(&buf->rb_lock, flags);
1412}
1413
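/*
 * Load a request with FRMRs from rb_mws. Any FRMR found stale is set
 * aside on the caller's "stale" list; the caller posts LOCAL_INV for
 * those after dropping rb_lock (see rpcrdma_buffer_get()).
 */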
static struct rpcrdma_req *
rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
			 struct list_head *stale)
{
	struct rpcrdma_mw *r;
	int i;

	i = RPCRDMA_MAX_SEGS - 1;
	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			       struct rpcrdma_mw, mw_list);
		list_del(&r->mw_list);
		if (r->r.frmr.fr_state == FRMR_IS_STALE) {
			list_add(&r->mw_list, stale);
			continue;
		}
		req->rl_segments[i].mr_chunk.rl_mw = r;
		if (unlikely(i-- == 0))
			return req;	/* Success */
	}

	/* Not enough entries on rb_mws for this req */
	rpcrdma_buffer_put_sendbuf(req, buf);
	rpcrdma_buffer_put_mrs(req, buf);
	return NULL;
}

static struct rpcrdma_req *
rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;
	int i;

	i = RPCRDMA_MAX_SEGS - 1;
	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			       struct rpcrdma_mw, mw_list);
		list_del(&r->mw_list);
		req->rl_segments[i].mr_chunk.rl_mw = r;
		if (unlikely(i-- == 0))
			return req;	/* Success */
	}

	/* Not enough entries on rb_mws for this req */
	rpcrdma_buffer_put_sendbuf(req, buf);
	rpcrdma_buffer_put_mrs(req, buf);
	return NULL;
}

/*
 * Get a set of request/reply buffers.
 *
 * Reply buffer (if needed) is attached to send buffer upon return.
 * Rule:
 *    rb_send_index and rb_recv_index MUST always be pointing to the
 *    *next* available buffer (non-NULL). They are incremented after
 *    removing buffers, and decremented *before* returning them.
 */
struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
	struct list_head stale;
	struct rpcrdma_req *req;
	unsigned long flags;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	if (buffers->rb_send_index == buffers->rb_max_requests) {
		spin_unlock_irqrestore(&buffers->rb_lock, flags);
		dprintk("RPC: %s: out of request buffers\n", __func__);
		return ((struct rpcrdma_req *)NULL);
	}

	req = buffers->rb_send_bufs[buffers->rb_send_index];
	if (buffers->rb_send_index < buffers->rb_recv_index) {
		dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
			__func__,
			buffers->rb_recv_index - buffers->rb_send_index);
		req->rl_reply = NULL;
	} else {
		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
	}
	buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;

	INIT_LIST_HEAD(&stale);
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
		break;
	case RPCRDMA_MTHCAFMR:
		req = rpcrdma_buffer_get_fmrs(req, buffers);
		break;
	default:
		break;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
	if (!list_empty(&stale))
		rpcrdma_retry_flushed_linv(&stale, buffers);
	return req;
}
1515
1516/*
1517 * Put request/reply buffers back into pool.
1518 * Pre-decrement counter/array index.
1519 */
1520void
1521rpcrdma_buffer_put(struct rpcrdma_req *req)
1522{
1523 struct rpcrdma_buffer *buffers = req->rl_buffer;
1524 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001525 unsigned long flags;
1526
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001527 spin_lock_irqsave(&buffers->rb_lock, flags);
Chuck Leverc2922c02014-07-29 17:24:36 -04001528 rpcrdma_buffer_put_sendbuf(req, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001529 switch (ia->ri_memreg_strategy) {
Tom Talpey3197d3092008-10-09 15:00:20 -04001530 case RPCRDMA_FRMR:
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001531 case RPCRDMA_MTHCAFMR:
Chuck Leverc2922c02014-07-29 17:24:36 -04001532 rpcrdma_buffer_put_mrs(req, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001533 break;
1534 default:
1535 break;
1536 }
1537 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1538}
1539
1540/*
1541 * Recover reply buffers from pool.
1542 * This happens when recovering from error conditions.
1543 * Post-increment counter/array index.
1544 */
1545void
1546rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
1547{
1548 struct rpcrdma_buffer *buffers = req->rl_buffer;
1549 unsigned long flags;
1550
1551 if (req->rl_iov.length == 0) /* special case xprt_rdma_allocate() */
1552 buffers = ((struct rpcrdma_req *) buffers)->rl_buffer;
1553 spin_lock_irqsave(&buffers->rb_lock, flags);
1554 if (buffers->rb_recv_index < buffers->rb_max_requests) {
1555 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1556 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1557 }
1558 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1559}
1560
1561/*
1562 * Put reply buffers back into pool when not attached to
Chuck Leverb45ccfd2014-05-28 10:32:34 -04001563 * request. This happens in error conditions.
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001564 */
1565void
1566rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
1567{
1568 struct rpcrdma_buffer *buffers = rep->rr_buffer;
1569 unsigned long flags;
1570
1571 rep->rr_func = NULL;
1572 spin_lock_irqsave(&buffers->rb_lock, flags);
1573 buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
1574 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1575}
1576
1577/*
1578 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
1579 */
1580
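/* A minimal usage sketch (illustrative only; the variable names below
 * are hypothetical, not taken from this file):
 *
 *	struct ib_mr *mr;
 *	struct ib_sge sge;
 *	int rc;
 *
 *	rc = rpcrdma_register_internal(ia, buf, len, &mr, &sge);
 *	if (rc == 0) {
 *		... post sends referencing sge.addr/sge.length/sge.lkey ...
 *		rpcrdma_deregister_internal(ia, mr, &sge);
 *	}
 */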
1581int
1582rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
1583 struct ib_mr **mrp, struct ib_sge *iov)
1584{
1585 struct ib_phys_buf ipb;
1586 struct ib_mr *mr;
1587 int rc;
1588
1589 /*
1590 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
1591 */
1592 iov->addr = ib_dma_map_single(ia->ri_id->device,
1593 va, len, DMA_BIDIRECTIONAL);
Yan Burmanbf858ab2014-06-19 16:06:30 +03001594 if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
1595 return -ENOMEM;
1596
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001597 iov->length = len;
1598
Tom Talpeybd7ed1d2008-10-09 15:00:09 -04001599 if (ia->ri_have_dma_lkey) {
1600 *mrp = NULL;
1601 iov->lkey = ia->ri_dma_lkey;
1602 return 0;
1603 } else if (ia->ri_bind_mem != NULL) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001604 *mrp = NULL;
1605 iov->lkey = ia->ri_bind_mem->lkey;
1606 return 0;
1607 }
1608
1609 ipb.addr = iov->addr;
1610 ipb.size = iov->length;
1611 mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
1612 IB_ACCESS_LOCAL_WRITE, &iov->addr);
1613
1614 dprintk("RPC: %s: phys convert: 0x%llx "
1615 "registered 0x%llx length %d\n",
Andrew Mortona56daeb2007-10-16 01:29:57 -07001616 __func__, (unsigned long long)ipb.addr,
1617 (unsigned long long)iov->addr, len);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001618
1619 if (IS_ERR(mr)) {
1620 *mrp = NULL;
1621 rc = PTR_ERR(mr);
1622 dprintk("RPC: %s: failed with %i\n", __func__, rc);
1623 } else {
1624 *mrp = mr;
1625 iov->lkey = mr->lkey;
1626 rc = 0;
1627 }
1628
1629 return rc;
1630}
1631
1632int
1633rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
1634 struct ib_mr *mr, struct ib_sge *iov)
1635{
1636 int rc;
1637
1638 ib_dma_unmap_single(ia->ri_id->device,
1639 iov->addr, iov->length, DMA_BIDIRECTIONAL);
1640
 1641	if (mr == NULL)
1642 return 0;
1643
1644 rc = ib_dereg_mr(mr);
1645 if (rc)
1646 dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
1647 return rc;
1648}
1649
1650/*
1651 * Wrappers for chunk registration, shared by read/write chunk code.
1652 */
1653
1654static void
1655rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
1656{
1657 seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
1658 seg->mr_dmalen = seg->mr_len;
1659 if (seg->mr_page)
1660 seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
1661 seg->mr_page, offset_in_page(seg->mr_offset),
1662 seg->mr_dmalen, seg->mr_dir);
1663 else
1664 seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
1665 seg->mr_offset,
1666 seg->mr_dmalen, seg->mr_dir);
Tom Tucker5c635e02011-02-09 19:45:34 +00001667 if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
1668 dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
1669 __func__,
Randy Dunlap986d4ab2011-03-15 17:11:59 -07001670 (unsigned long long)seg->mr_dma,
1671 seg->mr_offset, seg->mr_dmalen);
Tom Tucker5c635e02011-02-09 19:45:34 +00001672 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001673}
1674
1675static void
1676rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
1677{
1678 if (seg->mr_page)
1679 ib_dma_unmap_page(ia->ri_id->device,
1680 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1681 else
1682 ib_dma_unmap_single(ia->ri_id->device,
1683 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1684}
1685
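/* Register a chunk of segments through an FRMR: DMA-map each segment,
 * build the physical page list in frmr->fr_pgl, bump the rkey's
 * low-order byte, and post a FAST_REG_MR work request on the
 * connection's QP.  Coalescing stops at the first page-alignment hole
 * between segments.
 */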
Tom Talpey8d4ba032008-10-09 14:59:49 -04001686static int
Tom Talpey3197d3092008-10-09 15:00:20 -04001687rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1688 int *nsegs, int writing, struct rpcrdma_ia *ia,
1689 struct rpcrdma_xprt *r_xprt)
1690{
1691 struct rpcrdma_mr_seg *seg1 = seg;
Chuck Lever0dbb4102014-07-29 17:24:09 -04001692 struct rpcrdma_mw *mw = seg1->mr_chunk.rl_mw;
1693 struct rpcrdma_frmr *frmr = &mw->r.frmr;
1694 struct ib_mr *mr = frmr->fr_mr;
Chuck Lever440ddad52014-07-29 17:25:03 -04001695 struct ib_send_wr frmr_wr, *bad_wr;
Tom Talpey3197d3092008-10-09 15:00:20 -04001696 u8 key;
1697 int len, pageoff;
1698 int i, rc;
Tom Tucker9b781452012-02-20 13:07:57 -06001699 int seg_len;
1700 u64 pa;
1701 int page_no;
Tom Talpey3197d3092008-10-09 15:00:20 -04001702
1703 pageoff = offset_in_page(seg1->mr_offset);
1704 seg1->mr_offset -= pageoff; /* start of page */
1705 seg1->mr_len += pageoff;
1706 len = -pageoff;
Steve Wise0fc6c4e2014-05-28 10:32:00 -04001707 if (*nsegs > ia->ri_max_frmr_depth)
1708 *nsegs = ia->ri_max_frmr_depth;
Tom Tucker9b781452012-02-20 13:07:57 -06001709 for (page_no = i = 0; i < *nsegs;) {
Tom Talpey3197d3092008-10-09 15:00:20 -04001710 rpcrdma_map_one(ia, seg, writing);
Tom Tucker9b781452012-02-20 13:07:57 -06001711 pa = seg->mr_dma;
1712 for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
Chuck Lever0dbb4102014-07-29 17:24:09 -04001713 frmr->fr_pgl->page_list[page_no++] = pa;
Tom Tucker9b781452012-02-20 13:07:57 -06001714 pa += PAGE_SIZE;
1715 }
Tom Talpey3197d3092008-10-09 15:00:20 -04001716 len += seg->mr_len;
1717 ++seg;
1718 ++i;
1719 /* Check for holes */
1720 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1721 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1722 break;
1723 }
1724 dprintk("RPC: %s: Using frmr %p to map %d segments\n",
Chuck Lever0dbb4102014-07-29 17:24:09 -04001725 __func__, mw, i);
Tom Talpey3197d3092008-10-09 15:00:20 -04001726
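	/* Mark the FRMR valid before posting the fast-register WR; the
	 * out_err path below flips it back to FRMR_IS_INVALID if the
	 * registration cannot be posted.
	 */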
Chuck Lever05055722014-07-29 17:25:12 -04001727 frmr->fr_state = FRMR_IS_VALID;
1728
Tom Talpey3197d3092008-10-09 15:00:20 -04001729 memset(&frmr_wr, 0, sizeof frmr_wr);
Chuck Lever0dbb4102014-07-29 17:24:09 -04001730 frmr_wr.wr_id = (unsigned long)(void *)mw;
Tom Talpey3197d3092008-10-09 15:00:20 -04001731 frmr_wr.opcode = IB_WR_FAST_REG_MR;
Steve Wise7a8b80eb2010-08-11 12:47:08 -04001732 frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma;
Chuck Lever0dbb4102014-07-29 17:24:09 -04001733 frmr_wr.wr.fast_reg.page_list = frmr->fr_pgl;
Tom Tucker9b781452012-02-20 13:07:57 -06001734 frmr_wr.wr.fast_reg.page_list_len = page_no;
Tom Talpey3197d3092008-10-09 15:00:20 -04001735 frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
Tom Tucker9b781452012-02-20 13:07:57 -06001736 frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
Chuck Leverc977dea2014-05-28 10:35:06 -04001737 if (frmr_wr.wr.fast_reg.length < len) {
Chuck Lever5fc83f42014-07-29 17:23:17 -04001738 rc = -EIO;
1739 goto out_err;
Chuck Leverc977dea2014-05-28 10:35:06 -04001740 }
1741
 1742	/* Bump the key: use a fresh low-order rkey byte for each fast registration */
Chuck Lever0dbb4102014-07-29 17:24:09 -04001743 key = (u8)(mr->rkey & 0x000000FF);
1744 ib_update_fast_reg_key(mr, ++key);
Chuck Leverc977dea2014-05-28 10:35:06 -04001745
Tom Talpey3197d3092008-10-09 15:00:20 -04001746 frmr_wr.wr.fast_reg.access_flags = (writing ?
Vu Pham68743082009-05-26 14:51:00 -04001747 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
1748 IB_ACCESS_REMOTE_READ);
Chuck Lever0dbb4102014-07-29 17:24:09 -04001749 frmr_wr.wr.fast_reg.rkey = mr->rkey;
Tom Talpey3197d3092008-10-09 15:00:20 -04001750 DECR_CQCOUNT(&r_xprt->rx_ep);
1751
Chuck Lever440ddad52014-07-29 17:25:03 -04001752 rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr);
Tom Talpey3197d3092008-10-09 15:00:20 -04001753 if (rc) {
1754 dprintk("RPC: %s: failed ib_post_send for register,"
1755 " status %i\n", __func__, rc);
Chuck Leverc93e9862014-07-29 17:24:19 -04001756 ib_update_fast_reg_key(mr, --key);
Chuck Lever5fc83f42014-07-29 17:23:17 -04001757 goto out_err;
Tom Talpey3197d3092008-10-09 15:00:20 -04001758 } else {
Chuck Lever0dbb4102014-07-29 17:24:09 -04001759 seg1->mr_rkey = mr->rkey;
Tom Talpey3197d3092008-10-09 15:00:20 -04001760 seg1->mr_base = seg1->mr_dma + pageoff;
1761 seg1->mr_nsegs = i;
1762 seg1->mr_len = len;
1763 }
1764 *nsegs = i;
Chuck Lever5fc83f42014-07-29 17:23:17 -04001765 return 0;
1766out_err:
Chuck Lever05055722014-07-29 17:25:12 -04001767 frmr->fr_state = FRMR_IS_INVALID;
Chuck Lever5fc83f42014-07-29 17:23:17 -04001768 while (i--)
1769 rpcrdma_unmap_one(ia, --seg);
Tom Talpey3197d3092008-10-09 15:00:20 -04001770 return rc;
1771}
1772
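/* Tear down an FRMR registration: post a LOCAL_INV work request to
 * invalidate the rkey, and DMA-unmap the segments under the read side
 * of ri_qplock (presumably so a concurrent reconnect cannot replace
 * ia->ri_id underneath the unmap).
 */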
1773static int
1774rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
1775 struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
1776{
1777 struct rpcrdma_mr_seg *seg1 = seg;
1778 struct ib_send_wr invalidate_wr, *bad_wr;
1779 int rc;
1780
Tom Talpey3197d3092008-10-09 15:00:20 -04001781 memset(&invalidate_wr, 0, sizeof invalidate_wr);
Tom Tucker5c635e02011-02-09 19:45:34 +00001782 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
Tom Talpey3197d3092008-10-09 15:00:20 -04001783 invalidate_wr.opcode = IB_WR_LOCAL_INV;
Tom Tucker5c635e02011-02-09 19:45:34 +00001784 invalidate_wr.send_flags = IB_SEND_SIGNALED;
Tom Talpey3197d3092008-10-09 15:00:20 -04001785 invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1786 DECR_CQCOUNT(&r_xprt->rx_ep);
1787
Chuck Lever73806c82014-07-29 17:23:25 -04001788 read_lock(&ia->ri_qplock);
1789 while (seg1->mr_nsegs--)
1790 rpcrdma_unmap_one(ia, seg++);
Tom Talpey3197d3092008-10-09 15:00:20 -04001791 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
Chuck Lever73806c82014-07-29 17:23:25 -04001792 read_unlock(&ia->ri_qplock);
Tom Talpey3197d3092008-10-09 15:00:20 -04001793 if (rc)
1794 dprintk("RPC: %s: failed ib_post_send for invalidate,"
1795 " status %i\n", __func__, rc);
1796 return rc;
1797}
1798
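/* Register a chunk of segments through an FMR: DMA-map each segment,
 * collect each segment's DMA address in physaddrs[], stop at the
 * first page-alignment hole, then hand the whole set to
 * ib_map_phys_fmr() with the first segment's address as the base.
 */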
1799static int
Tom Talpey8d4ba032008-10-09 14:59:49 -04001800rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
1801 int *nsegs, int writing, struct rpcrdma_ia *ia)
1802{
1803 struct rpcrdma_mr_seg *seg1 = seg;
1804 u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
1805 int len, pageoff, i, rc;
1806
1807 pageoff = offset_in_page(seg1->mr_offset);
1808 seg1->mr_offset -= pageoff; /* start of page */
1809 seg1->mr_len += pageoff;
1810 len = -pageoff;
1811 if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
1812 *nsegs = RPCRDMA_MAX_DATA_SEGS;
1813 for (i = 0; i < *nsegs;) {
1814 rpcrdma_map_one(ia, seg, writing);
1815 physaddrs[i] = seg->mr_dma;
1816 len += seg->mr_len;
1817 ++seg;
1818 ++i;
1819 /* Check for holes */
1820 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1821 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1822 break;
1823 }
1824 rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
1825 physaddrs, i, seg1->mr_dma);
1826 if (rc) {
1827 dprintk("RPC: %s: failed ib_map_phys_fmr "
1828 "%u@0x%llx+%i (%d)... status %i\n", __func__,
1829 len, (unsigned long long)seg1->mr_dma,
1830 pageoff, i, rc);
1831 while (i--)
1832 rpcrdma_unmap_one(ia, --seg);
1833 } else {
1834 seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
1835 seg1->mr_base = seg1->mr_dma + pageoff;
1836 seg1->mr_nsegs = i;
1837 seg1->mr_len = len;
1838 }
1839 *nsegs = i;
1840 return rc;
1841}
1842
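/* Tear down an FMR registration: ib_unmap_fmr() takes a list, so the
 * single FMR is placed on a local list head before the segments are
 * DMA-unmapped under ri_qplock.
 */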
1843static int
1844rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
1845 struct rpcrdma_ia *ia)
1846{
1847 struct rpcrdma_mr_seg *seg1 = seg;
1848 LIST_HEAD(l);
1849 int rc;
1850
1851 list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l);
1852 rc = ib_unmap_fmr(&l);
Chuck Lever73806c82014-07-29 17:23:25 -04001853 read_lock(&ia->ri_qplock);
Tom Talpey8d4ba032008-10-09 14:59:49 -04001854 while (seg1->mr_nsegs--)
1855 rpcrdma_unmap_one(ia, seg++);
Chuck Lever73806c82014-07-29 17:23:25 -04001856 read_unlock(&ia->ri_qplock);
Tom Talpey8d4ba032008-10-09 14:59:49 -04001857 if (rc)
1858 dprintk("RPC: %s: failed ib_unmap_fmr,"
1859 " status %i\n", __func__, rc);
1860 return rc;
1861}
1862
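/* Register the memory described by a set of segments, using whichever
 * memory registration strategy this interface adapter was configured
 * with.  Returns the number of segments registered, or -1 on failure.
 */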
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001863int
1864rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
1865 int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
1866{
1867 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001868 int rc = 0;
1869
1870 switch (ia->ri_memreg_strategy) {
1871
1872#if RPCRDMA_PERSISTENT_REGISTRATION
1873 case RPCRDMA_ALLPHYSICAL:
1874 rpcrdma_map_one(ia, seg, writing);
1875 seg->mr_rkey = ia->ri_bind_mem->rkey;
1876 seg->mr_base = seg->mr_dma;
1877 seg->mr_nsegs = 1;
1878 nsegs = 1;
1879 break;
1880#endif
1881
Tom Talpey3197d3092008-10-09 15:00:20 -04001882	/* Registration using an FRMR fast-register work request */
1883 case RPCRDMA_FRMR:
1884 rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
1885 break;
1886
Tom Talpey8d4ba032008-10-09 14:59:49 -04001887	/* Registration using FMRs (fast memory regions) */
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001888 case RPCRDMA_MTHCAFMR:
Tom Talpey8d4ba032008-10-09 14:59:49 -04001889 rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001890 break;
1891
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001892 default:
Chuck Lever0ac531c2014-05-28 10:32:43 -04001893 return -1;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001894 }
1895 if (rc)
1896 return -1;
1897
1898 return nsegs;
1899}
1900
1901int
1902rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
Chuck Lever13c9ff82014-05-28 10:33:08 -04001903 struct rpcrdma_xprt *r_xprt)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001904{
1905 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001906 int nsegs = seg->mr_nsegs, rc;
1907
1908 switch (ia->ri_memreg_strategy) {
1909
1910#if RPCRDMA_PERSISTENT_REGISTRATION
1911 case RPCRDMA_ALLPHYSICAL:
Chuck Lever73806c82014-07-29 17:23:25 -04001912 read_lock(&ia->ri_qplock);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001913 rpcrdma_unmap_one(ia, seg);
Chuck Lever73806c82014-07-29 17:23:25 -04001914 read_unlock(&ia->ri_qplock);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001915 break;
1916#endif
1917
Tom Talpey3197d3092008-10-09 15:00:20 -04001918 case RPCRDMA_FRMR:
1919 rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
1920 break;
1921
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001922 case RPCRDMA_MTHCAFMR:
Tom Talpey8d4ba032008-10-09 14:59:49 -04001923 rc = rpcrdma_deregister_fmr_external(seg, ia);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001924 break;
1925
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001926 default:
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001927 break;
1928 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001929 return nsegs;
1930}
1931
1932/*
1933 * Prepost any receive buffer, then post send.
1934 *
1935 * Receive buffer is donated to hardware, reclaimed upon recv completion.
1936 */
1937int
1938rpcrdma_ep_post(struct rpcrdma_ia *ia,
1939 struct rpcrdma_ep *ep,
1940 struct rpcrdma_req *req)
1941{
1942 struct ib_send_wr send_wr, *send_wr_fail;
1943 struct rpcrdma_rep *rep = req->rl_reply;
1944 int rc;
1945
1946 if (rep) {
1947 rc = rpcrdma_ep_post_recv(ia, ep, rep);
1948 if (rc)
1949 goto out;
1950 req->rl_reply = NULL;
1951 }
1952
1953 send_wr.next = NULL;
1954 send_wr.wr_id = 0ULL; /* no send cookie */
1955 send_wr.sg_list = req->rl_send_iov;
1956 send_wr.num_sge = req->rl_niovs;
1957 send_wr.opcode = IB_WR_SEND;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001958 if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */
1959 ib_dma_sync_single_for_device(ia->ri_id->device,
1960 req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
1961 DMA_TO_DEVICE);
1962 ib_dma_sync_single_for_device(ia->ri_id->device,
1963 req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
1964 DMA_TO_DEVICE);
1965 ib_dma_sync_single_for_device(ia->ri_id->device,
1966 req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
1967 DMA_TO_DEVICE);
1968
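	/* Most sends are left unsignaled to limit completion traffic.
	 * When the CQ credit count runs out, reset it and request a
	 * signaled completion so the provider can retire queued sends.
	 */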
1969 if (DECR_CQCOUNT(ep) > 0)
1970 send_wr.send_flags = 0;
1971 else { /* Provider must take a send completion every now and then */
1972 INIT_CQCOUNT(ep);
1973 send_wr.send_flags = IB_SEND_SIGNALED;
1974 }
1975
1976 rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
1977 if (rc)
1978 dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
1979 rc);
1980out:
1981 return rc;
1982}
1983
1984/*
1985 * (Re)post a receive buffer.
1986 */
1987int
1988rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
1989 struct rpcrdma_ep *ep,
1990 struct rpcrdma_rep *rep)
1991{
1992 struct ib_recv_wr recv_wr, *recv_wr_fail;
1993 int rc;
1994
1995 recv_wr.next = NULL;
1996 recv_wr.wr_id = (u64) (unsigned long) rep;
1997 recv_wr.sg_list = &rep->rr_iov;
1998 recv_wr.num_sge = 1;
1999
2000 ib_dma_sync_single_for_cpu(ia->ri_id->device,
2001 rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);
2002
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002003 rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
2004
2005 if (rc)
2006 dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
2007 rc);
2008 return rc;
2009}
Chuck Lever43e95982014-07-29 17:23:34 -04002010
2011/* Physical mapping means one Read/Write list entry per page.
2012 * All list entries must fit within an inline buffer.
2013 *
2014 * NB: The server must return a Write list for NFS READ,
2015 * which has the same constraint. Factor in the inline
2016 * rsize as well.
2017 */
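/* Rough worked example (assuming the usual 28-byte minimum RPC/RDMA
 * header and a 16-byte struct rpcrdma_segment): with 1024-byte inline
 * buffers, (1024 - 28) / 16 = 62 list entries fit, for a maximum
 * payload of 62 pages.
 */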
2018static size_t
2019rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt)
2020{
2021 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
2022 unsigned int inline_size, pages;
2023
2024 inline_size = min_t(unsigned int,
2025 cdata->inline_wsize, cdata->inline_rsize);
2026 inline_size -= RPCRDMA_HDRLEN_MIN;
2027 pages = inline_size / sizeof(struct rpcrdma_segment);
2028 return pages << PAGE_SHIFT;
2029}
2030
2031static size_t
2032rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt)
2033{
2034 return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
2035}
2036
2037size_t
2038rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt)
2039{
2040 size_t result;
2041
2042 switch (r_xprt->rx_ia.ri_memreg_strategy) {
2043 case RPCRDMA_ALLPHYSICAL:
2044 result = rpcrdma_physical_max_payload(r_xprt);
2045 break;
2046 default:
2047 result = rpcrdma_mr_max_payload(r_xprt);
2048 }
2049 return result;
2050}