\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -04001/*
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040038 */
39
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040040/*
41 * verbs.c
42 *
43 * Encapsulates the major functions managing:
44 * o adapters
45 * o endpoints
46 * o connections
47 * o buffer memory
48 */

#include <linux/interrupt.h>
#include <linux/slab.h>
#include <asm/bitops.h>

#include "xprt_rdma.h"

/*
 * Globals/Macros
 */

#ifdef RPC_DEBUG
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

/*
 * internal functions
 */

/*
 * handle replies in tasklet context, using a single, global list
 * rdma tasklet function -- just turn around and call the func
 * for all replies on the list
 */

static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
static LIST_HEAD(rpcrdma_tasklets_g);

static void
rpcrdma_run_tasklet(unsigned long data)
{
	struct rpcrdma_rep *rep;
	void (*func)(struct rpcrdma_rep *);
	unsigned long flags;

	data = data;
	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	while (!list_empty(&rpcrdma_tasklets_g)) {
		rep = list_entry(rpcrdma_tasklets_g.next,
				 struct rpcrdma_rep, rr_list);
		list_del(&rep->rr_list);
		func = rep->rr_func;
		rep->rr_func = NULL;
		spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);

		if (func)
			func(rep);
		else
			rpcrdma_recv_buffer_put(rep);

		spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	}
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
}

static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);

static inline void
rpcrdma_schedule_tasklet(struct rpcrdma_rep *rep)
{
	unsigned long flags;

	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	list_add_tail(&rep->rr_list, &rpcrdma_tasklets_g);
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
	tasklet_schedule(&rpcrdma_tasklet_g);
}
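
/*
 * In outline, the receive path defers reply processing to the tasklet
 * above rather than running it in the completion upcall:
 *
 *	rpcrdma_recvcq_process_wc(wc)
 *	  -> rpcrdma_schedule_tasklet(rep)
 *	       -> list_add_tail(&rep->rr_list, &rpcrdma_tasklets_g)
 *	       -> tasklet_schedule(&rpcrdma_tasklet_g)
 *	... later, in softirq context ...
 *	rpcrdma_run_tasklet()
 *	  -> rep->rr_func(rep), or rpcrdma_recv_buffer_put(rep)
 *
 * This is only a sketch of the flow as it appears in this file; rr_func
 * is installed elsewhere in xprt_rdma, so its behavior is not shown here.
 */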

static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	dprintk("RPC:       %s: QP error %X on device %s ep %p\n",
		__func__, event->event, event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

static void
rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	dprintk("RPC:       %s: CQ error %X on device %s ep %p\n",
		__func__, event->event, event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

static void
rpcrdma_sendcq_process_wc(struct ib_wc *wc)
{
	struct rpcrdma_mw *frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;

	dprintk("RPC:       %s: frmr %p status %X opcode %d\n",
		__func__, frmr, wc->status, wc->opcode);

	if (wc->wr_id == 0ULL)
		return;
	if (wc->status != IB_WC_SUCCESS)
		return;

	if (wc->opcode == IB_WC_FAST_REG_MR)
		frmr->r.frmr.state = FRMR_IS_VALID;
	else if (wc->opcode == IB_WC_LOCAL_INV)
		frmr->r.frmr.state = FRMR_IS_INVALID;
}

static int
rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct ib_wc *wcs;
	int budget, count, rc;

	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_send_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			return rc;

		count = rc;
		while (count-- > 0)
			rpcrdma_sendcq_process_wc(wcs++);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	return 0;
}

/*
 * Handle send, fast_reg_mr, and local_inv completions.
 *
 * Send events are typically suppressed and thus do not result
 * in an upcall. Occasionally one is signaled, however. This
 * prevents the provider's completion queue from wrapping and
 * losing a completion.
 */
static void
rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_sendcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC:       %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_sendcq_poll(cq, ep);
}
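
/*
 * The upcall above follows the usual verbs re-arm pattern, roughly:
 * poll until the CQ is empty, re-arm with ib_req_notify_cq(cq,
 * IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS), and if the return
 * value is positive (completions may have raced with re-arming), poll
 * once more. Suppression of send completions themselves is driven
 * elsewhere in this file: ep->rep_cqinit is derived from the send
 * queue depth in rpcrdma_ep_create(), and DECR_CQCOUNT()/
 * IB_SEND_SIGNALED are applied to the FRMR work requests so that only
 * an occasional send is signaled.
 */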

static void
rpcrdma_recvcq_process_wc(struct ib_wc *wc)
{
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long)wc->wr_id;

	dprintk("RPC:       %s: rep %p status %X opcode %X length %u\n",
		__func__, rep, wc->status, wc->opcode, wc->byte_len);

	if (wc->status != IB_WC_SUCCESS) {
		rep->rr_len = ~0U;
		goto out_schedule;
	}
	if (wc->opcode != IB_WC_RECV)
		return;

	rep->rr_len = wc->byte_len;
	ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
			rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);

	if (rep->rr_len >= 16) {
		struct rpcrdma_msg *p = (struct rpcrdma_msg *)rep->rr_base;
		unsigned int credits = ntohl(p->rm_credit);

		if (credits == 0)
			credits = 1;	/* don't deadlock */
		else if (credits > rep->rr_buffer->rb_max_requests)
			credits = rep->rr_buffer->rb_max_requests;
		atomic_set(&rep->rr_buffer->rb_credits, credits);
	}

out_schedule:
	rpcrdma_schedule_tasklet(rep);
}
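
/*
 * A note on the credit update above: the advertised credit value comes
 * from the rm_credit field of the RPC-over-RDMA header (the >= 16 byte
 * check presumably covers the fixed header words that precede it), and
 * it is clamped to the range [1, rb_max_requests]. Zero credits would
 * stall the transport, and a value above rb_max_requests would promise
 * more concurrent requests than receive buffers have been posted for.
 */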

static int
rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct ib_wc *wcs;
	int budget, count, rc;

	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_recv_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			return rc;

		count = rc;
		while (count-- > 0)
			rpcrdma_recvcq_process_wc(wcs++);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	return 0;
}

/*
 * Handle receive completions.
 *
 * It is reentrant but processes single events in order to maintain
 * ordering of receives to keep server credits.
 *
 * It is the responsibility of the scheduled tasklet to return
 * recv buffers to the pool. NOTE: this affects synchronization of
 * connection shutdown. That is, the structures required for
 * the completion of the reply handler must remain intact until
 * all memory has been reclaimed.
 */
static void
rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_recvcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC:       %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_recvcq_poll(cq, ep);
}

#ifdef RPC_DEBUG
static const char * const conn[] = {
	"address resolved",
	"address error",
	"route resolved",
	"route error",
	"connect request",
	"connect response",
	"connect error",
	"unreachable",
	"rejected",
	"established",
	"disconnected",
	"device removal"
};
#endif

static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
#ifdef RPC_DEBUG
	struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
#endif
	struct ib_qp_attr attr;
	struct ib_qp_init_attr iattr;
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ia->ri_async_rc = 0;
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC:       %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC:       %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		ib_query_qp(ia->ri_id->qp, &attr,
			IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
			&iattr);
		dprintk("RPC:       %s: %d responder resources"
			" (%d initiator)\n",
			__func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		connstate = -ECONNREFUSED;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
		goto connected;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		connstate = -ENODEV;
connected:
		dprintk("RPC:       %s: %s: %pI4:%u (ep 0x%p event 0x%x)\n",
			__func__,
			(event->event <= 11) ? conn[event->event] :
						"unknown connection error",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			ep, event->event);
		atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
		dprintk("RPC:       %s: %sconnected\n",
			__func__, connstate > 0 ? "" : "dis");
		ep->rep_connected = connstate;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		break;
	default:
		dprintk("RPC:       %s: unexpected CM event %d\n",
			__func__, event->event);
		break;
	}

#ifdef RPC_DEBUG
	if (connstate == 1) {
		int ird = attr.max_dest_rd_atomic;
		int tird = ep->rep_remote_cma.responder_resources;
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
			"on %s, memreg %d slots %d ird %d%s\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			ia->ri_id->device->name,
			ia->ri_memreg_strategy,
			xprt->rx_buf.rb_max_requests,
			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
	} else if (connstate < 0) {
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			connstate);
	}
#endif

	return 0;
}
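
/*
 * A brief note on ep->rep_connected as used above and in the
 * connect/disconnect paths below: 1 means the RDMA connection is
 * established, 0 means a connect is in progress (or the endpoint was
 * never connected), and a negative errno records why the connection
 * failed or was torn down. Waiters on ep->rep_connect_wait test for a
 * transition away from 0 (or away from 1 when disconnecting).
 */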

static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
			struct rpcrdma_ia *ia, struct sockaddr *addr)
{
	struct rdma_cm_id *id;
	int rc;

	init_completion(&ia->ri_done);

	id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		dprintk("RPC:       %s: rdma_create_id() failed %i\n",
			__func__, rc);
		return id;
	}

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC:       %s: rdma_resolve_addr() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC:       %s: rdma_resolve_route() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}
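
/*
 * The address and route resolution steps above are asynchronous:
 * rdma_resolve_addr() and rdma_resolve_route() return immediately, and
 * the outcome is delivered to rpcrdma_conn_upcall(), which stores a
 * result in ia->ri_async_rc and completes ia->ri_done. Pre-loading
 * ri_async_rc with -ETIMEDOUT before each step means an expired
 * wait_for_completion_interruptible_timeout() is reported as a timeout
 * without any extra bookkeeping.
 */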

/*
 * Drain any cq, prior to teardown.
 */
static void
rpcrdma_clean_cq(struct ib_cq *cq)
{
	struct ib_wc wc;
	int count = 0;

	while (1 == ib_poll_cq(cq, 1, &wc))
		++count;

	if (count)
		dprintk("RPC:       %s: flushed %d events (last 0x%x)\n",
			__func__, count, wc.opcode);
}

/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc, mem_priv;
	struct ib_device_attr devattr;
	struct rpcrdma_ia *ia = &xprt->rx_ia;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC:       %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	/*
	 * Query the device to determine if the requested memory
	 * registration strategy is supported. If it isn't, set the
	 * strategy to a globally supported model.
	 */
	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC:       %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out2;
	}

	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
	}

	if (memreg == RPCRDMA_FRMR) {
		/* Requires both frmr reg and local dma lkey */
		if ((devattr.device_cap_flags &
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
			dprintk("RPC:       %s: FRMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_MTHCAFMR;
		} else {
			/* Mind the ia limit on FRMR page list depth */
			ia->ri_max_frmr_depth = min_t(unsigned int,
				RPCRDMA_MAX_DATA_SEGS,
				devattr.max_fast_reg_page_list_len);
		}
	}
	if (memreg == RPCRDMA_MTHCAFMR) {
		if (!ia->ri_id->device->alloc_fmr) {
			dprintk("RPC:       %s: MTHCAFMR registration "
				"not supported by HCA\n", __func__);
#if RPCRDMA_PERSISTENT_REGISTRATION
			memreg = RPCRDMA_ALLPHYSICAL;
#else
			rc = -ENOMEM;
			goto out2;
#endif
		}
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_FRMR:
		break;
#if RPCRDMA_PERSISTENT_REGISTRATION
	case RPCRDMA_ALLPHYSICAL:
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
#endif
	case RPCRDMA_MTHCAFMR:
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
#if RPCRDMA_PERSISTENT_REGISTRATION
	register_setup:
#endif
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			rc = -ENOMEM;
			goto out2;
		}
		break;
	default:
		printk(KERN_ERR "RPC: Unsupported memory "
				"registration mode: %d\n", memreg);
		rc = -ENOMEM;
		goto out2;
	}
	dprintk("RPC:       %s: memory registration strategy is %d\n",
		__func__, memreg);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	return 0;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}
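
/*
 * To summarize the strategy selection above: FRMR is used when the HCA
 * advertises both IB_DEVICE_MEM_MGT_EXTENSIONS and a local DMA lkey;
 * otherwise the request quietly falls back to MTHCAFMR, and if the
 * device does not provide alloc_fmr either, to ALLPHYSICAL (only when
 * RPCRDMA_PERSISTENT_REGISTRATION is compiled in, since that mode
 * exposes a DMA MR with remote read/write access for the life of the
 * mount).
 */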

/*
 * Clean up/close an IA.
 *  o if event handles and PD have been initialized, free them.
 *  o close the IA
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC:       %s: entering\n", __func__);
	if (ia->ri_bind_mem != NULL) {
		rc = ib_dereg_mr(ia->ri_bind_mem);
		dprintk("RPC:       %s: ib_dereg_mr returned %i\n",
			__func__, rc);
	}
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
		if (ia->ri_id->qp)
			rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = NULL;
	}
	if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
		rc = ib_dealloc_pd(ia->ri_pd);
		dprintk("RPC:       %s: ib_dealloc_pd returned %i\n",
			__func__, rc);
	}
}

/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
				struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr devattr;
	struct ib_cq *sendcq, *recvcq;
	int rc, err;

	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC:       %s: ib_query_device failed %d\n",
			__func__, rc);
		return rc;
	}

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > devattr.max_qp_wr)
		cdata->max_requests = devattr.max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	/* send_cq and recv_cq initialized below */
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR: {
		int depth = 7;

		/* Add room for frmr register and invalidate WRs.
		 * 1. FRMR reg WR for head
		 * 2. FRMR invalidate WR for head
		 * 3. N FRMR reg WRs for pagelist
		 * 4. N FRMR invalidate WRs for pagelist
		 * 5. FRMR reg WR for tail
		 * 6. FRMR invalidate WR for tail
		 * 7. The RDMA_SEND WR
		 */

		/* Calculate N if the device max FRMR depth is smaller than
		 * RPCRDMA_MAX_DATA_SEGS.
		 */
		if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
			int delta = RPCRDMA_MAX_DATA_SEGS -
				    ia->ri_max_frmr_depth;

			do {
				depth += 2; /* FRMR reg + invalidate */
				delta -= ia->ri_max_frmr_depth;
			} while (delta > 0);

		}
		ep->rep_attr.cap.max_send_wr *= depth;
		if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
			cdata->max_requests = devattr.max_qp_wr / depth;
			if (!cdata->max_requests)
				return -EINVAL;
			ep->rep_attr.cap.max_send_wr = cdata->max_requests *
						       depth;
		}
		break;
	}
	default:
		break;
	}
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	dprintk("RPC:       %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

	/* set trigger for requesting send completion */
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
	if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;
	INIT_CQCOUNT(ep);
	ep->rep_ia = ia;
	init_waitqueue_head(&ep->rep_connect_wait);
	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

	sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
				  rpcrdma_cq_async_error_upcall, ep,
				  ep->rep_attr.cap.max_send_wr + 1, 0);
	if (IS_ERR(sendcq)) {
		rc = PTR_ERR(sendcq);
		dprintk("RPC:       %s: failed to create send CQ: %i\n",
			__func__, rc);
		goto out1;
	}

	rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		goto out2;
	}

	recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
				  rpcrdma_cq_async_error_upcall, ep,
				  ep->rep_attr.cap.max_recv_wr + 1, 0);
	if (IS_ERR(recvcq)) {
		rc = PTR_ERR(recvcq);
		dprintk("RPC:       %s: failed to create recv CQ: %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		ib_destroy_cq(recvcq);
		goto out2;
	}

	ep->rep_attr.send_cq = sendcq;
	ep->rep_attr.recv_cq = recvcq;

	/* Initialize cma parameters */

	/* RPC/RDMA does not use private data */
	ep->rep_remote_cma.private_data = NULL;
	ep->rep_remote_cma.private_data_len = 0;

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	if (devattr.max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
		ep->rep_remote_cma.responder_resources = 32;
	else
		ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;

	ep->rep_remote_cma.retry_count = 7;
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	err = ib_destroy_cq(sendcq);
	if (err)
		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
			__func__, err);
out1:
	return rc;
}
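
/*
 * A worked example of the FRMR depth calculation above, with made-up
 * numbers: suppose RPCRDMA_MAX_DATA_SEGS were 64 and the device
 * reported ri_max_frmr_depth = 16. Then delta = 48 and the loop adds
 * two WRs three times, giving depth = 7 + 6 = 13, so max_send_wr
 * becomes cdata->max_requests * 13. The figures are illustrative only;
 * the real values depend on the compiled constant and on
 * devattr.max_fast_reg_page_list_len for the HCA in use.
 */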

/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC:       %s: entering, connected is %d\n",
		__func__, ep->rep_connected);

	cancel_delayed_work_sync(&ep->rep_connect_worker);

	if (ia->ri_id->qp) {
		rc = rpcrdma_ep_disconnect(ep, ia);
		if (rc)
			dprintk("RPC:       %s: rpcrdma_ep_disconnect"
				" returned %i\n", __func__, rc);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	/* padding - could be done in rpcrdma_buffer_destroy... */
	if (ep->rep_pad_mr) {
		rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
		ep->rep_pad_mr = NULL;
	}

	rpcrdma_clean_cq(ep->rep_attr.recv_cq);
	rc = ib_destroy_cq(ep->rep_attr.recv_cq);
	if (rc)
		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
			__func__, rc);

	rpcrdma_clean_cq(ep->rep_attr.send_cq);
	rc = ib_destroy_cq(ep->rep_attr.send_cq);
	if (rc)
		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
			__func__, rc);
}

/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id;
	int rc = 0;
	int retry_count = 0;

	if (ep->rep_connected != 0) {
		struct rpcrdma_xprt *xprt;
retry:
		dprintk("RPC:       %s: reconnecting...\n", __func__);
		rc = rpcrdma_ep_disconnect(ep, ia);
		if (rc && rc != -ENOTCONN)
			dprintk("RPC:       %s: rpcrdma_ep_disconnect"
				" status %i\n", __func__, rc);

		rpcrdma_clean_cq(ep->rep_attr.recv_cq);
		rpcrdma_clean_cq(ep->rep_attr.send_cq);

		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		id = rpcrdma_create_id(xprt, ia,
				(struct sockaddr *)&xprt->rx_data.addr);
		if (IS_ERR(id)) {
			rc = -EHOSTUNREACH;
			goto out;
		}
		/* TEMP TEMP TEMP - fail if new device:
		 * Deregister/remarshal *all* requests!
		 * Close and recreate adapter, pd, etc!
		 * Re-determine all attributes still sane!
		 * More stuff I haven't thought of!
		 * Rrrgh!
		 */
		if (ia->ri_id->device != id->device) {
			printk("RPC:       %s: can't reconnect on "
				"different device!\n", __func__);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}
		/* END TEMP */
		rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC:       %s: rdma_create_qp failed %i\n",
				__func__, rc);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}
		rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = id;
	} else {
		dprintk("RPC:       %s: connecting...\n", __func__);
		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC:       %s: rdma_create_qp failed %i\n",
				__func__, rc);
			/* do not update ep->rep_connected */
			return -ENETUNREACH;
		}
	}

	ep->rep_connected = 0;

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC:       %s: rdma_connect() failed with %i\n",
				__func__, rc);
		goto out;
	}

	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

	/*
	 * Check state. A non-peer reject indicates no listener
	 * (ECONNREFUSED), which may be a transient state. All
	 * others indicate a transport condition which has already
	 * undergone a best-effort.
	 */
	if (ep->rep_connected == -ECONNREFUSED &&
	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
		dprintk("RPC:       %s: non-peer_reject, retry\n", __func__);
		goto retry;
	}
	if (ep->rep_connected <= 0) {
		/* Sometimes, the only way to reliably connect to remote
		 * CMs is to use same nonzero values for ORD and IRD. */
		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
		    (ep->rep_remote_cma.responder_resources == 0 ||
		     ep->rep_remote_cma.initiator_depth !=
				ep->rep_remote_cma.responder_resources)) {
			if (ep->rep_remote_cma.responder_resources == 0)
				ep->rep_remote_cma.responder_resources = 1;
			ep->rep_remote_cma.initiator_depth =
				ep->rep_remote_cma.responder_resources;
			goto retry;
		}
		rc = ep->rep_connected;
	} else {
		dprintk("RPC:       %s: connected\n", __func__);
	}

out:
	if (rc)
		ep->rep_connected = rc;
	return rc;
}
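
/*
 * Retry policy, in short: an -ECONNREFUSED result (no listener yet) is
 * retried up to RDMA_CONNECT_RETRY_MAX times on the assumption that the
 * server may simply not be up. Any other failure gets a further round
 * of retries only while the ORD/IRD values are being adjusted, since
 * some CM implementations reportedly connect reliably only when
 * initiator_depth and responder_resources are equal and nonzero.
 */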

/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
int
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	rpcrdma_clean_cq(ep->rep_attr.recv_cq);
	rpcrdma_clean_cq(ep->rep_attr.send_cq);
	rc = rdma_disconnect(ia->ri_id);
	if (!rc) {
		/* returns without wait if not connected */
		wait_event_interruptible(ep->rep_connect_wait,
							ep->rep_connected != 1);
		dprintk("RPC:       %s: after wait, %sconnected\n", __func__,
			(ep->rep_connected == 1) ? "still " : "dis");
	} else {
		dprintk("RPC:       %s: rdma_disconnect %i\n", __func__, rc);
		ep->rep_connected = rc;
	}
	return rc;
}

/*
 * Initialize buffer memory
 */
int
rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
	struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
{
	char *p;
	size_t len, rlen, wlen;
	int i, rc;
	struct rpcrdma_mw *r;

	buf->rb_max_requests = cdata->max_requests;
	spin_lock_init(&buf->rb_lock);
	atomic_set(&buf->rb_credits, 1);

	/* Need to allocate:
	 *   1.  arrays for send and recv pointers
	 *   2.  arrays of struct rpcrdma_req to fill in pointers
	 *   3.  array of struct rpcrdma_rep for replies
	 *   4.  padding, if any
	 *   5.  mw's, fmr's or frmr's, if any
	 * Send/recv buffers in req/rep need to be registered
	 */

	len = buf->rb_max_requests *
		(sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
	len += cdata->padding;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		len += buf->rb_max_requests * RPCRDMA_MAX_SEGS *
				sizeof(struct rpcrdma_mw);
		break;
	case RPCRDMA_MTHCAFMR:
		/* TBD we are perhaps overallocating here */
		len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
				sizeof(struct rpcrdma_mw);
		break;
	default:
		break;
	}

	/* allocate 1, 4 and 5 in one shot */
	p = kzalloc(len, GFP_KERNEL);
	if (p == NULL) {
		dprintk("RPC:       %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
			__func__, len);
		rc = -ENOMEM;
		goto out;
	}
	buf->rb_pool = p;	/* for freeing it later */

	buf->rb_send_bufs = (struct rpcrdma_req **) p;
	p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
	buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
	p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];

	/*
	 * Register the zeroed pad buffer, if any.
	 */
	if (cdata->padding) {
		rc = rpcrdma_register_internal(ia, p, cdata->padding,
					    &ep->rep_pad_mr, &ep->rep_pad);
		if (rc)
			goto out;
	}
	p += cdata->padding;

	INIT_LIST_HEAD(&buf->rb_mws);
	r = (struct rpcrdma_mw *)p;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
			r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
						ia->ri_max_frmr_depth);
			if (IS_ERR(r->r.frmr.fr_mr)) {
				rc = PTR_ERR(r->r.frmr.fr_mr);
				dprintk("RPC:       %s: ib_alloc_fast_reg_mr"
					" failed %i\n", __func__, rc);
				goto out;
			}
			r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
						ia->ri_id->device,
						ia->ri_max_frmr_depth);
			if (IS_ERR(r->r.frmr.fr_pgl)) {
				rc = PTR_ERR(r->r.frmr.fr_pgl);
				dprintk("RPC:       %s: "
					"ib_alloc_fast_reg_page_list "
					"failed %i\n", __func__, rc);

				ib_dereg_mr(r->r.frmr.fr_mr);
				goto out;
			}
			list_add(&r->mw_list, &buf->rb_mws);
			++r;
		}
		break;
	case RPCRDMA_MTHCAFMR:
		/* TBD we are perhaps overallocating here */
		for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
			static struct ib_fmr_attr fa =
				{ RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT };
			r->r.fmr = ib_alloc_fmr(ia->ri_pd,
				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
				&fa);
			if (IS_ERR(r->r.fmr)) {
				rc = PTR_ERR(r->r.fmr);
				dprintk("RPC:       %s: ib_alloc_fmr"
					" failed %i\n", __func__, rc);
				goto out;
			}
			list_add(&r->mw_list, &buf->rb_mws);
			++r;
		}
		break;
	default:
		break;
	}

	/*
	 * Allocate/init the request/reply buffers. Doing this
	 * using kmalloc for now -- one for each buf.
	 */
	wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req));
	rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep));
	dprintk("RPC:       %s: wlen = %zu, rlen = %zu\n",
		__func__, wlen, rlen);

	for (i = 0; i < buf->rb_max_requests; i++) {
		struct rpcrdma_req *req;
		struct rpcrdma_rep *rep;

		req = kmalloc(wlen, GFP_KERNEL);
		if (req == NULL) {
			dprintk("RPC:       %s: request buffer %d alloc"
				" failed\n", __func__, i);
			rc = -ENOMEM;
			goto out;
		}
		memset(req, 0, sizeof(struct rpcrdma_req));
		buf->rb_send_bufs[i] = req;
		buf->rb_send_bufs[i]->rl_buffer = buf;

		rc = rpcrdma_register_internal(ia, req->rl_base,
				wlen - offsetof(struct rpcrdma_req, rl_base),
				&buf->rb_send_bufs[i]->rl_handle,
				&buf->rb_send_bufs[i]->rl_iov);
		if (rc)
			goto out;

		buf->rb_send_bufs[i]->rl_size = wlen -
						sizeof(struct rpcrdma_req);

		rep = kmalloc(rlen, GFP_KERNEL);
		if (rep == NULL) {
			dprintk("RPC:       %s: reply buffer %d alloc failed\n",
				__func__, i);
			rc = -ENOMEM;
			goto out;
		}
		memset(rep, 0, sizeof(struct rpcrdma_rep));
		buf->rb_recv_bufs[i] = rep;
		buf->rb_recv_bufs[i]->rr_buffer = buf;

		rc = rpcrdma_register_internal(ia, rep->rr_base,
				rlen - offsetof(struct rpcrdma_rep, rr_base),
				&buf->rb_recv_bufs[i]->rr_handle,
				&buf->rb_recv_bufs[i]->rr_iov);
		if (rc)
			goto out;

	}
	dprintk("RPC:       %s: max_requests %d\n",
		__func__, buf->rb_max_requests);
	/* done */
	return 0;
out:
	rpcrdma_buffer_destroy(buf);
	return rc;
}
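
/*
 * The single kzalloc() above carves one contiguous pool into, in order:
 * the array of rpcrdma_req pointers, the array of rpcrdma_rep pointers,
 * the optional zeroed pad buffer, and then the rpcrdma_mw array that is
 * threaded onto rb_mws. A rough picture of buf->rb_pool:
 *
 *	[ req ptrs | rep ptrs | padding | mw/fmr/frmr structs ]
 *
 * The req and rep buffers themselves are kmalloc'ed separately in the
 * loop, each rounded up to a power of two that covers the negotiated
 * inline sizes.
 */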

/*
 * Unregister and destroy buffer memory. Need to deal with
 * partial initialization, so it's callable from failed create.
 * Must be called before destroying endpoint, as registrations
 * reference it.
 */
void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
	int rc, i;
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	struct rpcrdma_mw *r;

	/* clean up in reverse order from create
	 *   1.  recv mr memory (mr free, then kfree)
	 *   2.  send mr memory (mr free, then kfree)
	 *   3.  padding (if any) [moved to rpcrdma_ep_destroy]
	 *   4.  arrays
	 */
	dprintk("RPC:       %s: entering\n", __func__);

	for (i = 0; i < buf->rb_max_requests; i++) {
		if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) {
			rpcrdma_deregister_internal(ia,
					buf->rb_recv_bufs[i]->rr_handle,
					&buf->rb_recv_bufs[i]->rr_iov);
			kfree(buf->rb_recv_bufs[i]);
		}
		if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
			rpcrdma_deregister_internal(ia,
					buf->rb_send_bufs[i]->rl_handle,
					&buf->rb_send_bufs[i]->rl_iov);
			kfree(buf->rb_send_bufs[i]);
		}
	}

	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			struct rpcrdma_mw, mw_list);
		list_del(&r->mw_list);
		switch (ia->ri_memreg_strategy) {
		case RPCRDMA_FRMR:
			rc = ib_dereg_mr(r->r.frmr.fr_mr);
			if (rc)
				dprintk("RPC:       %s:"
					" ib_dereg_mr"
					" failed %i\n",
					__func__, rc);
			ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
			break;
		case RPCRDMA_MTHCAFMR:
			rc = ib_dealloc_fmr(r->r.fmr);
			if (rc)
				dprintk("RPC:       %s:"
					" ib_dealloc_fmr"
					" failed %i\n",
					__func__, rc);
			break;
		default:
			break;
		}
	}

	kfree(buf->rb_pool);
}

/*
 * Get a set of request/reply buffers.
 *
 * Reply buffer (if needed) is attached to send buffer upon return.
 * Rule:
 *    rb_send_index and rb_recv_index MUST always be pointing to the
 *    *next* available buffer (non-NULL). They are incremented after
 *    removing buffers, and decremented *before* returning them.
 */
struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
	struct rpcrdma_req *req;
	unsigned long flags;
	int i;
	struct rpcrdma_mw *r;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	if (buffers->rb_send_index == buffers->rb_max_requests) {
		spin_unlock_irqrestore(&buffers->rb_lock, flags);
		dprintk("RPC:       %s: out of request buffers\n", __func__);
		return ((struct rpcrdma_req *)NULL);
	}

	req = buffers->rb_send_bufs[buffers->rb_send_index];
	if (buffers->rb_send_index < buffers->rb_recv_index) {
		dprintk("RPC:       %s: %d extra receives outstanding (ok)\n",
			__func__,
			buffers->rb_recv_index - buffers->rb_send_index);
		req->rl_reply = NULL;
	} else {
		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
	}
	buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
	if (!list_empty(&buffers->rb_mws)) {
		i = RPCRDMA_MAX_SEGS - 1;
		do {
			r = list_entry(buffers->rb_mws.next,
					struct rpcrdma_mw, mw_list);
			list_del(&r->mw_list);
			req->rl_segments[i].mr_chunk.rl_mw = r;
		} while (--i >= 0);
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
	return req;
}

/*
 * Put request/reply buffers back into pool.
 * Pre-decrement counter/array index.
 */
void
rpcrdma_buffer_put(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
	int i;
	unsigned long flags;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	buffers->rb_send_bufs[--buffers->rb_send_index] = req;
	req->rl_niovs = 0;
	if (req->rl_reply) {
		buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply;
		req->rl_reply->rr_func = NULL;
		req->rl_reply = NULL;
	}
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
	case RPCRDMA_MTHCAFMR:
		/*
		 * Cycle mw's back in reverse order, and "spin" them.
		 * This delays and scrambles reuse as much as possible.
		 */
		i = 1;
		do {
			struct rpcrdma_mw **mw;
			mw = &req->rl_segments[i].mr_chunk.rl_mw;
			list_add_tail(&(*mw)->mw_list, &buffers->rb_mws);
			*mw = NULL;
		} while (++i < RPCRDMA_MAX_SEGS);
		list_add_tail(&req->rl_segments[0].mr_chunk.rl_mw->mw_list,
			      &buffers->rb_mws);
		req->rl_segments[0].mr_chunk.rl_mw = NULL;
		break;
	default:
		break;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}
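
/*
 * Taken together, rpcrdma_buffer_get() and rpcrdma_buffer_put() treat
 * rb_send_bufs[] and rb_recv_bufs[] as simple stacks guarded by
 * rb_lock: get() hands out the entry at the current index and
 * post-increments, put() pre-decrements and stores the entry back, so
 * the indexes always name the next available slot. The MW handling
 * deliberately returns MWs to the tail of rb_mws and in reverse order,
 * so a just-released MW (and its rkey) is not immediately reused.
 */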

/*
 * Recover reply buffers from pool.
 * This happens when recovering from error conditions.
 * Post-increment counter/array index.
 */
void
rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	unsigned long flags;

	if (req->rl_iov.length == 0)	/* special case xprt_rdma_allocate() */
		buffers = ((struct rpcrdma_req *) buffers)->rl_buffer;
	spin_lock_irqsave(&buffers->rb_lock, flags);
	if (buffers->rb_recv_index < buffers->rb_max_requests) {
		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Put reply buffers back into pool when not attached to
 * request. This happens in error conditions.
 */
void
rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
{
	struct rpcrdma_buffer *buffers = rep->rr_buffer;
	unsigned long flags;

	rep->rr_func = NULL;
	spin_lock_irqsave(&buffers->rb_lock, flags);
	buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
 */

int
rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
				struct ib_mr **mrp, struct ib_sge *iov)
{
	struct ib_phys_buf ipb;
	struct ib_mr *mr;
	int rc;

	/*
	 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
	 */
	iov->addr = ib_dma_map_single(ia->ri_id->device,
			va, len, DMA_BIDIRECTIONAL);
	if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
		return -ENOMEM;

	iov->length = len;

	if (ia->ri_have_dma_lkey) {
		*mrp = NULL;
		iov->lkey = ia->ri_dma_lkey;
		return 0;
	} else if (ia->ri_bind_mem != NULL) {
		*mrp = NULL;
		iov->lkey = ia->ri_bind_mem->lkey;
		return 0;
	}

	ipb.addr = iov->addr;
	ipb.size = iov->length;
	mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
			IB_ACCESS_LOCAL_WRITE, &iov->addr);

	dprintk("RPC:       %s: phys convert: 0x%llx "
			"registered 0x%llx length %d\n",
			__func__, (unsigned long long)ipb.addr,
			(unsigned long long)iov->addr, len);

	if (IS_ERR(mr)) {
		*mrp = NULL;
		rc = PTR_ERR(mr);
		dprintk("RPC:       %s: failed with %i\n", __func__, rc);
	} else {
		*mrp = mr;
		iov->lkey = mr->lkey;
		rc = 0;
	}

	return rc;
}

int
rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
		struct ib_mr *mr, struct ib_sge *iov)
{
	int rc;

	ib_dma_unmap_single(ia->ri_id->device,
			iov->addr, iov->length, DMA_BIDIRECTIONAL);

	if (NULL == mr)
		return 0;

	rc = ib_dereg_mr(mr);
	if (rc)
		dprintk("RPC:       %s: ib_dereg_mr failed %i\n", __func__, rc);
	return rc;
}
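
/*
 * rpcrdma_register_internal() picks the cheapest usable lkey for these
 * kmalloc'ed control buffers: the device's local DMA lkey when the HCA
 * advertises one, otherwise the lkey of the DMA MR set up in
 * rpcrdma_ia_open() (ri_bind_mem), and only as a last resort a
 * per-buffer ib_reg_phys_mr() registration. That is why *mrp is NULL in
 * the first two cases, and why the caller keeps the returned MR to hand
 * back to rpcrdma_deregister_internal().
 */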

/*
 * Wrappers for chunk registration, shared by read/write chunk code.
 */

static void
rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
{
	seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
	seg->mr_dmalen = seg->mr_len;
	if (seg->mr_page)
		seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
				seg->mr_page, offset_in_page(seg->mr_offset),
				seg->mr_dmalen, seg->mr_dir);
	else
		seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
				seg->mr_offset,
				seg->mr_dmalen, seg->mr_dir);
	if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
		dprintk("RPC:       %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
			__func__,
			(unsigned long long)seg->mr_dma,
			seg->mr_offset, seg->mr_dmalen);
	}
}

static void
rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
{
	if (seg->mr_page)
		ib_dma_unmap_page(ia->ri_id->device,
				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
	else
		ib_dma_unmap_single(ia->ri_id->device,
				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
}
1482
Tom Talpey8d4ba032008-10-09 14:59:49 -04001483static int
Tom Talpey3197d3092008-10-09 15:00:20 -04001484rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1485 int *nsegs, int writing, struct rpcrdma_ia *ia,
1486 struct rpcrdma_xprt *r_xprt)
1487{
1488 struct rpcrdma_mr_seg *seg1 = seg;
Tom Tucker5c635e02011-02-09 19:45:34 +00001489 struct ib_send_wr invalidate_wr, frmr_wr, *bad_wr, *post_wr;
1490
Tom Talpey3197d3092008-10-09 15:00:20 -04001491 u8 key;
1492 int len, pageoff;
1493 int i, rc;
Tom Tucker9b781452012-02-20 13:07:57 -06001494 int seg_len;
1495 u64 pa;
1496 int page_no;
Tom Talpey3197d3092008-10-09 15:00:20 -04001497
1498 pageoff = offset_in_page(seg1->mr_offset);
1499 seg1->mr_offset -= pageoff; /* start of page */
1500 seg1->mr_len += pageoff;
1501 len = -pageoff;
Steve Wise0fc6c4e2014-05-28 10:32:00 -04001502 if (*nsegs > ia->ri_max_frmr_depth)
1503 *nsegs = ia->ri_max_frmr_depth;
Tom Tucker9b781452012-02-20 13:07:57 -06001504 for (page_no = i = 0; i < *nsegs;) {
Tom Talpey3197d3092008-10-09 15:00:20 -04001505 rpcrdma_map_one(ia, seg, writing);
Tom Tucker9b781452012-02-20 13:07:57 -06001506 pa = seg->mr_dma;
1507 for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
1508 seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->
1509 page_list[page_no++] = pa;
1510 pa += PAGE_SIZE;
1511 }
Tom Talpey3197d3092008-10-09 15:00:20 -04001512 len += seg->mr_len;
1513 ++seg;
1514 ++i;
1515 /* Check for holes */
1516 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1517 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1518 break;
1519 }
1520 dprintk("RPC: %s: Using frmr %p to map %d segments\n",
1521 __func__, seg1->mr_chunk.rl_mw, i);
1522
Tom Tucker5c635e02011-02-09 19:45:34 +00001523 if (unlikely(seg1->mr_chunk.rl_mw->r.frmr.state == FRMR_IS_VALID)) {
1524 dprintk("RPC: %s: frmr %x left valid, posting invalidate.\n",
1525 __func__,
1526 seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey);
1527 /* Invalidate before using. */
1528 memset(&invalidate_wr, 0, sizeof invalidate_wr);
1529 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
1530 invalidate_wr.next = &frmr_wr;
1531 invalidate_wr.opcode = IB_WR_LOCAL_INV;
1532 invalidate_wr.send_flags = IB_SEND_SIGNALED;
1533 invalidate_wr.ex.invalidate_rkey =
1534 seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1535 DECR_CQCOUNT(&r_xprt->rx_ep);
1536 post_wr = &invalidate_wr;
1537 } else
1538 post_wr = &frmr_wr;
1539
Tom Talpey3197d3092008-10-09 15:00:20 -04001540 /* Prepare FRMR WR */
1541 memset(&frmr_wr, 0, sizeof frmr_wr);
Tom Tucker5c635e02011-02-09 19:45:34 +00001542 frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
Tom Talpey3197d3092008-10-09 15:00:20 -04001543 frmr_wr.opcode = IB_WR_FAST_REG_MR;
Tom Tucker5c635e02011-02-09 19:45:34 +00001544 frmr_wr.send_flags = IB_SEND_SIGNALED;
Steve Wise7a8b80eb2010-08-11 12:47:08 -04001545 frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma;
Tom Talpey3197d3092008-10-09 15:00:20 -04001546 frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
Tom Tucker9b781452012-02-20 13:07:57 -06001547 frmr_wr.wr.fast_reg.page_list_len = page_no;
Tom Talpey3197d3092008-10-09 15:00:20 -04001548 frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
Tom Tucker9b781452012-02-20 13:07:57 -06001549 frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
Chuck Leverc977dea2014-05-28 10:35:06 -04001550 if (frmr_wr.wr.fast_reg.length < len) {
1551		while (i--)
1552			rpcrdma_unmap_one(ia, --seg);
1553 return -EIO;
1554 }
1555
1556 /* Bump the key */
1557 key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
1558 ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
1559
Tom Talpey3197d3092008-10-09 15:00:20 -04001560 frmr_wr.wr.fast_reg.access_flags = (writing ?
Vu Pham68743082009-05-26 14:51:00 -04001561 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
1562 IB_ACCESS_REMOTE_READ);
Tom Talpey3197d3092008-10-09 15:00:20 -04001563 frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1564 DECR_CQCOUNT(&r_xprt->rx_ep);
1565
Tom Tucker5c635e02011-02-09 19:45:34 +00001566 rc = ib_post_send(ia->ri_id->qp, post_wr, &bad_wr);
Tom Talpey3197d3092008-10-09 15:00:20 -04001567
1568 if (rc) {
1569 dprintk("RPC: %s: failed ib_post_send for register,"
1570 " status %i\n", __func__, rc);
1571 while (i--)
1572 rpcrdma_unmap_one(ia, --seg);
1573 } else {
1574 seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1575 seg1->mr_base = seg1->mr_dma + pageoff;
1576 seg1->mr_nsegs = i;
1577 seg1->mr_len = len;
1578 }
1579 *nsegs = i;
1580 return rc;
1581}
1582
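/*
 * Invalidate an FRMR once the RPC it served is complete: unmap the
 * DMA-mapped segments, then post a LOCAL_INV WR for the chunk's rkey.
 */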
1583static int
1584rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
1585 struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
1586{
1587 struct rpcrdma_mr_seg *seg1 = seg;
1588 struct ib_send_wr invalidate_wr, *bad_wr;
1589 int rc;
1590
1591 while (seg1->mr_nsegs--)
1592 rpcrdma_unmap_one(ia, seg++);
1593
1594 memset(&invalidate_wr, 0, sizeof invalidate_wr);
Tom Tucker5c635e02011-02-09 19:45:34 +00001595 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
Tom Talpey3197d3092008-10-09 15:00:20 -04001596 invalidate_wr.opcode = IB_WR_LOCAL_INV;
Tom Tucker5c635e02011-02-09 19:45:34 +00001597 invalidate_wr.send_flags = IB_SEND_SIGNALED;
Tom Talpey3197d3092008-10-09 15:00:20 -04001598 invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1599 DECR_CQCOUNT(&r_xprt->rx_ep);
1600
1601 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
1602 if (rc)
1603 dprintk("RPC: %s: failed ib_post_send for invalidate,"
1604 " status %i\n", __func__, rc);
1605 return rc;
1606}
1607
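/*
 * Register a chunk using an FMR: DMA-map up to RPCRDMA_MAX_DATA_SEGS
 * page-contiguous segments and hand their addresses to
 * ib_map_phys_fmr(). On failure, every segment mapped so far is
 * unmapped again.
 */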
1608static int
Tom Talpey8d4ba032008-10-09 14:59:49 -04001609rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
1610 int *nsegs, int writing, struct rpcrdma_ia *ia)
1611{
1612 struct rpcrdma_mr_seg *seg1 = seg;
1613 u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
1614 int len, pageoff, i, rc;
1615
1616 pageoff = offset_in_page(seg1->mr_offset);
1617 seg1->mr_offset -= pageoff; /* start of page */
1618 seg1->mr_len += pageoff;
1619 len = -pageoff;
1620 if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
1621 *nsegs = RPCRDMA_MAX_DATA_SEGS;
1622 for (i = 0; i < *nsegs;) {
1623 rpcrdma_map_one(ia, seg, writing);
1624 physaddrs[i] = seg->mr_dma;
1625 len += seg->mr_len;
1626 ++seg;
1627 ++i;
1628		/* Stop at a hole: the region must stay page-contiguous */
1629 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1630 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1631 break;
1632 }
1633 rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
1634 physaddrs, i, seg1->mr_dma);
1635 if (rc) {
1636 dprintk("RPC: %s: failed ib_map_phys_fmr "
1637 "%u@0x%llx+%i (%d)... status %i\n", __func__,
1638 len, (unsigned long long)seg1->mr_dma,
1639 pageoff, i, rc);
1640 while (i--)
1641 rpcrdma_unmap_one(ia, --seg);
1642 } else {
1643 seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
1644 seg1->mr_base = seg1->mr_dma + pageoff;
1645 seg1->mr_nsegs = i;
1646 seg1->mr_len = len;
1647 }
1648 *nsegs = i;
1649 return rc;
1650}
1651
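/*
 * Tear down an FMR registration: ib_unmap_fmr() takes a list, so the
 * single FMR is placed on a local list head for the call, after which
 * the segments are DMA-unmapped.
 */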
1652static int
1653rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
1654 struct rpcrdma_ia *ia)
1655{
1656 struct rpcrdma_mr_seg *seg1 = seg;
1657 LIST_HEAD(l);
1658 int rc;
1659
1660 list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l);
1661 rc = ib_unmap_fmr(&l);
1662 while (seg1->mr_nsegs--)
1663 rpcrdma_unmap_one(ia, seg++);
1664 if (rc)
1665 dprintk("RPC: %s: failed ib_unmap_fmr,"
1666 " status %i\n", __func__, rc);
1667 return rc;
1668}
1669
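/*
 * Register a chunk of memory for remote access, dispatching on the
 * memory registration strategy selected when the IA was set up.
 * Returns the number of segments actually registered (possibly fewer
 * than requested), or -1 on failure.
 *
 * Sketch of a typical caller (the real call sites are in the chunk
 * marshaling code in rpc_rdma.c):
 *
 *	n = rpcrdma_register_external(seg, nsegs, writing, r_xprt);
 *	if (n < 0)
 *		return n;
 *	advertise seg->mr_rkey, seg->mr_base and seg->mr_len to the peer;
 *	...
 *	rpcrdma_deregister_external(seg, r_xprt);	(after the reply)
 */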
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001670int
1671rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
1672 int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
1673{
1674 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001675 int rc = 0;
1676
1677 switch (ia->ri_memreg_strategy) {
1678
1679#if RPCRDMA_PERSISTENT_REGISTRATION
1680 case RPCRDMA_ALLPHYSICAL:
1681 rpcrdma_map_one(ia, seg, writing);
1682 seg->mr_rkey = ia->ri_bind_mem->rkey;
1683 seg->mr_base = seg->mr_dma;
1684 seg->mr_nsegs = 1;
1685 nsegs = 1;
1686 break;
1687#endif
1688
Tom Talpey3197d3092008-10-09 15:00:20 -04001689	/* Registration using fast registration memory regions (FRMR) */
1690 case RPCRDMA_FRMR:
1691 rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
1692 break;
1693
Tom Talpey8d4ba032008-10-09 14:59:49 -04001694	/* Registration using FMRs (ib_map_phys_fmr) */
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001695 case RPCRDMA_MTHCAFMR:
Tom Talpey8d4ba032008-10-09 14:59:49 -04001696 rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001697 break;
1698
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001699 default:
Chuck Lever0ac531c2014-05-28 10:32:43 -04001700 return -1;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001701 }
1702 if (rc)
1703 return -1;
1704
1705 return nsegs;
1706}
1707
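/*
 * Undo rpcrdma_register_external() for one chunk, again dispatching on
 * the memory registration strategy. Returns the number of segments
 * that made up the registration.
 */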
1708int
1709rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
Chuck Lever13c9ff82014-05-28 10:33:08 -04001710 struct rpcrdma_xprt *r_xprt)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001711{
1712 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001713 int nsegs = seg->mr_nsegs, rc;
1714
1715 switch (ia->ri_memreg_strategy) {
1716
1717#if RPCRDMA_PERSISTENT_REGISTRATION
1718 case RPCRDMA_ALLPHYSICAL:
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001719 rpcrdma_unmap_one(ia, seg);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001720 break;
1721#endif
1722
Tom Talpey3197d3092008-10-09 15:00:20 -04001723 case RPCRDMA_FRMR:
1724 rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
1725 break;
1726
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001727 case RPCRDMA_MTHCAFMR:
Tom Talpey8d4ba032008-10-09 14:59:49 -04001728 rc = rpcrdma_deregister_fmr_external(seg, ia);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001729 break;
1730
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001731 default:
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001732 break;
1733 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001734 return nsegs;
1735}
1736
1737/*
1738 * Prepost any receive buffer, then post send.
1739 *
1740 * Receive buffer is donated to hardware, reclaimed upon recv completion.
1741 */
1742int
1743rpcrdma_ep_post(struct rpcrdma_ia *ia,
1744 struct rpcrdma_ep *ep,
1745 struct rpcrdma_req *req)
1746{
1747 struct ib_send_wr send_wr, *send_wr_fail;
1748 struct rpcrdma_rep *rep = req->rl_reply;
1749 int rc;
1750
1751 if (rep) {
1752 rc = rpcrdma_ep_post_recv(ia, ep, rep);
1753 if (rc)
1754 goto out;
1755 req->rl_reply = NULL;
1756 }
1757
1758 send_wr.next = NULL;
1759 send_wr.wr_id = 0ULL; /* no send cookie */
1760 send_wr.sg_list = req->rl_send_iov;
1761 send_wr.num_sge = req->rl_niovs;
1762 send_wr.opcode = IB_WR_SEND;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001763 if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */
1764 ib_dma_sync_single_for_device(ia->ri_id->device,
1765 req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
1766 DMA_TO_DEVICE);
1767 ib_dma_sync_single_for_device(ia->ri_id->device,
1768 req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
1769 DMA_TO_DEVICE);
1770 ib_dma_sync_single_for_device(ia->ri_id->device,
1771 req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
1772 DMA_TO_DEVICE);
1773
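	/*
	 * Sends are normally posted unsignaled; only when the endpoint's
	 * CQ counter runs out is a signaled send requested, giving the
	 * provider a chance to retire the accumulated send WRs without
	 * taking a completion for every RPC.
	 */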
1774 if (DECR_CQCOUNT(ep) > 0)
1775 send_wr.send_flags = 0;
1776 else { /* Provider must take a send completion every now and then */
1777 INIT_CQCOUNT(ep);
1778 send_wr.send_flags = IB_SEND_SIGNALED;
1779 }
1780
1781 rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
1782 if (rc)
1783 dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
1784 rc);
1785out:
1786 return rc;
1787}
1788
1789/*
1790 * (Re)post a receive buffer.
1791 */
1792int
1793rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
1794 struct rpcrdma_ep *ep,
1795 struct rpcrdma_rep *rep)
1796{
1797 struct ib_recv_wr recv_wr, *recv_wr_fail;
1798 int rc;
1799
1800 recv_wr.next = NULL;
1801 recv_wr.wr_id = (u64) (unsigned long) rep;
1802 recv_wr.sg_list = &rep->rr_iov;
1803 recv_wr.num_sge = 1;
1804
1805 ib_dma_sync_single_for_cpu(ia->ri_id->device,
1806 rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);
1807
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001808 rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
1809
1810 if (rc)
1811 dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
1812 rc);
1813 return rc;
1814}