\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -04001/*
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040038 */
39
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040040/*
41 * verbs.c
42 *
43 * Encapsulates the major functions managing:
44 * o adapters
45 * o endpoints
46 * o connections
47 * o buffer memory
48 */

#include <linux/interrupt.h>
#include <linux/slab.h>
#include <asm/bitops.h>

#include "xprt_rdma.h"

/*
 * Globals/Macros
 */

#ifdef RPC_DEBUG
# define RPCDBG_FACILITY        RPCDBG_TRANS
#endif

static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);

/*
 * internal functions
 */

/*
 * handle replies in tasklet context, using a single, global list
 * rdma tasklet function -- just turn around and call the func
 * for all replies on the list
 */

static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
static LIST_HEAD(rpcrdma_tasklets_g);

static void
rpcrdma_run_tasklet(unsigned long data)
{
        struct rpcrdma_rep *rep;
        void (*func)(struct rpcrdma_rep *);
        unsigned long flags;

        data = data;
        spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
        while (!list_empty(&rpcrdma_tasklets_g)) {
                rep = list_entry(rpcrdma_tasklets_g.next,
                                 struct rpcrdma_rep, rr_list);
                list_del(&rep->rr_list);
                func = rep->rr_func;
                rep->rr_func = NULL;
                spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);

                if (func)
                        func(rep);
                else
                        rpcrdma_recv_buffer_put(rep);

                spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
        }
        spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
}

static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);

static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
        struct rpcrdma_ep *ep = context;

        dprintk("RPC: %s: QP error %X on device %s ep %p\n",
                __func__, event->event, event->device->name, context);
        if (ep->rep_connected == 1) {
                ep->rep_connected = -EIO;
                ep->rep_func(ep);
                wake_up_all(&ep->rep_connect_wait);
        }
}

static void
rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
{
        struct rpcrdma_ep *ep = context;

        dprintk("RPC: %s: CQ error %X on device %s ep %p\n",
                __func__, event->event, event->device->name, context);
        if (ep->rep_connected == 1) {
                ep->rep_connected = -EIO;
                ep->rep_func(ep);
                wake_up_all(&ep->rep_connect_wait);
        }
}

static void
rpcrdma_sendcq_process_wc(struct ib_wc *wc)
{
        struct rpcrdma_mw *frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;

        dprintk("RPC: %s: frmr %p status %X opcode %d\n",
                __func__, frmr, wc->status, wc->opcode);

        if (wc->wr_id == 0ULL)
                return;
        if (wc->status != IB_WC_SUCCESS)
                frmr->r.frmr.fr_state = FRMR_IS_STALE;
}

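/* Poll the send CQ in batches of RPCRDMA_POLLSIZE completions, stopping
 * either when the CQ is drained or after roughly RPCRDMA_WC_BUDGET
 * completions, so a single upcall does not run unbounded.
 */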
static int
rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
        struct ib_wc *wcs;
        int budget, count, rc;

        budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
        do {
                wcs = ep->rep_send_wcs;

                rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
                if (rc <= 0)
                        return rc;

                count = rc;
                while (count-- > 0)
                        rpcrdma_sendcq_process_wc(wcs++);
        } while (rc == RPCRDMA_POLLSIZE && --budget);
        return 0;
}

/*
 * Handle send, fast_reg_mr, and local_inv completions.
 *
 * Send events are typically suppressed and thus do not result
 * in an upcall. Occasionally one is signaled, however. This
 * prevents the provider's completion queue from wrapping and
 * losing a completion.
 */
static void
rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
{
        struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
        int rc;

        rc = rpcrdma_sendcq_poll(cq, ep);
        if (rc) {
                dprintk("RPC: %s: ib_poll_cq failed: %i\n",
                        __func__, rc);
                return;
        }

        rc = ib_req_notify_cq(cq,
                        IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
        if (rc == 0)
                return;
        if (rc < 0) {
                dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
                        __func__, rc);
                return;
        }

        rpcrdma_sendcq_poll(cq, ep);
}

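/* Each completed receive is parked on the caller's sched_list rather than
 * being handed to the reply tasklet one at a time; rpcrdma_recvcq_poll()
 * splices the whole list onto rpcrdma_tasklets_g under a single lock
 * acquisition.
 */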
static void
rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
{
        struct rpcrdma_rep *rep =
                        (struct rpcrdma_rep *)(unsigned long)wc->wr_id;

        dprintk("RPC: %s: rep %p status %X opcode %X length %u\n",
                __func__, rep, wc->status, wc->opcode, wc->byte_len);

        if (wc->status != IB_WC_SUCCESS) {
                rep->rr_len = ~0U;
                goto out_schedule;
        }
        if (wc->opcode != IB_WC_RECV)
                return;

        rep->rr_len = wc->byte_len;
        ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
                        rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);

        if (rep->rr_len >= 16) {
                struct rpcrdma_msg *p = (struct rpcrdma_msg *)rep->rr_base;
                unsigned int credits = ntohl(p->rm_credit);

                if (credits == 0)
                        credits = 1;    /* don't deadlock */
                else if (credits > rep->rr_buffer->rb_max_requests)
                        credits = rep->rr_buffer->rb_max_requests;
                atomic_set(&rep->rr_buffer->rb_credits, credits);
        }

out_schedule:
        list_add_tail(&rep->rr_list, sched_list);
}

static int
rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
        struct list_head sched_list;
        struct ib_wc *wcs;
        int budget, count, rc;
        unsigned long flags;

        INIT_LIST_HEAD(&sched_list);
        budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
        do {
                wcs = ep->rep_recv_wcs;

                rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
                if (rc <= 0)
                        goto out_schedule;

                count = rc;
                while (count-- > 0)
                        rpcrdma_recvcq_process_wc(wcs++, &sched_list);
        } while (rc == RPCRDMA_POLLSIZE && --budget);
        rc = 0;

out_schedule:
        spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
        list_splice_tail(&sched_list, &rpcrdma_tasklets_g);
        spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
        tasklet_schedule(&rpcrdma_tasklet_g);
        return rc;
}

/*
 * Handle receive completions.
 *
 * It is reentrant but processes single events in order to maintain
 * ordering of receives to keep server credits.
 *
 * It is the responsibility of the scheduled tasklet to return
 * recv buffers to the pool. NOTE: this affects synchronization of
 * connection shutdown. That is, the structures required for
 * the completion of the reply handler must remain intact until
 * all memory has been reclaimed.
 */
static void
rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
{
        struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
        int rc;

        rc = rpcrdma_recvcq_poll(cq, ep);
        if (rc) {
                dprintk("RPC: %s: ib_poll_cq failed: %i\n",
                        __func__, rc);
                return;
        }

        rc = ib_req_notify_cq(cq,
                        IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
        if (rc == 0)
                return;
        if (rc < 0) {
                dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
                        __func__, rc);
                return;
        }

        rpcrdma_recvcq_poll(cq, ep);
}

static void
rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
{
        rpcrdma_recvcq_upcall(ep->rep_attr.recv_cq, ep);
        rpcrdma_sendcq_upcall(ep->rep_attr.send_cq, ep);
}

#ifdef RPC_DEBUG
static const char * const conn[] = {
        "address resolved",
        "address error",
        "route resolved",
        "route error",
        "connect request",
        "connect response",
        "connect error",
        "unreachable",
        "rejected",
        "established",
        "disconnected",
        "device removal"
};
#endif

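/* Connection manager event handler, registered via rdma_create_id().
 * Address and route resolution results are reported back through
 * ri_async_rc and the ri_done completion; connection state changes
 * update ep->rep_connected and wake any waiters.
 */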
static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
        struct rpcrdma_xprt *xprt = id->context;
        struct rpcrdma_ia *ia = &xprt->rx_ia;
        struct rpcrdma_ep *ep = &xprt->rx_ep;
#ifdef RPC_DEBUG
        struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
#endif
        struct ib_qp_attr attr;
        struct ib_qp_init_attr iattr;
        int connstate = 0;

        switch (event->event) {
        case RDMA_CM_EVENT_ADDR_RESOLVED:
        case RDMA_CM_EVENT_ROUTE_RESOLVED:
                ia->ri_async_rc = 0;
                complete(&ia->ri_done);
                break;
        case RDMA_CM_EVENT_ADDR_ERROR:
                ia->ri_async_rc = -EHOSTUNREACH;
                dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
                        __func__, ep);
                complete(&ia->ri_done);
                break;
        case RDMA_CM_EVENT_ROUTE_ERROR:
                ia->ri_async_rc = -ENETUNREACH;
                dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
                        __func__, ep);
                complete(&ia->ri_done);
                break;
        case RDMA_CM_EVENT_ESTABLISHED:
                connstate = 1;
                ib_query_qp(ia->ri_id->qp, &attr,
                        IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
                        &iattr);
                dprintk("RPC: %s: %d responder resources"
                        " (%d initiator)\n",
                        __func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
                goto connected;
        case RDMA_CM_EVENT_CONNECT_ERROR:
                connstate = -ENOTCONN;
                goto connected;
        case RDMA_CM_EVENT_UNREACHABLE:
                connstate = -ENETDOWN;
                goto connected;
        case RDMA_CM_EVENT_REJECTED:
                connstate = -ECONNREFUSED;
                goto connected;
        case RDMA_CM_EVENT_DISCONNECTED:
                connstate = -ECONNABORTED;
                goto connected;
        case RDMA_CM_EVENT_DEVICE_REMOVAL:
                connstate = -ENODEV;
connected:
                dprintk("RPC: %s: %s: %pI4:%u (ep 0x%p event 0x%x)\n",
                        __func__,
                        (event->event <= 11) ? conn[event->event] :
                                "unknown connection error",
                        &addr->sin_addr.s_addr,
                        ntohs(addr->sin_port),
                        ep, event->event);
                atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
                dprintk("RPC: %s: %sconnected\n",
                        __func__, connstate > 0 ? "" : "dis");
                ep->rep_connected = connstate;
                ep->rep_func(ep);
                wake_up_all(&ep->rep_connect_wait);
                break;
        default:
                dprintk("RPC: %s: unexpected CM event %d\n",
                        __func__, event->event);
                break;
        }

#ifdef RPC_DEBUG
        if (connstate == 1) {
                int ird = attr.max_dest_rd_atomic;
                int tird = ep->rep_remote_cma.responder_resources;
                printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
                        "on %s, memreg %d slots %d ird %d%s\n",
                        &addr->sin_addr.s_addr,
                        ntohs(addr->sin_port),
                        ia->ri_id->device->name,
                        ia->ri_memreg_strategy,
                        xprt->rx_buf.rb_max_requests,
                        ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
        } else if (connstate < 0) {
                printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
                        &addr->sin_addr.s_addr,
                        ntohs(addr->sin_port),
                        connstate);
        }
#endif

        return 0;
}

static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
                        struct rpcrdma_ia *ia, struct sockaddr *addr)
{
        struct rdma_cm_id *id;
        int rc;

        init_completion(&ia->ri_done);

        id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(id)) {
                rc = PTR_ERR(id);
                dprintk("RPC: %s: rdma_create_id() failed %i\n",
                        __func__, rc);
                return id;
        }

        ia->ri_async_rc = -ETIMEDOUT;
        rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
        if (rc) {
                dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
                        __func__, rc);
                goto out;
        }
        wait_for_completion_interruptible_timeout(&ia->ri_done,
                                msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
        rc = ia->ri_async_rc;
        if (rc)
                goto out;

        ia->ri_async_rc = -ETIMEDOUT;
        rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
        if (rc) {
                dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
                        __func__, rc);
                goto out;
        }
        wait_for_completion_interruptible_timeout(&ia->ri_done,
                                msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
        rc = ia->ri_async_rc;
        if (rc)
                goto out;

        return id;

out:
        rdma_destroy_id(id);
        return ERR_PTR(rc);
}

/*
 * Drain any cq, prior to teardown.
 */
static void
rpcrdma_clean_cq(struct ib_cq *cq)
{
        struct ib_wc wc;
        int count = 0;

        while (1 == ib_poll_cq(cq, 1, &wc))
                ++count;

        if (count)
                dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
                        __func__, count, wc.opcode);
}

/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
        int rc, mem_priv;
        struct ib_device_attr devattr;
        struct rpcrdma_ia *ia = &xprt->rx_ia;

        ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
        if (IS_ERR(ia->ri_id)) {
                rc = PTR_ERR(ia->ri_id);
                goto out1;
        }

        ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
        if (IS_ERR(ia->ri_pd)) {
                rc = PTR_ERR(ia->ri_pd);
                dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
                        __func__, rc);
                goto out2;
        }

        /*
         * Query the device to determine if the requested memory
         * registration strategy is supported. If it isn't, set the
         * strategy to a globally supported model.
         */
        rc = ib_query_device(ia->ri_id->device, &devattr);
        if (rc) {
                dprintk("RPC: %s: ib_query_device failed %d\n",
                        __func__, rc);
                goto out2;
        }

        if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
                ia->ri_have_dma_lkey = 1;
                ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
        }

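        /* Fall back from FRMR to FMR when the HCA cannot support FRMR,
         * and from FMR to all-physical registration only when that mode
         * was built in (RPCRDMA_PERSISTENT_REGISTRATION); otherwise fail.
         */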
        if (memreg == RPCRDMA_FRMR) {
                /* Requires both frmr reg and local dma lkey */
                if ((devattr.device_cap_flags &
                     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
                    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
                        dprintk("RPC: %s: FRMR registration "
                                "not supported by HCA\n", __func__);
                        memreg = RPCRDMA_MTHCAFMR;
                } else {
                        /* Mind the ia limit on FRMR page list depth */
                        ia->ri_max_frmr_depth = min_t(unsigned int,
                                RPCRDMA_MAX_DATA_SEGS,
                                devattr.max_fast_reg_page_list_len);
                }
        }
        if (memreg == RPCRDMA_MTHCAFMR) {
                if (!ia->ri_id->device->alloc_fmr) {
                        dprintk("RPC: %s: MTHCAFMR registration "
                                "not supported by HCA\n", __func__);
#if RPCRDMA_PERSISTENT_REGISTRATION
                        memreg = RPCRDMA_ALLPHYSICAL;
#else
                        rc = -ENOMEM;
                        goto out2;
#endif
                }
        }

        /*
         * Optionally obtain an underlying physical identity mapping in
         * order to do a memory window-based bind. This base registration
         * is protected from remote access - that is enabled only by binding
         * for the specific bytes targeted during each RPC operation, and
         * revoked after the corresponding completion similar to a storage
         * adapter.
         */
        switch (memreg) {
        case RPCRDMA_FRMR:
                break;
#if RPCRDMA_PERSISTENT_REGISTRATION
        case RPCRDMA_ALLPHYSICAL:
                mem_priv = IB_ACCESS_LOCAL_WRITE |
                                IB_ACCESS_REMOTE_WRITE |
                                IB_ACCESS_REMOTE_READ;
                goto register_setup;
#endif
        case RPCRDMA_MTHCAFMR:
                if (ia->ri_have_dma_lkey)
                        break;
                mem_priv = IB_ACCESS_LOCAL_WRITE;
#if RPCRDMA_PERSISTENT_REGISTRATION
        register_setup:
#endif
                ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
                if (IS_ERR(ia->ri_bind_mem)) {
                        printk(KERN_ALERT "%s: ib_get_dma_mr for "
                                "phys register failed with %lX\n",
                                __func__, PTR_ERR(ia->ri_bind_mem));
                        rc = -ENOMEM;
                        goto out2;
                }
                break;
        default:
                printk(KERN_ERR "RPC: Unsupported memory "
                                "registration mode: %d\n", memreg);
                rc = -ENOMEM;
                goto out2;
        }
        dprintk("RPC: %s: memory registration strategy is %d\n",
                __func__, memreg);

        /* Else will do memory reg/dereg for each chunk */
        ia->ri_memreg_strategy = memreg;

        rwlock_init(&ia->ri_qplock);
        return 0;
out2:
        rdma_destroy_id(ia->ri_id);
        ia->ri_id = NULL;
out1:
        return rc;
}

/*
 * Clean up/close an IA.
 *   o if event handles and PD have been initialized, free them.
 *   o close the IA
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
        int rc;

        dprintk("RPC: %s: entering\n", __func__);
        if (ia->ri_bind_mem != NULL) {
                rc = ib_dereg_mr(ia->ri_bind_mem);
                dprintk("RPC: %s: ib_dereg_mr returned %i\n",
                        __func__, rc);
        }
        if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
                if (ia->ri_id->qp)
                        rdma_destroy_qp(ia->ri_id);
                rdma_destroy_id(ia->ri_id);
                ia->ri_id = NULL;
        }
        if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
                rc = ib_dealloc_pd(ia->ri_pd);
                dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
                        __func__, rc);
        }
}

/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
                                struct rpcrdma_create_data_internal *cdata)
{
        struct ib_device_attr devattr;
        struct ib_cq *sendcq, *recvcq;
        int rc, err;

        rc = ib_query_device(ia->ri_id->device, &devattr);
        if (rc) {
                dprintk("RPC: %s: ib_query_device failed %d\n",
                        __func__, rc);
                return rc;
        }

        /* check provider's send/recv wr limits */
        if (cdata->max_requests > devattr.max_qp_wr)
                cdata->max_requests = devattr.max_qp_wr;

        ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
        ep->rep_attr.qp_context = ep;
        /* send_cq and recv_cq initialized below */
        ep->rep_attr.srq = NULL;
        ep->rep_attr.cap.max_send_wr = cdata->max_requests;
        switch (ia->ri_memreg_strategy) {
        case RPCRDMA_FRMR: {
                int depth = 7;

                /* Add room for frmr register and invalidate WRs.
                 * 1. FRMR reg WR for head
                 * 2. FRMR invalidate WR for head
                 * 3. N FRMR reg WRs for pagelist
                 * 4. N FRMR invalidate WRs for pagelist
                 * 5. FRMR reg WR for tail
                 * 6. FRMR invalidate WR for tail
                 * 7. The RDMA_SEND WR
                 */

                /* Calculate N if the device max FRMR depth is smaller than
                 * RPCRDMA_MAX_DATA_SEGS.
                 */
                if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
                        int delta = RPCRDMA_MAX_DATA_SEGS -
                                    ia->ri_max_frmr_depth;

                        do {
                                depth += 2; /* FRMR reg + invalidate */
                                delta -= ia->ri_max_frmr_depth;
                        } while (delta > 0);

                }
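                /* Worked example (illustrative values only): with
                 * RPCRDMA_MAX_DATA_SEGS of 64 and a device limit of 24
                 * pages per FRMR, delta starts at 40, the loop above runs
                 * twice, and depth grows from 7 to 11 (two extra
                 * reg/invalidate pairs per RPC).
                 */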
                ep->rep_attr.cap.max_send_wr *= depth;
                if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
                        cdata->max_requests = devattr.max_qp_wr / depth;
                        if (!cdata->max_requests)
                                return -EINVAL;
                        ep->rep_attr.cap.max_send_wr = cdata->max_requests *
                                                       depth;
                }
                break;
        }
        default:
                break;
        }
        ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
        ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
        ep->rep_attr.cap.max_recv_sge = 1;
        ep->rep_attr.cap.max_inline_data = 0;
        ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
        ep->rep_attr.qp_type = IB_QPT_RC;
        ep->rep_attr.port_num = ~0;

        dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
                "iovs: send %d recv %d\n",
                __func__,
                ep->rep_attr.cap.max_send_wr,
                ep->rep_attr.cap.max_recv_wr,
                ep->rep_attr.cap.max_send_sge,
                ep->rep_attr.cap.max_recv_sge);

        /* set trigger for requesting send completion */
        ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
        if (ep->rep_cqinit <= 2)
                ep->rep_cqinit = 0;
        INIT_CQCOUNT(ep);
        ep->rep_ia = ia;
        init_waitqueue_head(&ep->rep_connect_wait);
        INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

        sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
                                  rpcrdma_cq_async_error_upcall, ep,
                                  ep->rep_attr.cap.max_send_wr + 1, 0);
        if (IS_ERR(sendcq)) {
                rc = PTR_ERR(sendcq);
                dprintk("RPC: %s: failed to create send CQ: %i\n",
                        __func__, rc);
                goto out1;
        }

        rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
        if (rc) {
                dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
                        __func__, rc);
                goto out2;
        }

        recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
                                  rpcrdma_cq_async_error_upcall, ep,
                                  ep->rep_attr.cap.max_recv_wr + 1, 0);
        if (IS_ERR(recvcq)) {
                rc = PTR_ERR(recvcq);
                dprintk("RPC: %s: failed to create recv CQ: %i\n",
                        __func__, rc);
                goto out2;
        }

        rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
        if (rc) {
                dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
                        __func__, rc);
                ib_destroy_cq(recvcq);
                goto out2;
        }

        ep->rep_attr.send_cq = sendcq;
        ep->rep_attr.recv_cq = recvcq;

        /* Initialize cma parameters */

        /* RPC/RDMA does not use private data */
        ep->rep_remote_cma.private_data = NULL;
        ep->rep_remote_cma.private_data_len = 0;

        /* Client offers RDMA Read but does not initiate */
        ep->rep_remote_cma.initiator_depth = 0;
        if (devattr.max_qp_rd_atom > 32)        /* arbitrary but <= 255 */
                ep->rep_remote_cma.responder_resources = 32;
        else
                ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;

        ep->rep_remote_cma.retry_count = 7;
        ep->rep_remote_cma.flow_control = 0;
        ep->rep_remote_cma.rnr_retry_count = 0;

        return 0;

out2:
        err = ib_destroy_cq(sendcq);
        if (err)
                dprintk("RPC: %s: ib_destroy_cq returned %i\n",
                        __func__, err);
out1:
        return rc;
}

/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
        int rc;

        dprintk("RPC: %s: entering, connected is %d\n",
                __func__, ep->rep_connected);

        cancel_delayed_work_sync(&ep->rep_connect_worker);

        if (ia->ri_id->qp) {
                rc = rpcrdma_ep_disconnect(ep, ia);
                if (rc)
                        dprintk("RPC: %s: rpcrdma_ep_disconnect"
                                " returned %i\n", __func__, rc);
                rdma_destroy_qp(ia->ri_id);
                ia->ri_id->qp = NULL;
        }

        /* padding - could be done in rpcrdma_buffer_destroy... */
        if (ep->rep_pad_mr) {
                rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
                ep->rep_pad_mr = NULL;
        }

        rpcrdma_clean_cq(ep->rep_attr.recv_cq);
        rc = ib_destroy_cq(ep->rep_attr.recv_cq);
        if (rc)
                dprintk("RPC: %s: ib_destroy_cq returned %i\n",
                        __func__, rc);

        rpcrdma_clean_cq(ep->rep_attr.send_cq);
        rc = ib_destroy_cq(ep->rep_attr.send_cq);
        if (rc)
                dprintk("RPC: %s: ib_destroy_cq returned %i\n",
                        __func__, rc);
}

/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
        struct rdma_cm_id *id, *old;
        int rc = 0;
        int retry_count = 0;

        if (ep->rep_connected != 0) {
                struct rpcrdma_xprt *xprt;
retry:
                dprintk("RPC: %s: reconnecting...\n", __func__);
                rc = rpcrdma_ep_disconnect(ep, ia);
                if (rc && rc != -ENOTCONN)
                        dprintk("RPC: %s: rpcrdma_ep_disconnect"
                                " status %i\n", __func__, rc);
                rpcrdma_flush_cqs(ep);

                if (ia->ri_memreg_strategy == RPCRDMA_FRMR)
                        rpcrdma_reset_frmrs(ia);

                xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
                id = rpcrdma_create_id(xprt, ia,
                                (struct sockaddr *)&xprt->rx_data.addr);
                if (IS_ERR(id)) {
                        rc = -EHOSTUNREACH;
                        goto out;
                }
                /* TEMP TEMP TEMP - fail if new device:
                 * Deregister/remarshal *all* requests!
                 * Close and recreate adapter, pd, etc!
                 * Re-determine all attributes still sane!
                 * More stuff I haven't thought of!
                 * Rrrgh!
                 */
                if (ia->ri_id->device != id->device) {
                        printk("RPC: %s: can't reconnect on "
                                "different device!\n", __func__);
                        rdma_destroy_id(id);
                        rc = -ENETUNREACH;
                        goto out;
                }
                /* END TEMP */
                rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
                if (rc) {
                        dprintk("RPC: %s: rdma_create_qp failed %i\n",
                                __func__, rc);
                        rdma_destroy_id(id);
                        rc = -ENETUNREACH;
                        goto out;
                }

                write_lock(&ia->ri_qplock);
                old = ia->ri_id;
                ia->ri_id = id;
                write_unlock(&ia->ri_qplock);

                rdma_destroy_qp(old);
                rdma_destroy_id(old);
        } else {
                dprintk("RPC: %s: connecting...\n", __func__);
                rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
                if (rc) {
                        dprintk("RPC: %s: rdma_create_qp failed %i\n",
                                __func__, rc);
                        /* do not update ep->rep_connected */
                        return -ENETUNREACH;
                }
        }

        ep->rep_connected = 0;

        rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
        if (rc) {
                dprintk("RPC: %s: rdma_connect() failed with %i\n",
                        __func__, rc);
                goto out;
        }

        wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

        /*
         * Check state. A non-peer reject indicates no listener
         * (ECONNREFUSED), which may be a transient state. All
         * others indicate a transport condition which has already
         * undergone a best-effort.
         */
        if (ep->rep_connected == -ECONNREFUSED &&
            ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
                dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
                goto retry;
        }
        if (ep->rep_connected <= 0) {
                /* Sometimes, the only way to reliably connect to remote
                 * CMs is to use same nonzero values for ORD and IRD. */
                if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
                    (ep->rep_remote_cma.responder_resources == 0 ||
                     ep->rep_remote_cma.initiator_depth !=
                                ep->rep_remote_cma.responder_resources)) {
                        if (ep->rep_remote_cma.responder_resources == 0)
                                ep->rep_remote_cma.responder_resources = 1;
                        ep->rep_remote_cma.initiator_depth =
                                ep->rep_remote_cma.responder_resources;
                        goto retry;
                }
                rc = ep->rep_connected;
        } else {
                dprintk("RPC: %s: connected\n", __func__);
        }

out:
        if (rc)
                ep->rep_connected = rc;
        return rc;
}

/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
int
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
        int rc;

        rpcrdma_flush_cqs(ep);
        rc = rdma_disconnect(ia->ri_id);
        if (!rc) {
                /* returns without wait if not connected */
                wait_event_interruptible(ep->rep_connect_wait,
                                                        ep->rep_connected != 1);
                dprintk("RPC: %s: after wait, %sconnected\n", __func__,
                        (ep->rep_connected == 1) ? "still " : "dis");
        } else {
                dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
                ep->rep_connected = rc;
        }
        return rc;
}

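/* Pre-allocate a pool of MWs for FMR mode: (rb_max_requests + 1) *
 * RPCRDMA_MAX_SEGS entries, chained on rb_mws for allocation and on
 * rb_all so every MW can be found at reset and destroy time.
 */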
static int
rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
{
        int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
        struct ib_fmr_attr fmr_attr = {
                .max_pages      = RPCRDMA_MAX_DATA_SEGS,
                .max_maps       = 1,
                .page_shift     = PAGE_SHIFT
        };
        struct rpcrdma_mw *r;
        int i, rc;

        i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
        dprintk("RPC: %s: initializing %d FMRs\n", __func__, i);

        while (i--) {
                r = kzalloc(sizeof(*r), GFP_KERNEL);
                if (r == NULL)
                        return -ENOMEM;

                r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr);
                if (IS_ERR(r->r.fmr)) {
                        rc = PTR_ERR(r->r.fmr);
                        dprintk("RPC: %s: ib_alloc_fmr failed %i\n",
                                __func__, rc);
                        goto out_free;
                }

                list_add(&r->mw_list, &buf->rb_mws);
                list_add(&r->mw_all, &buf->rb_all);
        }
        return 0;

out_free:
        kfree(r);
        return rc;
}

static int
rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
{
        struct rpcrdma_frmr *f;
        struct rpcrdma_mw *r;
        int i, rc;

        i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
        dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i);

        while (i--) {
                r = kzalloc(sizeof(*r), GFP_KERNEL);
                if (r == NULL)
                        return -ENOMEM;
                f = &r->r.frmr;

                f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
                                                ia->ri_max_frmr_depth);
                if (IS_ERR(f->fr_mr)) {
                        rc = PTR_ERR(f->fr_mr);
                        dprintk("RPC: %s: ib_alloc_fast_reg_mr "
                                "failed %i\n", __func__, rc);
                        goto out_free;
                }

                f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device,
                                                        ia->ri_max_frmr_depth);
                if (IS_ERR(f->fr_pgl)) {
                        rc = PTR_ERR(f->fr_pgl);
                        dprintk("RPC: %s: ib_alloc_fast_reg_page_list "
                                "failed %i\n", __func__, rc);

                        ib_dereg_mr(f->fr_mr);
                        goto out_free;
                }

                list_add(&r->mw_list, &buf->rb_mws);
                list_add(&r->mw_all, &buf->rb_all);
        }

        return 0;

out_free:
        kfree(r);
        return rc;
}

\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001092int
1093rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
1094 struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
1095{
1096 char *p;
Chuck Lever65866f82014-05-28 10:33:59 -04001097 size_t len, rlen, wlen;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001098 int i, rc;
1099
1100 buf->rb_max_requests = cdata->max_requests;
1101 spin_lock_init(&buf->rb_lock);
1102 atomic_set(&buf->rb_credits, 1);
1103
1104 /* Need to allocate:
1105 * 1. arrays for send and recv pointers
1106 * 2. arrays of struct rpcrdma_req to fill in pointers
1107 * 3. array of struct rpcrdma_rep for replies
1108 * 4. padding, if any
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001109 * Send/recv buffers in req/rep need to be registered
1110 */
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001111 len = buf->rb_max_requests *
1112 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
1113 len += cdata->padding;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001114
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001115 p = kzalloc(len, GFP_KERNEL);
1116 if (p == NULL) {
1117 dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
1118 __func__, len);
1119 rc = -ENOMEM;
1120 goto out;
1121 }
1122 buf->rb_pool = p; /* for freeing it later */
1123
1124 buf->rb_send_bufs = (struct rpcrdma_req **) p;
1125 p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
1126 buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
1127 p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];
1128
1129 /*
1130 * Register the zeroed pad buffer, if any.
1131 */
1132 if (cdata->padding) {
1133 rc = rpcrdma_register_internal(ia, p, cdata->padding,
1134 &ep->rep_pad_mr, &ep->rep_pad);
1135 if (rc)
1136 goto out;
1137 }
1138 p += cdata->padding;
1139
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001140 INIT_LIST_HEAD(&buf->rb_mws);
Chuck Lever3111d722014-07-29 17:24:28 -04001141 INIT_LIST_HEAD(&buf->rb_all);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001142 switch (ia->ri_memreg_strategy) {
Tom Talpey3197d3092008-10-09 15:00:20 -04001143 case RPCRDMA_FRMR:
Chuck Lever2e845222014-07-29 17:25:38 -04001144 rc = rpcrdma_init_frmrs(ia, buf);
1145 if (rc)
1146 goto out;
Tom Talpey3197d3092008-10-09 15:00:20 -04001147 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001148 case RPCRDMA_MTHCAFMR:
Chuck Lever2e845222014-07-29 17:25:38 -04001149 rc = rpcrdma_init_fmrs(ia, buf);
1150 if (rc)
1151 goto out;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001152 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001153 default:
1154 break;
1155 }
1156
1157 /*
1158 * Allocate/init the request/reply buffers. Doing this
1159 * using kmalloc for now -- one for each buf.
1160 */
Chuck Lever65866f82014-05-28 10:33:59 -04001161 wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req));
1162 rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep));
1163 dprintk("RPC: %s: wlen = %zu, rlen = %zu\n",
1164 __func__, wlen, rlen);
1165
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001166 for (i = 0; i < buf->rb_max_requests; i++) {
1167 struct rpcrdma_req *req;
1168 struct rpcrdma_rep *rep;
1169
Chuck Lever65866f82014-05-28 10:33:59 -04001170 req = kmalloc(wlen, GFP_KERNEL);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001171 if (req == NULL) {
1172 dprintk("RPC: %s: request buffer %d alloc"
1173 " failed\n", __func__, i);
1174 rc = -ENOMEM;
1175 goto out;
1176 }
1177 memset(req, 0, sizeof(struct rpcrdma_req));
1178 buf->rb_send_bufs[i] = req;
1179 buf->rb_send_bufs[i]->rl_buffer = buf;
1180
1181 rc = rpcrdma_register_internal(ia, req->rl_base,
Chuck Lever65866f82014-05-28 10:33:59 -04001182 wlen - offsetof(struct rpcrdma_req, rl_base),
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001183 &buf->rb_send_bufs[i]->rl_handle,
1184 &buf->rb_send_bufs[i]->rl_iov);
1185 if (rc)
1186 goto out;
1187
Chuck Lever65866f82014-05-28 10:33:59 -04001188 buf->rb_send_bufs[i]->rl_size = wlen -
1189 sizeof(struct rpcrdma_req);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001190
Chuck Lever65866f82014-05-28 10:33:59 -04001191 rep = kmalloc(rlen, GFP_KERNEL);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001192 if (rep == NULL) {
1193 dprintk("RPC: %s: reply buffer %d alloc failed\n",
1194 __func__, i);
1195 rc = -ENOMEM;
1196 goto out;
1197 }
1198 memset(rep, 0, sizeof(struct rpcrdma_rep));
1199 buf->rb_recv_bufs[i] = rep;
1200 buf->rb_recv_bufs[i]->rr_buffer = buf;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001201
1202 rc = rpcrdma_register_internal(ia, rep->rr_base,
Chuck Lever65866f82014-05-28 10:33:59 -04001203 rlen - offsetof(struct rpcrdma_rep, rr_base),
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001204 &buf->rb_recv_bufs[i]->rr_handle,
1205 &buf->rb_recv_bufs[i]->rr_iov);
1206 if (rc)
1207 goto out;
1208
1209 }
1210 dprintk("RPC: %s: max_requests %d\n",
1211 __func__, buf->rb_max_requests);
1212 /* done */
1213 return 0;
1214out:
1215 rpcrdma_buffer_destroy(buf);
1216 return rc;
1217}
1218
Chuck Lever2e845222014-07-29 17:25:38 -04001219static void
1220rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf)
1221{
1222 struct rpcrdma_mw *r;
1223 int rc;
1224
1225 while (!list_empty(&buf->rb_all)) {
1226 r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
1227 list_del(&r->mw_all);
1228 list_del(&r->mw_list);
1229
1230 rc = ib_dealloc_fmr(r->r.fmr);
1231 if (rc)
1232 dprintk("RPC: %s: ib_dealloc_fmr failed %i\n",
1233 __func__, rc);
1234
1235 kfree(r);
1236 }
1237}
1238
1239static void
1240rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf)
1241{
1242 struct rpcrdma_mw *r;
1243 int rc;
1244
1245 while (!list_empty(&buf->rb_all)) {
1246 r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
1247 list_del(&r->mw_all);
1248 list_del(&r->mw_list);
1249
1250 rc = ib_dereg_mr(r->r.frmr.fr_mr);
1251 if (rc)
1252 dprintk("RPC: %s: ib_dereg_mr failed %i\n",
1253 __func__, rc);
1254 ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
1255
1256 kfree(r);
1257 }
1258}
1259
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001260void
1261rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1262{
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001263 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
Chuck Lever2e845222014-07-29 17:25:38 -04001264 int i;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001265
1266 /* clean up in reverse order from create
1267 * 1. recv mr memory (mr free, then kfree)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001268 * 2. send mr memory (mr free, then kfree)
Chuck Lever2e845222014-07-29 17:25:38 -04001269 * 3. MWs
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001270 */
1271 dprintk("RPC: %s: entering\n", __func__);
1272
1273 for (i = 0; i < buf->rb_max_requests; i++) {
1274 if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) {
1275 rpcrdma_deregister_internal(ia,
1276 buf->rb_recv_bufs[i]->rr_handle,
1277 &buf->rb_recv_bufs[i]->rr_iov);
1278 kfree(buf->rb_recv_bufs[i]);
1279 }
1280 if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001281 rpcrdma_deregister_internal(ia,
1282 buf->rb_send_bufs[i]->rl_handle,
1283 &buf->rb_send_bufs[i]->rl_iov);
1284 kfree(buf->rb_send_bufs[i]);
1285 }
1286 }
1287
Chuck Lever2e845222014-07-29 17:25:38 -04001288 switch (ia->ri_memreg_strategy) {
1289 case RPCRDMA_FRMR:
1290 rpcrdma_destroy_frmrs(buf);
1291 break;
1292 case RPCRDMA_MTHCAFMR:
1293 rpcrdma_destroy_fmrs(buf);
1294 break;
1295 default:
1296 break;
Allen Andrews4034ba02014-05-28 10:32:09 -04001297 }
1298
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001299 kfree(buf->rb_pool);
1300}
1301
Chuck Lever9f9d8022014-07-29 17:24:45 -04001302/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
1303 * an unusable state. Find FRMRs in this state and dereg / reg
1304 * each. FRMRs that are VALID and attached to an rpcrdma_req are
1305 * also torn down.
1306 *
1307 * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
1308 *
1309 * This is invoked only in the transport connect worker in order
1310 * to serialize with rpcrdma_register_frmr_external().
1311 */
1312static void
1313rpcrdma_reset_frmrs(struct rpcrdma_ia *ia)
1314{
1315 struct rpcrdma_xprt *r_xprt =
1316 container_of(ia, struct rpcrdma_xprt, rx_ia);
1317 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
1318 struct list_head *pos;
1319 struct rpcrdma_mw *r;
1320 int rc;
1321
1322 list_for_each(pos, &buf->rb_all) {
1323 r = list_entry(pos, struct rpcrdma_mw, mw_all);
1324
1325 if (r->r.frmr.fr_state == FRMR_IS_INVALID)
1326 continue;
1327
1328 rc = ib_dereg_mr(r->r.frmr.fr_mr);
1329 if (rc)
1330 dprintk("RPC: %s: ib_dereg_mr failed %i\n",
1331 __func__, rc);
1332 ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
1333
1334 r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
1335 ia->ri_max_frmr_depth);
1336 if (IS_ERR(r->r.frmr.fr_mr)) {
1337 rc = PTR_ERR(r->r.frmr.fr_mr);
1338 dprintk("RPC: %s: ib_alloc_fast_reg_mr"
1339 " failed %i\n", __func__, rc);
1340 continue;
1341 }
1342 r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
1343 ia->ri_id->device,
1344 ia->ri_max_frmr_depth);
1345 if (IS_ERR(r->r.frmr.fr_pgl)) {
1346 rc = PTR_ERR(r->r.frmr.fr_pgl);
1347 dprintk("RPC: %s: "
1348 "ib_alloc_fast_reg_page_list "
1349 "failed %i\n", __func__, rc);
1350
1351 ib_dereg_mr(r->r.frmr.fr_mr);
1352 continue;
1353 }
1354 r->r.frmr.fr_state = FRMR_IS_INVALID;
1355 }
1356}
1357
Chuck Leverc2922c02014-07-29 17:24:36 -04001358/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
1359 * some req segments uninitialized.
1360 */
1361static void
1362rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
1363{
1364 if (*mw) {
1365 list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
1366 *mw = NULL;
1367 }
1368}
1369
1370/* Cycle mw's back in reverse order, and "spin" them.
1371 * This delays and scrambles reuse as much as possible.
1372 */
1373static void
1374rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
1375{
1376 struct rpcrdma_mr_seg *seg = req->rl_segments;
1377 struct rpcrdma_mr_seg *seg1 = seg;
1378 int i;
1379
1380 for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
1381 rpcrdma_buffer_put_mr(&seg->mr_chunk.rl_mw, buf);
1382 rpcrdma_buffer_put_mr(&seg1->mr_chunk.rl_mw, buf);
1383}
1384
1385static void
1386rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
1387{
1388 buf->rb_send_bufs[--buf->rb_send_index] = req;
1389 req->rl_niovs = 0;
1390 if (req->rl_reply) {
1391 buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
1392 req->rl_reply->rr_func = NULL;
1393 req->rl_reply = NULL;
1394 }
1395}
1396
Chuck Leverddb6beb2014-07-29 17:24:54 -04001397/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
1398 * Redo only the ib_post_send().
1399 */
1400static void
1401rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
1402{
1403 struct rpcrdma_xprt *r_xprt =
1404 container_of(ia, struct rpcrdma_xprt, rx_ia);
1405 struct ib_send_wr invalidate_wr, *bad_wr;
1406 int rc;
1407
1408 dprintk("RPC: %s: FRMR %p is stale\n", __func__, r);
1409
1410 /* When this FRMR is re-inserted into rb_mws, it is no longer stale */
Chuck Leverdab7e3b2014-07-29 17:25:20 -04001411 r->r.frmr.fr_state = FRMR_IS_INVALID;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001412
1413 memset(&invalidate_wr, 0, sizeof(invalidate_wr));
1414 invalidate_wr.wr_id = (unsigned long)(void *)r;
1415 invalidate_wr.opcode = IB_WR_LOCAL_INV;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001416 invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
1417 DECR_CQCOUNT(&r_xprt->rx_ep);
1418
1419 dprintk("RPC: %s: frmr %p invalidating rkey %08x\n",
1420 __func__, r, r->r.frmr.fr_mr->rkey);
1421
1422 read_lock(&ia->ri_qplock);
1423 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
1424 read_unlock(&ia->ri_qplock);
1425 if (rc) {
1426 /* Force rpcrdma_buffer_get() to retry */
1427 r->r.frmr.fr_state = FRMR_IS_STALE;
1428 dprintk("RPC: %s: ib_post_send failed, %i\n",
1429 __func__, rc);
1430 }
1431}
1432
1433static void
1434rpcrdma_retry_flushed_linv(struct list_head *stale,
1435 struct rpcrdma_buffer *buf)
1436{
1437 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
1438 struct list_head *pos;
1439 struct rpcrdma_mw *r;
1440 unsigned long flags;
1441
1442 list_for_each(pos, stale) {
1443 r = list_entry(pos, struct rpcrdma_mw, mw_list);
1444 rpcrdma_retry_local_inv(r, ia);
1445 }
1446
1447 spin_lock_irqsave(&buf->rb_lock, flags);
1448 list_splice_tail(stale, &buf->rb_mws);
1449 spin_unlock_irqrestore(&buf->rb_lock, flags);
1450}
1451
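/* Fill req->rl_segments[] from the tail with MWs taken off rb_mws.
 * FRMRs found in the STALE state are diverted to the caller's "stale"
 * list for a LOCAL_INV retry instead of being handed to this request.
 */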
Chuck Leverc2922c02014-07-29 17:24:36 -04001452static struct rpcrdma_req *
Chuck Leverddb6beb2014-07-29 17:24:54 -04001453rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
1454 struct list_head *stale)
1455{
1456 struct rpcrdma_mw *r;
1457 int i;
1458
1459 i = RPCRDMA_MAX_SEGS - 1;
1460 while (!list_empty(&buf->rb_mws)) {
1461 r = list_entry(buf->rb_mws.next,
1462 struct rpcrdma_mw, mw_list);
1463 list_del(&r->mw_list);
1464 if (r->r.frmr.fr_state == FRMR_IS_STALE) {
1465 list_add(&r->mw_list, stale);
1466 continue;
1467 }
1468 req->rl_segments[i].mr_chunk.rl_mw = r;
1469 if (unlikely(i-- == 0))
1470 return req; /* Success */
1471 }
1472
1473 /* Not enough entries on rb_mws for this req */
1474 rpcrdma_buffer_put_sendbuf(req, buf);
1475 rpcrdma_buffer_put_mrs(req, buf);
1476 return NULL;
1477}
1478
1479static struct rpcrdma_req *
1480rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
Chuck Leverc2922c02014-07-29 17:24:36 -04001481{
1482 struct rpcrdma_mw *r;
1483 int i;
1484
1485 i = RPCRDMA_MAX_SEGS - 1;
1486 while (!list_empty(&buf->rb_mws)) {
1487 r = list_entry(buf->rb_mws.next,
1488 struct rpcrdma_mw, mw_list);
1489 list_del(&r->mw_list);
1490 req->rl_segments[i].mr_chunk.rl_mw = r;
1491 if (unlikely(i-- == 0))
1492 return req; /* Success */
1493 }
1494
1495 /* Not enough entries on rb_mws for this req */
1496 rpcrdma_buffer_put_sendbuf(req, buf);
1497 rpcrdma_buffer_put_mrs(req, buf);
1498 return NULL;
1499}
1500
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001501/*
1502 * Get a set of request/reply buffers.
1503 *
1504 * Reply buffer (if needed) is attached to send buffer upon return.
1505 * Rule:
1506 * rb_send_index and rb_recv_index MUST always be pointing to the
1507 * *next* available buffer (non-NULL). They are incremented after
1508 * removing buffers, and decremented *before* returning them.
1509 */
1510struct rpcrdma_req *
1511rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1512{
Chuck Leverc2922c02014-07-29 17:24:36 -04001513 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
Chuck Leverddb6beb2014-07-29 17:24:54 -04001514 struct list_head stale;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001515 struct rpcrdma_req *req;
1516 unsigned long flags;
1517
1518 spin_lock_irqsave(&buffers->rb_lock, flags);
1519 if (buffers->rb_send_index == buffers->rb_max_requests) {
1520 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1521 dprintk("RPC: %s: out of request buffers\n", __func__);
1522 return ((struct rpcrdma_req *)NULL);
1523 }
1524
1525 req = buffers->rb_send_bufs[buffers->rb_send_index];
1526 if (buffers->rb_send_index < buffers->rb_recv_index) {
1527 dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
1528 __func__,
1529 buffers->rb_recv_index - buffers->rb_send_index);
1530 req->rl_reply = NULL;
1531 } else {
1532 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1533 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1534 }
1535 buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001536
1537 INIT_LIST_HEAD(&stale);
Chuck Leverc2922c02014-07-29 17:24:36 -04001538 switch (ia->ri_memreg_strategy) {
1539 case RPCRDMA_FRMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001540 req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
1541 break;
Chuck Leverc2922c02014-07-29 17:24:36 -04001542 case RPCRDMA_MTHCAFMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001543 req = rpcrdma_buffer_get_fmrs(req, buffers);
Chuck Leverc2922c02014-07-29 17:24:36 -04001544 break;
1545 default:
1546 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001547 }
1548 spin_unlock_irqrestore(&buffers->rb_lock, flags);
Chuck Leverddb6beb2014-07-29 17:24:54 -04001549 if (!list_empty(&stale))
1550 rpcrdma_retry_flushed_linv(&stale, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001551 return req;
1552}
1553
1554/*
1555 * Put request/reply buffers back into pool.
1556 * Pre-decrement counter/array index.
1557 */
1558void
1559rpcrdma_buffer_put(struct rpcrdma_req *req)
1560{
1561 struct rpcrdma_buffer *buffers = req->rl_buffer;
1562 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001563 unsigned long flags;
1564
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001565 spin_lock_irqsave(&buffers->rb_lock, flags);
Chuck Leverc2922c02014-07-29 17:24:36 -04001566 rpcrdma_buffer_put_sendbuf(req, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001567 switch (ia->ri_memreg_strategy) {
Tom Talpey3197d3092008-10-09 15:00:20 -04001568 case RPCRDMA_FRMR:
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001569 case RPCRDMA_MTHCAFMR:
Chuck Leverc2922c02014-07-29 17:24:36 -04001570 rpcrdma_buffer_put_mrs(req, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001571 break;
1572 default:
1573 break;
1574 }
1575 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1576}
1577
1578/*
1579 * Recover reply buffers from pool.
1580 * This happens when recovering from error conditions.
1581 * Post-increment counter/array index.
1582 */
1583void
1584rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
1585{
1586 struct rpcrdma_buffer *buffers = req->rl_buffer;
1587 unsigned long flags;
1588
1589 if (req->rl_iov.length == 0) /* special case xprt_rdma_allocate() */
1590 buffers = ((struct rpcrdma_req *) buffers)->rl_buffer;
1591 spin_lock_irqsave(&buffers->rb_lock, flags);
1592 if (buffers->rb_recv_index < buffers->rb_max_requests) {
1593 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1594 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1595 }
1596 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1597}
1598
1599/*
1600 * Put reply buffers back into pool when not attached to
Chuck Leverb45ccfd2014-05-28 10:32:34 -04001601 * request. This happens in error conditions.
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001602 */
1603void
1604rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
1605{
1606 struct rpcrdma_buffer *buffers = rep->rr_buffer;
1607 unsigned long flags;
1608
1609 rep->rr_func = NULL;
1610 spin_lock_irqsave(&buffers->rb_lock, flags);
1611 buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
1612 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1613}
1614
1615/*
1616 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
1617 */
1618
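/*
 * rpcrdma_register_internal: DMA-map a kmalloc'ed buffer and provide an
 * lkey for it. Uses the device's DMA lkey or the "bind" MR when one is
 * available; otherwise registers the buffer as a single physical MR
 * with ib_reg_phys_mr().
 */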
1619int
1620rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
1621 struct ib_mr **mrp, struct ib_sge *iov)
1622{
1623 struct ib_phys_buf ipb;
1624 struct ib_mr *mr;
1625 int rc;
1626
1627 /*
1628 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
1629 */
1630 iov->addr = ib_dma_map_single(ia->ri_id->device,
1631 va, len, DMA_BIDIRECTIONAL);
Yan Burmanbf858ab2014-06-19 16:06:30 +03001632 if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
1633 return -ENOMEM;
1634
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001635 iov->length = len;
1636
Tom Talpeybd7ed1d2008-10-09 15:00:09 -04001637 if (ia->ri_have_dma_lkey) {
1638 *mrp = NULL;
1639 iov->lkey = ia->ri_dma_lkey;
1640 return 0;
1641 } else if (ia->ri_bind_mem != NULL) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001642 *mrp = NULL;
1643 iov->lkey = ia->ri_bind_mem->lkey;
1644 return 0;
1645 }
1646
1647 ipb.addr = iov->addr;
1648 ipb.size = iov->length;
1649 mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
1650 IB_ACCESS_LOCAL_WRITE, &iov->addr);
1651
1652 dprintk("RPC: %s: phys convert: 0x%llx "
1653 "registered 0x%llx length %d\n",
Andrew Mortona56daeb2007-10-16 01:29:57 -07001654 __func__, (unsigned long long)ipb.addr,
1655 (unsigned long long)iov->addr, len);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001656
1657 if (IS_ERR(mr)) {
1658 *mrp = NULL;
1659 rc = PTR_ERR(mr);
1660 dprintk("RPC: %s: failed with %i\n", __func__, rc);
1661 } else {
1662 *mrp = mr;
1663 iov->lkey = mr->lkey;
1664 rc = 0;
1665 }
1666
1667 return rc;
1668}
1669
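/*
 * rpcrdma_deregister_internal: unmap a buffer mapped by
 * rpcrdma_register_internal(), and release its MR if one was created.
 */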
1670int
1671rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
1672 struct ib_mr *mr, struct ib_sge *iov)
1673{
1674 int rc;
1675
1676 ib_dma_unmap_single(ia->ri_id->device,
1677 iov->addr, iov->length, DMA_BIDIRECTIONAL);
1678
1679 if (NULL == mr)
1680 return 0;
1681
1682 rc = ib_dereg_mr(mr);
1683 if (rc)
1684 dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
1685 return rc;
1686}
1687
1688/*
1689 * Wrappers for chunk registration, shared by read/write chunk code.
1690 */
1691
1692static void
1693rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
1694{
1695 seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
1696 seg->mr_dmalen = seg->mr_len;
1697 if (seg->mr_page)
1698 seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
1699 seg->mr_page, offset_in_page(seg->mr_offset),
1700 seg->mr_dmalen, seg->mr_dir);
1701 else
1702 seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
1703 seg->mr_offset,
1704 seg->mr_dmalen, seg->mr_dir);
Tom Tucker5c635e02011-02-09 19:45:34 +00001705 if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
1706 dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
1707 __func__,
Randy Dunlap986d4ab2011-03-15 17:11:59 -07001708 (unsigned long long)seg->mr_dma,
1709 seg->mr_offset, seg->mr_dmalen);
Tom Tucker5c635e02011-02-09 19:45:34 +00001710 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001711}
1712
1713static void
1714rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
1715{
1716 if (seg->mr_page)
1717 ib_dma_unmap_page(ia->ri_id->device,
1718 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1719 else
1720 ib_dma_unmap_single(ia->ri_id->device,
1721 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1722}
1723
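/*
 * Register a chunk using a Fast Registration Memory Region (FRMR):
 * DMA-map up to ri_max_frmr_depth page-contiguous segments, load their
 * pages into the FRMR's page list, and post a FAST_REG_MR work request
 * so the whole range is covered by a single rkey. The low-order byte of
 * the rkey is bumped via ib_update_fast_reg_key() before each
 * registration. On failure, the FRMR is marked invalid and the segments
 * are unmapped.
 */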
Tom Talpey8d4ba032008-10-09 14:59:49 -04001724static int
Tom Talpey3197d3092008-10-09 15:00:20 -04001725rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1726 int *nsegs, int writing, struct rpcrdma_ia *ia,
1727 struct rpcrdma_xprt *r_xprt)
1728{
1729 struct rpcrdma_mr_seg *seg1 = seg;
Chuck Lever0dbb4102014-07-29 17:24:09 -04001730 struct rpcrdma_mw *mw = seg1->mr_chunk.rl_mw;
1731 struct rpcrdma_frmr *frmr = &mw->r.frmr;
1732 struct ib_mr *mr = frmr->fr_mr;
Chuck Leverf590e872014-07-29 17:25:29 -04001733 struct ib_send_wr fastreg_wr, *bad_wr;
Tom Talpey3197d3092008-10-09 15:00:20 -04001734 u8 key;
1735 int len, pageoff;
1736 int i, rc;
Tom Tucker9b781452012-02-20 13:07:57 -06001737 int seg_len;
1738 u64 pa;
1739 int page_no;
Tom Talpey3197d3092008-10-09 15:00:20 -04001740
1741 pageoff = offset_in_page(seg1->mr_offset);
1742 seg1->mr_offset -= pageoff; /* start of page */
1743 seg1->mr_len += pageoff;
1744 len = -pageoff;
Steve Wise0fc6c4e2014-05-28 10:32:00 -04001745 if (*nsegs > ia->ri_max_frmr_depth)
1746 *nsegs = ia->ri_max_frmr_depth;
Tom Tucker9b781452012-02-20 13:07:57 -06001747 for (page_no = i = 0; i < *nsegs;) {
Tom Talpey3197d3092008-10-09 15:00:20 -04001748 rpcrdma_map_one(ia, seg, writing);
Tom Tucker9b781452012-02-20 13:07:57 -06001749 pa = seg->mr_dma;
1750 for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
Chuck Lever0dbb4102014-07-29 17:24:09 -04001751 frmr->fr_pgl->page_list[page_no++] = pa;
Tom Tucker9b781452012-02-20 13:07:57 -06001752 pa += PAGE_SIZE;
1753 }
Tom Talpey3197d3092008-10-09 15:00:20 -04001754 len += seg->mr_len;
1755 ++seg;
1756 ++i;
1757 /* Check for holes */
1758 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1759 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1760 break;
1761 }
1762 dprintk("RPC: %s: Using frmr %p to map %d segments\n",
Chuck Lever0dbb4102014-07-29 17:24:09 -04001763 __func__, mw, i);
Tom Talpey3197d3092008-10-09 15:00:20 -04001764
Chuck Lever05055722014-07-29 17:25:12 -04001765 frmr->fr_state = FRMR_IS_VALID;
1766
Chuck Leverf590e872014-07-29 17:25:29 -04001767 memset(&fastreg_wr, 0, sizeof(fastreg_wr));
1768 fastreg_wr.wr_id = (unsigned long)(void *)mw;
1769 fastreg_wr.opcode = IB_WR_FAST_REG_MR;
1770 fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma;
1771 fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
1772 fastreg_wr.wr.fast_reg.page_list_len = page_no;
1773 fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
1774 fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
1775 if (fastreg_wr.wr.fast_reg.length < len) {
Chuck Lever5fc83f42014-07-29 17:23:17 -04001776 rc = -EIO;
1777 goto out_err;
Chuck Leverc977dea2014-05-28 10:35:06 -04001778 }
1779
1780 /* Bump the key */
Chuck Lever0dbb4102014-07-29 17:24:09 -04001781 key = (u8)(mr->rkey & 0x000000FF);
1782 ib_update_fast_reg_key(mr, ++key);
Chuck Leverc977dea2014-05-28 10:35:06 -04001783
Chuck Leverf590e872014-07-29 17:25:29 -04001784 fastreg_wr.wr.fast_reg.access_flags = (writing ?
Vu Pham68743082009-05-26 14:51:00 -04001785 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
1786 IB_ACCESS_REMOTE_READ);
Chuck Leverf590e872014-07-29 17:25:29 -04001787 fastreg_wr.wr.fast_reg.rkey = mr->rkey;
Tom Talpey3197d3092008-10-09 15:00:20 -04001788 DECR_CQCOUNT(&r_xprt->rx_ep);
1789
Chuck Leverf590e872014-07-29 17:25:29 -04001790 rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
Tom Talpey3197d3092008-10-09 15:00:20 -04001791 if (rc) {
1792 dprintk("RPC: %s: failed ib_post_send for register,"
1793 " status %i\n", __func__, rc);
Chuck Leverc93e9862014-07-29 17:24:19 -04001794 ib_update_fast_reg_key(mr, --key);
Chuck Lever5fc83f42014-07-29 17:23:17 -04001795 goto out_err;
Tom Talpey3197d3092008-10-09 15:00:20 -04001796 } else {
Chuck Lever0dbb4102014-07-29 17:24:09 -04001797 seg1->mr_rkey = mr->rkey;
Tom Talpey3197d3092008-10-09 15:00:20 -04001798 seg1->mr_base = seg1->mr_dma + pageoff;
1799 seg1->mr_nsegs = i;
1800 seg1->mr_len = len;
1801 }
1802 *nsegs = i;
Chuck Lever5fc83f42014-07-29 17:23:17 -04001803 return 0;
1804out_err:
Chuck Lever05055722014-07-29 17:25:12 -04001805 frmr->fr_state = FRMR_IS_INVALID;
Chuck Lever5fc83f42014-07-29 17:23:17 -04001806 while (i--)
1807 rpcrdma_unmap_one(ia, --seg);
Tom Talpey3197d3092008-10-09 15:00:20 -04001808 return rc;
1809}
1810
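/*
 * Invalidate an FRMR registration: post a LOCAL_INV work request for
 * the rkey and DMA-unmap the segments. If the post fails, the FRMR is
 * marked FRMR_IS_STALE so that rpcrdma_buffer_get() will recover it
 * before it is reused.
 */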
1811static int
1812rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
1813 struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
1814{
1815 struct rpcrdma_mr_seg *seg1 = seg;
1816 struct ib_send_wr invalidate_wr, *bad_wr;
1817 int rc;
1818
Chuck Leverdab7e3b2014-07-29 17:25:20 -04001819 seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
1820
Tom Talpey3197d3092008-10-09 15:00:20 -04001821 memset(&invalidate_wr, 0, sizeof invalidate_wr);
Tom Tucker5c635e02011-02-09 19:45:34 +00001822 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
Tom Talpey3197d3092008-10-09 15:00:20 -04001823 invalidate_wr.opcode = IB_WR_LOCAL_INV;
Tom Talpey3197d3092008-10-09 15:00:20 -04001824 invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1825 DECR_CQCOUNT(&r_xprt->rx_ep);
1826
Chuck Lever73806c82014-07-29 17:23:25 -04001827 read_lock(&ia->ri_qplock);
1828 while (seg1->mr_nsegs--)
1829 rpcrdma_unmap_one(ia, seg++);
Tom Talpey3197d3092008-10-09 15:00:20 -04001830 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
Chuck Lever73806c82014-07-29 17:23:25 -04001831 read_unlock(&ia->ri_qplock);
Chuck Leverdab7e3b2014-07-29 17:25:20 -04001832 if (rc) {
1833 /* Force rpcrdma_buffer_get() to retry */
1834 seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
Tom Talpey3197d3092008-10-09 15:00:20 -04001835 dprintk("RPC: %s: failed ib_post_send for invalidate,"
1836 " status %i\n", __func__, rc);
Chuck Leverdab7e3b2014-07-29 17:25:20 -04001837 }
Tom Talpey3197d3092008-10-09 15:00:20 -04001838 return rc;
1839}
1840
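/*
 * Register a chunk using a Fast Memory Region (FMR): DMA-map up to
 * RPCRDMA_MAX_DATA_SEGS page-contiguous segments and map their physical
 * addresses into a single rkey with ib_map_phys_fmr().
 */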
1841static int
Tom Talpey8d4ba032008-10-09 14:59:49 -04001842rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
1843 int *nsegs, int writing, struct rpcrdma_ia *ia)
1844{
1845 struct rpcrdma_mr_seg *seg1 = seg;
1846 u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
1847 int len, pageoff, i, rc;
1848
1849 pageoff = offset_in_page(seg1->mr_offset);
1850 seg1->mr_offset -= pageoff; /* start of page */
1851 seg1->mr_len += pageoff;
1852 len = -pageoff;
1853 if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
1854 *nsegs = RPCRDMA_MAX_DATA_SEGS;
1855 for (i = 0; i < *nsegs;) {
1856 rpcrdma_map_one(ia, seg, writing);
1857 physaddrs[i] = seg->mr_dma;
1858 len += seg->mr_len;
1859 ++seg;
1860 ++i;
1861 /* Check for holes */
1862 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1863 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1864 break;
1865 }
1866 rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
1867 physaddrs, i, seg1->mr_dma);
1868 if (rc) {
1869 dprintk("RPC: %s: failed ib_map_phys_fmr "
1870 "%u@0x%llx+%i (%d)... status %i\n", __func__,
1871 len, (unsigned long long)seg1->mr_dma,
1872 pageoff, i, rc);
1873 while (i--)
1874 rpcrdma_unmap_one(ia, --seg);
1875 } else {
1876 seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
1877 seg1->mr_base = seg1->mr_dma + pageoff;
1878 seg1->mr_nsegs = i;
1879 seg1->mr_len = len;
1880 }
1881 *nsegs = i;
1882 return rc;
1883}
1884
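/*
 * Unmap an FMR registration with ib_unmap_fmr(), then DMA-unmap the
 * underlying segments.
 */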
1885static int
1886rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
1887 struct rpcrdma_ia *ia)
1888{
1889 struct rpcrdma_mr_seg *seg1 = seg;
1890 LIST_HEAD(l);
1891 int rc;
1892
1893 list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l);
1894 rc = ib_unmap_fmr(&l);
Chuck Lever73806c82014-07-29 17:23:25 -04001895 read_lock(&ia->ri_qplock);
Tom Talpey8d4ba032008-10-09 14:59:49 -04001896 while (seg1->mr_nsegs--)
1897 rpcrdma_unmap_one(ia, seg++);
Chuck Lever73806c82014-07-29 17:23:25 -04001898 read_unlock(&ia->ri_qplock);
Tom Talpey8d4ba032008-10-09 14:59:49 -04001899 if (rc)
1900 dprintk("RPC: %s: failed ib_unmap_fmr,"
1901 " status %i\n", __func__, rc);
1902 return rc;
1903}
1904
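/*
 * rpcrdma_register_external: register a chunk of memory for RDMA,
 * dispatching on the interface's memory registration strategy.
 * Returns the number of segments actually registered, or -1 on error.
 */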
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001905int
1906rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
1907 int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
1908{
1909 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001910 int rc = 0;
1911
1912 switch (ia->ri_memreg_strategy) {
1913
1914#if RPCRDMA_PERSISTENT_REGISTRATION
1915 case RPCRDMA_ALLPHYSICAL:
1916 rpcrdma_map_one(ia, seg, writing);
1917 seg->mr_rkey = ia->ri_bind_mem->rkey;
1918 seg->mr_base = seg->mr_dma;
1919 seg->mr_nsegs = 1;
1920 nsegs = 1;
1921 break;
1922#endif
1923
Tom Talpey3197d3092008-10-09 15:00:20 -04001924	/* Registration using FRMR (fast registration memory regions) */
1925 case RPCRDMA_FRMR:
1926 rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
1927 break;
1928
Tom Talpey8d4ba032008-10-09 14:59:49 -04001929	/* Registration using FMR (fast memory regions) */
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001930 case RPCRDMA_MTHCAFMR:
Tom Talpey8d4ba032008-10-09 14:59:49 -04001931 rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001932 break;
1933
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001934 default:
Chuck Lever0ac531c2014-05-28 10:32:43 -04001935 return -1;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001936 }
1937 if (rc)
1938 return -1;
1939
1940 return nsegs;
1941}
1942
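/*
 * rpcrdma_deregister_external: undo a previous chunk registration,
 * dispatching on the memory registration strategy. Returns the number
 * of segments that had been registered.
 */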
1943int
1944rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
Chuck Lever13c9ff82014-05-28 10:33:08 -04001945 struct rpcrdma_xprt *r_xprt)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001946{
1947 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001948 int nsegs = seg->mr_nsegs, rc;
1949
1950 switch (ia->ri_memreg_strategy) {
1951
1952#if RPCRDMA_PERSISTENT_REGISTRATION
1953 case RPCRDMA_ALLPHYSICAL:
Chuck Lever73806c82014-07-29 17:23:25 -04001954 read_lock(&ia->ri_qplock);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001955 rpcrdma_unmap_one(ia, seg);
Chuck Lever73806c82014-07-29 17:23:25 -04001956 read_unlock(&ia->ri_qplock);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001957 break;
1958#endif
1959
Tom Talpey3197d3092008-10-09 15:00:20 -04001960 case RPCRDMA_FRMR:
1961 rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
1962 break;
1963
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001964 case RPCRDMA_MTHCAFMR:
Tom Talpey8d4ba032008-10-09 14:59:49 -04001965 rc = rpcrdma_deregister_fmr_external(seg, ia);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001966 break;
1967
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001968 default:
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001969 break;
1970 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001971 return nsegs;
1972}
1973
1974/*
1975 * Prepost any receive buffer, then post send.
1976 *
1977 * Receive buffer is donated to hardware, reclaimed upon recv completion.
1978 */
1979int
1980rpcrdma_ep_post(struct rpcrdma_ia *ia,
1981 struct rpcrdma_ep *ep,
1982 struct rpcrdma_req *req)
1983{
1984 struct ib_send_wr send_wr, *send_wr_fail;
1985 struct rpcrdma_rep *rep = req->rl_reply;
1986 int rc;
1987
1988 if (rep) {
1989 rc = rpcrdma_ep_post_recv(ia, ep, rep);
1990 if (rc)
1991 goto out;
1992 req->rl_reply = NULL;
1993 }
1994
1995 send_wr.next = NULL;
1996 send_wr.wr_id = 0ULL; /* no send cookie */
1997 send_wr.sg_list = req->rl_send_iov;
1998 send_wr.num_sge = req->rl_niovs;
1999 send_wr.opcode = IB_WR_SEND;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002000 if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */
2001 ib_dma_sync_single_for_device(ia->ri_id->device,
2002 req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
2003 DMA_TO_DEVICE);
2004 ib_dma_sync_single_for_device(ia->ri_id->device,
2005 req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
2006 DMA_TO_DEVICE);
2007 ib_dma_sync_single_for_device(ia->ri_id->device,
2008 req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
2009 DMA_TO_DEVICE);
2010
2011 if (DECR_CQCOUNT(ep) > 0)
2012 send_wr.send_flags = 0;
2013 else { /* Provider must take a send completion every now and then */
2014 INIT_CQCOUNT(ep);
2015 send_wr.send_flags = IB_SEND_SIGNALED;
2016 }
2017
2018 rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
2019 if (rc)
2020 dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
2021 rc);
2022out:
2023 return rc;
2024}
2025
2026/*
2027 * (Re)post a receive buffer.
2028 */
2029int
2030rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
2031 struct rpcrdma_ep *ep,
2032 struct rpcrdma_rep *rep)
2033{
2034 struct ib_recv_wr recv_wr, *recv_wr_fail;
2035 int rc;
2036
2037 recv_wr.next = NULL;
2038 recv_wr.wr_id = (u64) (unsigned long) rep;
2039 recv_wr.sg_list = &rep->rr_iov;
2040 recv_wr.num_sge = 1;
2041
2042 ib_dma_sync_single_for_cpu(ia->ri_id->device,
2043 rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);
2044
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002045 rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
2046
2047 if (rc)
2048 dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
2049 rc);
2050 return rc;
2051}
Chuck Lever43e95982014-07-29 17:23:34 -04002052
2053/* Physical mapping means one Read/Write list entry per page.
2054 * All list entries must fit within an inline buffer.
2055 *
2056 * NB: The server must return a Write list for NFS READ,
2057 * which has the same constraint. Factor in the inline
2058 * rsize as well.
2059 */
2060static size_t
2061rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt)
2062{
2063 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
2064 unsigned int inline_size, pages;
2065
2066 inline_size = min_t(unsigned int,
2067 cdata->inline_wsize, cdata->inline_rsize);
2068 inline_size -= RPCRDMA_HDRLEN_MIN;
2069 pages = inline_size / sizeof(struct rpcrdma_segment);
2070 return pages << PAGE_SHIFT;
2071}
2072
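/* With MR-based registration (FRMR, FMR), an RPC payload can span at
 * most RPCRDMA_MAX_DATA_SEGS pages.
 */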
2073static size_t
2074rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt)
2075{
2076 return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
2077}
2078
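/* Return the largest RPC payload this transport instance can carry,
 * based on its memory registration strategy.
 */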
2079size_t
2080rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt)
2081{
2082 size_t result;
2083
2084 switch (r_xprt->rx_ia.ri_memreg_strategy) {
2085 case RPCRDMA_ALLPHYSICAL:
2086 result = rpcrdma_physical_max_payload(r_xprt);
2087 break;
2088 default:
2089 result = rpcrdma_mr_max_payload(r_xprt);
2090 }
2091 return result;
2092}