\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -04001/*
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040038 */
39
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -040040/*
41 * verbs.c
42 *
43 * Encapsulates the major functions managing:
44 * o adapters
45 * o endpoints
46 * o connections
47 * o buffer memory
48 */

#include <linux/interrupt.h>
#include <linux/slab.h>
#include <asm/bitops.h>

#include "xprt_rdma.h"

/*
 * Globals/Macros
 */

#ifdef RPC_DEBUG
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);

/*
 * internal functions
 */

/*
 * handle replies in tasklet context, using a single, global list
 * rdma tasklet function -- just turn around and call the func
 * for all replies on the list
 */

static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
static LIST_HEAD(rpcrdma_tasklets_g);

static void
rpcrdma_run_tasklet(unsigned long data)
{
	struct rpcrdma_rep *rep;
	void (*func)(struct rpcrdma_rep *);
	unsigned long flags;

	data = data;
	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	while (!list_empty(&rpcrdma_tasklets_g)) {
		rep = list_entry(rpcrdma_tasklets_g.next,
				 struct rpcrdma_rep, rr_list);
		list_del(&rep->rr_list);
		func = rep->rr_func;
		rep->rr_func = NULL;
		spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);

		if (func)
			func(rep);
		else
			rpcrdma_recv_buffer_put(rep);

		spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	}
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
}

static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);

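/* Queue a reply on the global tasklet list and kick the tasklet
 * that hands completed replies back to the RPC layer.
 */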
static inline void
rpcrdma_schedule_tasklet(struct rpcrdma_rep *rep)
{
	unsigned long flags;

	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	list_add_tail(&rep->rr_list, &rpcrdma_tasklets_g);
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
	tasklet_schedule(&rpcrdma_tasklet_g);
}

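/* Asynchronous QP error handler: mark the endpoint disconnected
 * (-EIO) and wake anyone waiting on the connection state.
 */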
static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	dprintk("RPC: %s: QP error %X on device %s ep %p\n",
		__func__, event->event, event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

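/* Asynchronous CQ error handler: same treatment as a QP error,
 * since the completion queue cannot be trusted afterwards.
 */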
static void
rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	dprintk("RPC: %s: CQ error %X on device %s ep %p\n",
		__func__, event->event, event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

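/* Process one send completion: advance the FRMR state machine for
 * fast_reg and local_inv completions, or flag the FRMR stale when
 * the completion was flushed or failed.
 */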
static void
rpcrdma_sendcq_process_wc(struct ib_wc *wc)
{
	struct rpcrdma_mw *frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;

	dprintk("RPC: %s: frmr %p status %X opcode %d\n",
		__func__, frmr, wc->status, wc->opcode);

	if (wc->wr_id == 0ULL)
		return;
	if (wc->status != IB_WC_SUCCESS) {
		frmr->r.frmr.fr_state = FRMR_IS_STALE;
		return;
	}

	if (wc->opcode == IB_WC_FAST_REG_MR)
		frmr->r.frmr.fr_state = FRMR_IS_VALID;
	else if (wc->opcode == IB_WC_LOCAL_INV)
		frmr->r.frmr.fr_state = FRMR_IS_INVALID;
}

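/* Drain the send CQ in RPCRDMA_POLLSIZE batches, up to a fixed
 * budget, processing each work completion as it is reaped.
 */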
static int
rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct ib_wc *wcs;
	int budget, count, rc;

	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_send_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			return rc;

		count = rc;
		while (count-- > 0)
			rpcrdma_sendcq_process_wc(wcs++);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	return 0;
}

/*
 * Handle send, fast_reg_mr, and local_inv completions.
 *
 * Send events are typically suppressed and thus do not result
 * in an upcall. Occasionally one is signaled, however. This
 * prevents the provider's completion queue from wrapping and
 * losing a completion.
 */
static void
rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_sendcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC: %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_sendcq_poll(cq, ep);
}

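/* Process one receive completion: record the reply length, sync the
 * DMA buffer for the CPU, pick up the server's credit grant, and
 * hand the reply to the tasklet for further processing.
 */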
static void
rpcrdma_recvcq_process_wc(struct ib_wc *wc)
{
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long)wc->wr_id;

	dprintk("RPC: %s: rep %p status %X opcode %X length %u\n",
		__func__, rep, wc->status, wc->opcode, wc->byte_len);

	if (wc->status != IB_WC_SUCCESS) {
		rep->rr_len = ~0U;
		goto out_schedule;
	}
	if (wc->opcode != IB_WC_RECV)
		return;

	rep->rr_len = wc->byte_len;
	ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
			rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);

	if (rep->rr_len >= 16) {
		struct rpcrdma_msg *p = (struct rpcrdma_msg *)rep->rr_base;
		unsigned int credits = ntohl(p->rm_credit);

		if (credits == 0)
			credits = 1;	/* don't deadlock */
		else if (credits > rep->rr_buffer->rb_max_requests)
			credits = rep->rr_buffer->rb_max_requests;
		atomic_set(&rep->rr_buffer->rb_credits, credits);
	}

out_schedule:
	rpcrdma_schedule_tasklet(rep);
}

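/* Drain the receive CQ in RPCRDMA_POLLSIZE batches, up to a fixed
 * budget, processing each work completion as it is reaped.
 */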
static int
rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct ib_wc *wcs;
	int budget, count, rc;

	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_recv_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			return rc;

		count = rc;
		while (count-- > 0)
			rpcrdma_recvcq_process_wc(wcs++);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	return 0;
}

/*
 * Handle receive completions.
 *
 * It is reentrant but processes single events in order to maintain
 * ordering of receives to keep server credits.
 *
 * It is the responsibility of the scheduled tasklet to return
 * recv buffers to the pool. NOTE: this affects synchronization of
 * connection shutdown. That is, the structures required for
 * the completion of the reply handler must remain intact until
 * all memory has been reclaimed.
 */
static void
rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_recvcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC: %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_recvcq_poll(cq, ep);
}

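/* Drain both completion queues, typically around a disconnect or
 * reconnect, by invoking the upcalls directly.
 */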
static void
rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
{
	rpcrdma_recvcq_upcall(ep->rep_attr.recv_cq, ep);
	rpcrdma_sendcq_upcall(ep->rep_attr.send_cq, ep);
}

#ifdef RPC_DEBUG
static const char * const conn[] = {
	"address resolved",
	"address error",
	"route resolved",
	"route error",
	"connect request",
	"connect response",
	"connect error",
	"unreachable",
	"rejected",
	"established",
	"disconnected",
	"device removal"
};
#endif

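/* Connection manager event handler. Records the result of address
 * and route resolution for rpcrdma_create_id(), and translates
 * connection events into ep->rep_connected state changes.
 */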
static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
#ifdef RPC_DEBUG
	struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
#endif
	struct ib_qp_attr attr;
	struct ib_qp_init_attr iattr;
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ia->ri_async_rc = 0;
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		ib_query_qp(ia->ri_id->qp, &attr,
			IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
			&iattr);
		dprintk("RPC: %s: %d responder resources"
			" (%d initiator)\n",
			__func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		connstate = -ECONNREFUSED;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
		goto connected;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		connstate = -ENODEV;
connected:
		dprintk("RPC: %s: %s: %pI4:%u (ep 0x%p event 0x%x)\n",
			__func__,
			(event->event <= 11) ? conn[event->event] :
				"unknown connection error",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			ep, event->event);
		atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
		dprintk("RPC: %s: %sconnected\n",
			__func__, connstate > 0 ? "" : "dis");
		ep->rep_connected = connstate;
		ep->rep_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		break;
	default:
		dprintk("RPC: %s: unexpected CM event %d\n",
			__func__, event->event);
		break;
	}

#ifdef RPC_DEBUG
	if (connstate == 1) {
		int ird = attr.max_dest_rd_atomic;
		int tird = ep->rep_remote_cma.responder_resources;
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
			"on %s, memreg %d slots %d ird %d%s\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			ia->ri_id->device->name,
			ia->ri_memreg_strategy,
			xprt->rx_buf.rb_max_requests,
			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
	} else if (connstate < 0) {
		printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
			&addr->sin_addr.s_addr,
			ntohs(addr->sin_port),
			connstate);
	}
#endif

	return 0;
}

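/* Create an rdma_cm_id and synchronously resolve the server's
 * address and route, using ia->ri_done to wait for the results
 * reported by rpcrdma_conn_upcall().
 */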
static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
			struct rpcrdma_ia *ia, struct sockaddr *addr)
{
	struct rdma_cm_id *id;
	int rc;

	init_completion(&ia->ri_done);

	id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		dprintk("RPC: %s: rdma_create_id() failed %i\n",
			__func__, rc);
		return id;
	}

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}

/*
 * Drain any cq, prior to teardown.
 */
static void
rpcrdma_clean_cq(struct ib_cq *cq)
{
	struct ib_wc wc;
	int count = 0;

	while (1 == ib_poll_cq(cq, 1, &wc))
		++count;

	if (count)
		dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
			__func__, count, wc.opcode);
}

/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	int rc, mem_priv;
	struct ib_device_attr devattr;
	struct rpcrdma_ia *ia = &xprt->rx_ia;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	/*
	 * Query the device to determine if the requested memory
	 * registration strategy is supported. If it isn't, set the
	 * strategy to a globally supported model.
	 */
	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out2;
	}

	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
	}

	if (memreg == RPCRDMA_FRMR) {
		/* Requires both frmr reg and local dma lkey */
		if ((devattr.device_cap_flags &
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
			dprintk("RPC: %s: FRMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_MTHCAFMR;
		} else {
			/* Mind the ia limit on FRMR page list depth */
			ia->ri_max_frmr_depth = min_t(unsigned int,
				RPCRDMA_MAX_DATA_SEGS,
				devattr.max_fast_reg_page_list_len);
		}
	}
	if (memreg == RPCRDMA_MTHCAFMR) {
		if (!ia->ri_id->device->alloc_fmr) {
			dprintk("RPC: %s: MTHCAFMR registration "
				"not supported by HCA\n", __func__);
#if RPCRDMA_PERSISTENT_REGISTRATION
			memreg = RPCRDMA_ALLPHYSICAL;
#else
			rc = -ENOMEM;
			goto out2;
#endif
		}
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_FRMR:
		break;
#if RPCRDMA_PERSISTENT_REGISTRATION
	case RPCRDMA_ALLPHYSICAL:
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
#endif
	case RPCRDMA_MTHCAFMR:
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
#if RPCRDMA_PERSISTENT_REGISTRATION
	register_setup:
#endif
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			rc = -ENOMEM;
			goto out2;
		}
		break;
	default:
		printk(KERN_ERR "RPC: Unsupported memory "
				"registration mode: %d\n", memreg);
		rc = -ENOMEM;
		goto out2;
	}
	dprintk("RPC: %s: memory registration strategy is %d\n",
		__func__, memreg);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	rwlock_init(&ia->ri_qplock);
	return 0;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}

/*
 * Clean up/close an IA.
 *  o if event handles and PD have been initialized, free them.
 *  o close the IA
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering\n", __func__);
	if (ia->ri_bind_mem != NULL) {
		rc = ib_dereg_mr(ia->ri_bind_mem);
		dprintk("RPC: %s: ib_dereg_mr returned %i\n",
			__func__, rc);
	}
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
		if (ia->ri_id->qp)
			rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = NULL;
	}
	if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
		rc = ib_dealloc_pd(ia->ri_pd);
		dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
			__func__, rc);
	}
}

/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
				struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr devattr;
	struct ib_cq *sendcq, *recvcq;
	int rc, err;

	rc = ib_query_device(ia->ri_id->device, &devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		return rc;
	}

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > devattr.max_qp_wr)
		cdata->max_requests = devattr.max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	/* send_cq and recv_cq initialized below */
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR: {
		int depth = 7;

		/* Add room for frmr register and invalidate WRs.
		 * 1. FRMR reg WR for head
		 * 2. FRMR invalidate WR for head
		 * 3. N FRMR reg WRs for pagelist
		 * 4. N FRMR invalidate WRs for pagelist
		 * 5. FRMR reg WR for tail
		 * 6. FRMR invalidate WR for tail
		 * 7. The RDMA_SEND WR
		 */

		/* Calculate N if the device max FRMR depth is smaller than
		 * RPCRDMA_MAX_DATA_SEGS.
		 */
		if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
			int delta = RPCRDMA_MAX_DATA_SEGS -
				    ia->ri_max_frmr_depth;

			do {
				depth += 2; /* FRMR reg + invalidate */
				delta -= ia->ri_max_frmr_depth;
			} while (delta > 0);

		}
		ep->rep_attr.cap.max_send_wr *= depth;
		if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
			cdata->max_requests = devattr.max_qp_wr / depth;
			if (!cdata->max_requests)
				return -EINVAL;
			ep->rep_attr.cap.max_send_wr = cdata->max_requests *
						       depth;
		}
		break;
	}
	default:
		break;
	}
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

	/* set trigger for requesting send completion */
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
	if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;
	INIT_CQCOUNT(ep);
	ep->rep_ia = ia;
	init_waitqueue_head(&ep->rep_connect_wait);
	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

	sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
				  rpcrdma_cq_async_error_upcall, ep,
				  ep->rep_attr.cap.max_send_wr + 1, 0);
	if (IS_ERR(sendcq)) {
		rc = PTR_ERR(sendcq);
		dprintk("RPC: %s: failed to create send CQ: %i\n",
			__func__, rc);
		goto out1;
	}

	rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		goto out2;
	}

	recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
				  rpcrdma_cq_async_error_upcall, ep,
				  ep->rep_attr.cap.max_recv_wr + 1, 0);
	if (IS_ERR(recvcq)) {
		rc = PTR_ERR(recvcq);
		dprintk("RPC: %s: failed to create recv CQ: %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		ib_destroy_cq(recvcq);
		goto out2;
	}

	ep->rep_attr.send_cq = sendcq;
	ep->rep_attr.recv_cq = recvcq;

	/* Initialize cma parameters */

	/* RPC/RDMA does not use private data */
	ep->rep_remote_cma.private_data = NULL;
	ep->rep_remote_cma.private_data_len = 0;

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	if (devattr.max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
		ep->rep_remote_cma.responder_resources = 32;
	else
		ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;

	ep->rep_remote_cma.retry_count = 7;
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	err = ib_destroy_cq(sendcq);
	if (err)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, err);
out1:
	return rc;
}

/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering, connected is %d\n",
		__func__, ep->rep_connected);

	cancel_delayed_work_sync(&ep->rep_connect_worker);

	if (ia->ri_id->qp) {
		rc = rpcrdma_ep_disconnect(ep, ia);
		if (rc)
			dprintk("RPC: %s: rpcrdma_ep_disconnect"
				" returned %i\n", __func__, rc);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	/* padding - could be done in rpcrdma_buffer_destroy... */
	if (ep->rep_pad_mr) {
		rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
		ep->rep_pad_mr = NULL;
	}

	rpcrdma_clean_cq(ep->rep_attr.recv_cq);
	rc = ib_destroy_cq(ep->rep_attr.recv_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);

	rpcrdma_clean_cq(ep->rep_attr.send_cq);
	rc = ib_destroy_cq(ep->rep_attr.send_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);
}

/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id, *old;
	int rc = 0;
	int retry_count = 0;

	if (ep->rep_connected != 0) {
		struct rpcrdma_xprt *xprt;
retry:
		dprintk("RPC: %s: reconnecting...\n", __func__);
		rc = rpcrdma_ep_disconnect(ep, ia);
		if (rc && rc != -ENOTCONN)
			dprintk("RPC: %s: rpcrdma_ep_disconnect"
				" status %i\n", __func__, rc);
		rpcrdma_flush_cqs(ep);

		if (ia->ri_memreg_strategy == RPCRDMA_FRMR)
			rpcrdma_reset_frmrs(ia);

		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		id = rpcrdma_create_id(xprt, ia,
				(struct sockaddr *)&xprt->rx_data.addr);
		if (IS_ERR(id)) {
			rc = -EHOSTUNREACH;
			goto out;
		}
		/* TEMP TEMP TEMP - fail if new device:
		 * Deregister/remarshal *all* requests!
		 * Close and recreate adapter, pd, etc!
		 * Re-determine all attributes still sane!
		 * More stuff I haven't thought of!
		 * Rrrgh!
		 */
		if (ia->ri_id->device != id->device) {
			printk("RPC: %s: can't reconnect on "
				"different device!\n", __func__);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}
		/* END TEMP */
		rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
				__func__, rc);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}

		write_lock(&ia->ri_qplock);
		old = ia->ri_id;
		ia->ri_id = id;
		write_unlock(&ia->ri_qplock);

		rdma_destroy_qp(old);
		rdma_destroy_id(old);
	} else {
		dprintk("RPC: %s: connecting...\n", __func__);
		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
				__func__, rc);
			/* do not update ep->rep_connected */
			return -ENETUNREACH;
		}
	}

	ep->rep_connected = 0;

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC: %s: rdma_connect() failed with %i\n",
				__func__, rc);
		goto out;
	}

	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

	/*
	 * Check state. A non-peer reject indicates no listener
	 * (ECONNREFUSED), which may be a transient state. All
	 * others indicate a transport condition which has already
	 * undergone a best-effort.
	 */
	if (ep->rep_connected == -ECONNREFUSED &&
	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
		dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
		goto retry;
	}
	if (ep->rep_connected <= 0) {
		/* Sometimes, the only way to reliably connect to remote
		 * CMs is to use same nonzero values for ORD and IRD. */
		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
		    (ep->rep_remote_cma.responder_resources == 0 ||
		     ep->rep_remote_cma.initiator_depth !=
				ep->rep_remote_cma.responder_resources)) {
			if (ep->rep_remote_cma.responder_resources == 0)
				ep->rep_remote_cma.responder_resources = 1;
			ep->rep_remote_cma.initiator_depth =
				ep->rep_remote_cma.responder_resources;
			goto retry;
		}
		rc = ep->rep_connected;
	} else {
		dprintk("RPC: %s: connected\n", __func__);
	}

out:
	if (rc)
		ep->rep_connected = rc;
	return rc;
}

/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
int
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	rpcrdma_flush_cqs(ep);
	rc = rdma_disconnect(ia->ri_id);
	if (!rc) {
		/* returns without wait if not connected */
		wait_event_interruptible(ep->rep_connect_wait,
							ep->rep_connected != 1);
		dprintk("RPC: %s: after wait, %sconnected\n", __func__,
			(ep->rep_connected == 1) ? "still " : "dis");
	} else {
		dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
		ep->rep_connected = rc;
	}
	return rc;
}

/*
 * Initialize buffer memory
 */
int
rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
	struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
{
	char *p;
	size_t len, rlen, wlen;
	int i, rc;
	struct rpcrdma_mw *r;

	buf->rb_max_requests = cdata->max_requests;
	spin_lock_init(&buf->rb_lock);
	atomic_set(&buf->rb_credits, 1);

	/* Need to allocate:
	 *   1.  arrays for send and recv pointers
	 *   2.  arrays of struct rpcrdma_req to fill in pointers
	 *   3.  array of struct rpcrdma_rep for replies
	 *   4.  padding, if any
	 *   5.  mw's, fmr's or frmr's, if any
	 * Send/recv buffers in req/rep need to be registered
	 */

	len = buf->rb_max_requests *
		(sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
	len += cdata->padding;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		len += buf->rb_max_requests * RPCRDMA_MAX_SEGS *
				sizeof(struct rpcrdma_mw);
		break;
	case RPCRDMA_MTHCAFMR:
		/* TBD we are perhaps overallocating here */
		len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
				sizeof(struct rpcrdma_mw);
		break;
	default:
		break;
	}

	/* allocate 1, 4 and 5 in one shot */
	p = kzalloc(len, GFP_KERNEL);
	if (p == NULL) {
		dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
			__func__, len);
		rc = -ENOMEM;
		goto out;
	}
	buf->rb_pool = p;	/* for freeing it later */

	buf->rb_send_bufs = (struct rpcrdma_req **) p;
	p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
	buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
	p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];

	/*
	 * Register the zeroed pad buffer, if any.
	 */
	if (cdata->padding) {
		rc = rpcrdma_register_internal(ia, p, cdata->padding,
					    &ep->rep_pad_mr, &ep->rep_pad);
		if (rc)
			goto out;
	}
	p += cdata->padding;

	INIT_LIST_HEAD(&buf->rb_mws);
	INIT_LIST_HEAD(&buf->rb_all);
	r = (struct rpcrdma_mw *)p;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
			r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
						ia->ri_max_frmr_depth);
			if (IS_ERR(r->r.frmr.fr_mr)) {
				rc = PTR_ERR(r->r.frmr.fr_mr);
				dprintk("RPC: %s: ib_alloc_fast_reg_mr"
					" failed %i\n", __func__, rc);
				goto out;
			}
			r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
						ia->ri_id->device,
						ia->ri_max_frmr_depth);
			if (IS_ERR(r->r.frmr.fr_pgl)) {
				rc = PTR_ERR(r->r.frmr.fr_pgl);
				dprintk("RPC: %s: "
					"ib_alloc_fast_reg_page_list "
					"failed %i\n", __func__, rc);

				ib_dereg_mr(r->r.frmr.fr_mr);
				goto out;
			}
			list_add(&r->mw_all, &buf->rb_all);
			list_add(&r->mw_list, &buf->rb_mws);
			++r;
		}
		break;
	case RPCRDMA_MTHCAFMR:
		/* TBD we are perhaps overallocating here */
		for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
			static struct ib_fmr_attr fa =
				{ RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT };
			r->r.fmr = ib_alloc_fmr(ia->ri_pd,
				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
				&fa);
			if (IS_ERR(r->r.fmr)) {
				rc = PTR_ERR(r->r.fmr);
				dprintk("RPC: %s: ib_alloc_fmr"
					" failed %i\n", __func__, rc);
				goto out;
			}
			list_add(&r->mw_all, &buf->rb_all);
			list_add(&r->mw_list, &buf->rb_mws);
			++r;
		}
		break;
	default:
		break;
	}

	/*
	 * Allocate/init the request/reply buffers. Doing this
	 * using kmalloc for now -- one for each buf.
	 */
	wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req));
	rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep));
	dprintk("RPC: %s: wlen = %zu, rlen = %zu\n",
		__func__, wlen, rlen);

	for (i = 0; i < buf->rb_max_requests; i++) {
		struct rpcrdma_req *req;
		struct rpcrdma_rep *rep;

		req = kmalloc(wlen, GFP_KERNEL);
		if (req == NULL) {
			dprintk("RPC: %s: request buffer %d alloc"
				" failed\n", __func__, i);
			rc = -ENOMEM;
			goto out;
		}
		memset(req, 0, sizeof(struct rpcrdma_req));
		buf->rb_send_bufs[i] = req;
		buf->rb_send_bufs[i]->rl_buffer = buf;

		rc = rpcrdma_register_internal(ia, req->rl_base,
				wlen - offsetof(struct rpcrdma_req, rl_base),
				&buf->rb_send_bufs[i]->rl_handle,
				&buf->rb_send_bufs[i]->rl_iov);
		if (rc)
			goto out;

		buf->rb_send_bufs[i]->rl_size = wlen -
					sizeof(struct rpcrdma_req);

		rep = kmalloc(rlen, GFP_KERNEL);
		if (rep == NULL) {
			dprintk("RPC: %s: reply buffer %d alloc failed\n",
				__func__, i);
			rc = -ENOMEM;
			goto out;
		}
		memset(rep, 0, sizeof(struct rpcrdma_rep));
		buf->rb_recv_bufs[i] = rep;
		buf->rb_recv_bufs[i]->rr_buffer = buf;

		rc = rpcrdma_register_internal(ia, rep->rr_base,
				rlen - offsetof(struct rpcrdma_rep, rr_base),
				&buf->rb_recv_bufs[i]->rr_handle,
				&buf->rb_recv_bufs[i]->rr_iov);
		if (rc)
			goto out;

	}
	dprintk("RPC: %s: max_requests %d\n",
		__func__, buf->rb_max_requests);
	/* done */
	return 0;
out:
	rpcrdma_buffer_destroy(buf);
	return rc;
}

/*
 * Unregister and destroy buffer memory. Need to deal with
 * partial initialization, so it's callable from failed create.
 * Must be called before destroying endpoint, as registrations
 * reference it.
 */
void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
	int rc, i;
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	struct rpcrdma_mw *r;

	/* clean up in reverse order from create
	 *   1.  recv mr memory (mr free, then kfree)
	 *   2.  send mr memory (mr free, then kfree)
	 *   3.  padding (if any) [moved to rpcrdma_ep_destroy]
	 *   4.  arrays
	 */
	dprintk("RPC: %s: entering\n", __func__);

	for (i = 0; i < buf->rb_max_requests; i++) {
		if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) {
			rpcrdma_deregister_internal(ia,
					buf->rb_recv_bufs[i]->rr_handle,
					&buf->rb_recv_bufs[i]->rr_iov);
			kfree(buf->rb_recv_bufs[i]);
		}
		if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
			rpcrdma_deregister_internal(ia,
					buf->rb_send_bufs[i]->rl_handle,
					&buf->rb_send_bufs[i]->rl_iov);
			kfree(buf->rb_send_bufs[i]);
		}
	}

	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			struct rpcrdma_mw, mw_list);
		list_del(&r->mw_all);
		list_del(&r->mw_list);
		switch (ia->ri_memreg_strategy) {
		case RPCRDMA_FRMR:
			rc = ib_dereg_mr(r->r.frmr.fr_mr);
			if (rc)
				dprintk("RPC: %s:"
					" ib_dereg_mr"
					" failed %i\n",
					__func__, rc);
			ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
			break;
		case RPCRDMA_MTHCAFMR:
			rc = ib_dealloc_fmr(r->r.fmr);
			if (rc)
				dprintk("RPC: %s:"
					" ib_dealloc_fmr"
					" failed %i\n",
					__func__, rc);
			break;
		default:
			break;
		}
	}

	kfree(buf->rb_pool);
}

/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
 * an unusable state. Find FRMRs in this state and dereg / reg
 * each. FRMRs that are VALID and attached to an rpcrdma_req are
 * also torn down.
 *
 * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
 *
 * This is invoked only in the transport connect worker in order
 * to serialize with rpcrdma_register_frmr_external().
 */
static void
rpcrdma_reset_frmrs(struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct list_head *pos;
	struct rpcrdma_mw *r;
	int rc;

	list_for_each(pos, &buf->rb_all) {
		r = list_entry(pos, struct rpcrdma_mw, mw_all);

		if (r->r.frmr.fr_state == FRMR_IS_INVALID)
			continue;

		rc = ib_dereg_mr(r->r.frmr.fr_mr);
		if (rc)
			dprintk("RPC: %s: ib_dereg_mr failed %i\n",
				__func__, rc);
		ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);

		r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
					ia->ri_max_frmr_depth);
		if (IS_ERR(r->r.frmr.fr_mr)) {
			rc = PTR_ERR(r->r.frmr.fr_mr);
			dprintk("RPC: %s: ib_alloc_fast_reg_mr"
				" failed %i\n", __func__, rc);
			continue;
		}
		r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
					ia->ri_id->device,
					ia->ri_max_frmr_depth);
		if (IS_ERR(r->r.frmr.fr_pgl)) {
			rc = PTR_ERR(r->r.frmr.fr_pgl);
			dprintk("RPC: %s: "
				"ib_alloc_fast_reg_page_list "
				"failed %i\n", __func__, rc);

			ib_dereg_mr(r->r.frmr.fr_mr);
			continue;
		}
		r->r.frmr.fr_state = FRMR_IS_INVALID;
	}
}

/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
 * some req segments uninitialized.
 */
static void
rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
{
	if (*mw) {
		list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
		*mw = NULL;
	}
}

/* Cycle mw's back in reverse order, and "spin" them.
 * This delays and scrambles reuse as much as possible.
 */
static void
rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mr_seg *seg = req->rl_segments;
	struct rpcrdma_mr_seg *seg1 = seg;
	int i;

	for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
		rpcrdma_buffer_put_mr(&seg->mr_chunk.rl_mw, buf);
	rpcrdma_buffer_put_mr(&seg1->mr_chunk.rl_mw, buf);
}

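/* Return a request buffer to the pool, along with any reply buffer
 * still attached to it.
 */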
static void
rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	buf->rb_send_bufs[--buf->rb_send_index] = req;
	req->rl_niovs = 0;
	if (req->rl_reply) {
		buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
		req->rl_reply->rr_func = NULL;
		req->rl_reply = NULL;
	}
}

/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
 * Redo only the ib_post_send().
 */
static void
rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct ib_send_wr invalidate_wr, *bad_wr;
	int rc;

	dprintk("RPC: %s: FRMR %p is stale\n", __func__, r);

	/* When this FRMR is re-inserted into rb_mws, it is no longer stale */
	r->r.frmr.fr_state = FRMR_IS_VALID;

	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
	invalidate_wr.wr_id = (unsigned long)(void *)r;
	invalidate_wr.opcode = IB_WR_LOCAL_INV;
	invalidate_wr.send_flags = IB_SEND_SIGNALED;
	invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	dprintk("RPC: %s: frmr %p invalidating rkey %08x\n",
		__func__, r, r->r.frmr.fr_mr->rkey);

	read_lock(&ia->ri_qplock);
	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
	read_unlock(&ia->ri_qplock);
	if (rc) {
		/* Force rpcrdma_buffer_get() to retry */
		r->r.frmr.fr_state = FRMR_IS_STALE;
		dprintk("RPC: %s: ib_post_send failed, %i\n",
			__func__, rc);
	}
}

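/* Post a fresh LOCAL_INV for each stale FRMR found on the list,
 * then splice them all back onto rb_mws under the buffer lock.
 */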
static void
rpcrdma_retry_flushed_linv(struct list_head *stale,
			   struct rpcrdma_buffer *buf)
{
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	struct list_head *pos;
	struct rpcrdma_mw *r;
	unsigned long flags;

	list_for_each(pos, stale) {
		r = list_entry(pos, struct rpcrdma_mw, mw_list);
		rpcrdma_retry_local_inv(r, ia);
	}

	spin_lock_irqsave(&buf->rb_lock, flags);
	list_splice_tail(stale, &buf->rb_mws);
	spin_unlock_irqrestore(&buf->rb_lock, flags);
}

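/* Attach FRMRs from rb_mws to the request's segments. Stale FRMRs
 * are diverted onto the "stale" list for recovery; if the pool runs
 * dry, the request and its MRs are returned and NULL is the result.
 */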
static struct rpcrdma_req *
rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
			 struct list_head *stale)
{
	struct rpcrdma_mw *r;
	int i;

	i = RPCRDMA_MAX_SEGS - 1;
	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			       struct rpcrdma_mw, mw_list);
		list_del(&r->mw_list);
		if (r->r.frmr.fr_state == FRMR_IS_STALE) {
			list_add(&r->mw_list, stale);
			continue;
		}
		req->rl_segments[i].mr_chunk.rl_mw = r;
		if (unlikely(i-- == 0))
			return req;	/* Success */
	}

	/* Not enough entries on rb_mws for this req */
	rpcrdma_buffer_put_sendbuf(req, buf);
	rpcrdma_buffer_put_mrs(req, buf);
	return NULL;
}

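/* Attach FMRs from rb_mws to the request's segments. If the pool
 * runs dry, the request and its MRs are returned and NULL is the
 * result.
 */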
1444static struct rpcrdma_req *
1445rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
Chuck Leverc2922c02014-07-29 17:24:36 -04001446{
1447 struct rpcrdma_mw *r;
1448 int i;
1449
1450 i = RPCRDMA_MAX_SEGS - 1;
1451 while (!list_empty(&buf->rb_mws)) {
1452 r = list_entry(buf->rb_mws.next,
1453 struct rpcrdma_mw, mw_list);
1454 list_del(&r->mw_list);
1455 req->rl_segments[i].mr_chunk.rl_mw = r;
1456 if (unlikely(i-- == 0))
1457 return req; /* Success */
1458 }
1459
1460 /* Not enough entries on rb_mws for this req */
1461 rpcrdma_buffer_put_sendbuf(req, buf);
1462 rpcrdma_buffer_put_mrs(req, buf);
1463 return NULL;
1464}
1465
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001466/*
1467 * Get a set of request/reply buffers.
1468 *
1469 * Reply buffer (if needed) is attached to send buffer upon return.
1470 * Rule:
1471 * rb_send_index and rb_recv_index MUST always be pointing to the
1472 * *next* available buffer (non-NULL). They are incremented after
1473 * removing buffers, and decremented *before* returning them.
1474 */
1475struct rpcrdma_req *
1476rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1477{
Chuck Leverc2922c02014-07-29 17:24:36 -04001478 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
Chuck Leverddb6beb2014-07-29 17:24:54 -04001479 struct list_head stale;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001480 struct rpcrdma_req *req;
1481 unsigned long flags;
1482
1483 spin_lock_irqsave(&buffers->rb_lock, flags);
1484 if (buffers->rb_send_index == buffers->rb_max_requests) {
1485 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1486 dprintk("RPC: %s: out of request buffers\n", __func__);
1487 return ((struct rpcrdma_req *)NULL);
1488 }
1489
1490 req = buffers->rb_send_bufs[buffers->rb_send_index];
1491 if (buffers->rb_send_index < buffers->rb_recv_index) {
1492 dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
1493 __func__,
1494 buffers->rb_recv_index - buffers->rb_send_index);
1495 req->rl_reply = NULL;
1496 } else {
1497 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1498 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1499 }
1500 buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
Chuck Leverddb6beb2014-07-29 17:24:54 -04001501
1502 INIT_LIST_HEAD(&stale);
Chuck Leverc2922c02014-07-29 17:24:36 -04001503 switch (ia->ri_memreg_strategy) {
1504 case RPCRDMA_FRMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001505 req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
1506 break;
Chuck Leverc2922c02014-07-29 17:24:36 -04001507 case RPCRDMA_MTHCAFMR:
Chuck Leverddb6beb2014-07-29 17:24:54 -04001508 req = rpcrdma_buffer_get_fmrs(req, buffers);
Chuck Leverc2922c02014-07-29 17:24:36 -04001509 break;
1510 default:
1511 break;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001512 }
1513 spin_unlock_irqrestore(&buffers->rb_lock, flags);
Chuck Leverddb6beb2014-07-29 17:24:54 -04001514 if (!list_empty(&stale))
1515 rpcrdma_retry_flushed_linv(&stale, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001516 return req;
1517}
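/* Illustrative trace of the index rule above (a sketch, assuming a
 * freshly created pool with rb_max_requests == 2 and both indexes at
 * zero):
 *
 *	rpcrdma_buffer_get()	-> returns rb_send_bufs[0] with
 *				   rb_recv_bufs[0] attached; both
 *				   indexes advance to 1
 *	rpcrdma_buffer_get()	-> returns rb_send_bufs[1]; both
 *				   indexes advance to 2
 *	rpcrdma_buffer_get()	-> rb_send_index == rb_max_requests,
 *				   so "out of request buffers"
 *	rpcrdma_buffer_put()	-> pre-decrements the indexes and
 *				   restores the buffers
 */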
1518
1519/*
1520 * Put request/reply buffers back into pool.
1521 * Pre-decrement counter/array index.
1522 */
1523void
1524rpcrdma_buffer_put(struct rpcrdma_req *req)
1525{
1526 struct rpcrdma_buffer *buffers = req->rl_buffer;
1527 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001528 unsigned long flags;
1529
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001530 spin_lock_irqsave(&buffers->rb_lock, flags);
Chuck Leverc2922c02014-07-29 17:24:36 -04001531 rpcrdma_buffer_put_sendbuf(req, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001532 switch (ia->ri_memreg_strategy) {
Tom Talpey3197d3092008-10-09 15:00:20 -04001533 case RPCRDMA_FRMR:
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001534 case RPCRDMA_MTHCAFMR:
Chuck Leverc2922c02014-07-29 17:24:36 -04001535 rpcrdma_buffer_put_mrs(req, buffers);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001536 break;
1537 default:
1538 break;
1539 }
1540 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1541}
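/* Typical get/put pairing, sketched for illustration only (field and
 * function names as used elsewhere in xprtrdma; error handling
 * trimmed):
 *
 *	req = rpcrdma_buffer_get(&r_xprt->rx_buf);
 *	if (req == NULL)
 *		return NULL;			(pool exhausted)
 *	... marshal the RPC and rpcrdma_ep_post() it ...
 *	rpcrdma_buffer_put(req);		(when the request is freed)
 */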
1542
1543/*
1544 * Recover reply buffers from pool.
1545 * This happens when recovering from error conditions.
1546 * Post-increment counter/array index.
1547 */
1548void
1549rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
1550{
1551 struct rpcrdma_buffer *buffers = req->rl_buffer;
1552 unsigned long flags;
1553
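	/* rl_iov.length == 0 marks an oversized req set up outside the
	 * pool by xprt_rdma_allocate(); its rl_buffer points back to
	 * the original pool-owned req, hence the extra dereference.
	 */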
1554 if (req->rl_iov.length == 0) /* special case xprt_rdma_allocate() */
1555 buffers = ((struct rpcrdma_req *) buffers)->rl_buffer;
1556 spin_lock_irqsave(&buffers->rb_lock, flags);
1557 if (buffers->rb_recv_index < buffers->rb_max_requests) {
1558 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1559 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1560 }
1561 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1562}
1563
1564/*
1565 * Put reply buffers back into pool when not attached to
Chuck Leverb45ccfd2014-05-28 10:32:34 -04001566 * request. This happens in error conditions.
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001567 */
1568void
1569rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
1570{
1571 struct rpcrdma_buffer *buffers = rep->rr_buffer;
1572 unsigned long flags;
1573
1574 rep->rr_func = NULL;
1575 spin_lock_irqsave(&buffers->rb_lock, flags);
1576 buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
1577 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1578}
1579
1580/*
1581 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
1582 */
1583
1584int
1585rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
1586 struct ib_mr **mrp, struct ib_sge *iov)
1587{
1588 struct ib_phys_buf ipb;
1589 struct ib_mr *mr;
1590 int rc;
1591
1592 /*
1593 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
1594 */
1595 iov->addr = ib_dma_map_single(ia->ri_id->device,
1596 va, len, DMA_BIDIRECTIONAL);
Yan Burmanbf858ab2014-06-19 16:06:30 +03001597 if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
1598 return -ENOMEM;
1599
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001600 iov->length = len;
1601
Tom Talpeybd7ed1d2008-10-09 15:00:09 -04001602 if (ia->ri_have_dma_lkey) {
1603 *mrp = NULL;
1604 iov->lkey = ia->ri_dma_lkey;
1605 return 0;
1606 } else if (ia->ri_bind_mem != NULL) {
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001607 *mrp = NULL;
1608 iov->lkey = ia->ri_bind_mem->lkey;
1609 return 0;
1610 }
1611
1612 ipb.addr = iov->addr;
1613 ipb.size = iov->length;
1614 mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
1615 IB_ACCESS_LOCAL_WRITE, &iov->addr);
1616
1617 dprintk("RPC: %s: phys convert: 0x%llx "
1618 "registered 0x%llx length %d\n",
Andrew Mortona56daeb2007-10-16 01:29:57 -07001619 __func__, (unsigned long long)ipb.addr,
1620 (unsigned long long)iov->addr, len);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001621
1622 if (IS_ERR(mr)) {
1623 *mrp = NULL;
1624 rc = PTR_ERR(mr);
1625 dprintk("RPC: %s: failed with %i\n", __func__, rc);
1626 } else {
1627 *mrp = mr;
1628 iov->lkey = mr->lkey;
1629 rc = 0;
1630 }
1631
1632 return rc;
1633}
1634
1635int
1636rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
1637 struct ib_mr *mr, struct ib_sge *iov)
1638{
1639 int rc;
1640
1641 ib_dma_unmap_single(ia->ri_id->device,
1642 iov->addr, iov->length, DMA_BIDIRECTIONAL);
1643
1644	if (mr == NULL)
1645 return 0;
1646
1647 rc = ib_dereg_mr(mr);
1648 if (rc)
1649 dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
1650 return rc;
1651}
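/* Usage sketch for the wrappers above (illustrative only): the buffer
 * code registers a kmalloc'ed region once and reuses the resulting
 * sge when posting:
 *
 *	struct ib_mr *mr;
 *	struct ib_sge iov;
 *	int rc;
 *
 *	rc = rpcrdma_register_internal(ia, va, len, &mr, &iov);
 *	if (rc)
 *		return rc;
 *	... post sends/receives that reference iov ...
 *	rpcrdma_deregister_internal(ia, mr, &iov);
 */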
1652
1653/*
1654 * Wrappers for chunk registration, shared by read/write chunk code.
1655 */
1656
1657static void
1658rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
1659{
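	/* "writing" means the remote peer will RDMA Write into this
	 * memory (data arrives from the device), so map DMA_FROM_DEVICE;
	 * otherwise the segment is outbound and maps DMA_TO_DEVICE.
	 */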
1660 seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
1661 seg->mr_dmalen = seg->mr_len;
1662 if (seg->mr_page)
1663 seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
1664 seg->mr_page, offset_in_page(seg->mr_offset),
1665 seg->mr_dmalen, seg->mr_dir);
1666 else
1667 seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
1668 seg->mr_offset,
1669 seg->mr_dmalen, seg->mr_dir);
Tom Tucker5c635e02011-02-09 19:45:34 +00001670 if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
1671		dprintk("RPC: %s: dma mapping failed: mr_dma %llx mr_offset %p mr_dmalen %zu\n",
1672 __func__,
Randy Dunlap986d4ab2011-03-15 17:11:59 -07001673 (unsigned long long)seg->mr_dma,
1674 seg->mr_offset, seg->mr_dmalen);
Tom Tucker5c635e02011-02-09 19:45:34 +00001675 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001676}
1677
1678static void
1679rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
1680{
1681 if (seg->mr_page)
1682 ib_dma_unmap_page(ia->ri_id->device,
1683 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1684 else
1685 ib_dma_unmap_single(ia->ri_id->device,
1686 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1687}
1688
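/* Register a chunk of segments using a Fast Registration Memory Region.
 *
 * Roughly: DMA-map up to ri_max_frmr_depth segments, load their pages
 * into the FRMR's page list, then post a FAST_REG_MR work request that
 * advertises the region under a freshly bumped rkey.  If the FRMR is
 * unexpectedly still valid, a LOCAL_INV is chained ahead of the
 * FAST_REG_MR to invalidate it first.
 */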
Tom Talpey8d4ba032008-10-09 14:59:49 -04001689static int
Tom Talpey3197d3092008-10-09 15:00:20 -04001690rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1691 int *nsegs, int writing, struct rpcrdma_ia *ia,
1692 struct rpcrdma_xprt *r_xprt)
1693{
1694 struct rpcrdma_mr_seg *seg1 = seg;
Chuck Lever0dbb4102014-07-29 17:24:09 -04001695 struct rpcrdma_mw *mw = seg1->mr_chunk.rl_mw;
1696 struct rpcrdma_frmr *frmr = &mw->r.frmr;
1697 struct ib_mr *mr = frmr->fr_mr;
Tom Tucker5c635e02011-02-09 19:45:34 +00001698 struct ib_send_wr invalidate_wr, frmr_wr, *bad_wr, *post_wr;
1699
Tom Talpey3197d3092008-10-09 15:00:20 -04001700 u8 key;
1701 int len, pageoff;
1702 int i, rc;
Tom Tucker9b781452012-02-20 13:07:57 -06001703 int seg_len;
1704 u64 pa;
1705 int page_no;
Tom Talpey3197d3092008-10-09 15:00:20 -04001706
1707 pageoff = offset_in_page(seg1->mr_offset);
1708 seg1->mr_offset -= pageoff; /* start of page */
1709 seg1->mr_len += pageoff;
1710 len = -pageoff;
Steve Wise0fc6c4e2014-05-28 10:32:00 -04001711 if (*nsegs > ia->ri_max_frmr_depth)
1712 *nsegs = ia->ri_max_frmr_depth;
Tom Tucker9b781452012-02-20 13:07:57 -06001713 for (page_no = i = 0; i < *nsegs;) {
Tom Talpey3197d3092008-10-09 15:00:20 -04001714 rpcrdma_map_one(ia, seg, writing);
Tom Tucker9b781452012-02-20 13:07:57 -06001715 pa = seg->mr_dma;
1716 for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
Chuck Lever0dbb4102014-07-29 17:24:09 -04001717 frmr->fr_pgl->page_list[page_no++] = pa;
Tom Tucker9b781452012-02-20 13:07:57 -06001718 pa += PAGE_SIZE;
1719 }
Tom Talpey3197d3092008-10-09 15:00:20 -04001720 len += seg->mr_len;
1721 ++seg;
1722 ++i;
1723 /* Check for holes */
1724 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1725 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1726 break;
1727 }
1728 dprintk("RPC: %s: Using frmr %p to map %d segments\n",
Chuck Lever0dbb4102014-07-29 17:24:09 -04001729 __func__, mw, i);
Tom Talpey3197d3092008-10-09 15:00:20 -04001730
Chuck Lever9f9d8022014-07-29 17:24:45 -04001731 if (unlikely(frmr->fr_state != FRMR_IS_INVALID)) {
Tom Tucker5c635e02011-02-09 19:45:34 +00001732 dprintk("RPC: %s: frmr %x left valid, posting invalidate.\n",
Chuck Lever0dbb4102014-07-29 17:24:09 -04001733 __func__, mr->rkey);
Tom Tucker5c635e02011-02-09 19:45:34 +00001734 /* Invalidate before using. */
1735 memset(&invalidate_wr, 0, sizeof invalidate_wr);
Chuck Lever0dbb4102014-07-29 17:24:09 -04001736 invalidate_wr.wr_id = (unsigned long)(void *)mw;
Tom Tucker5c635e02011-02-09 19:45:34 +00001737 invalidate_wr.next = &frmr_wr;
1738 invalidate_wr.opcode = IB_WR_LOCAL_INV;
1739 invalidate_wr.send_flags = IB_SEND_SIGNALED;
Chuck Lever0dbb4102014-07-29 17:24:09 -04001740 invalidate_wr.ex.invalidate_rkey = mr->rkey;
Tom Tucker5c635e02011-02-09 19:45:34 +00001741 DECR_CQCOUNT(&r_xprt->rx_ep);
1742 post_wr = &invalidate_wr;
1743 } else
1744 post_wr = &frmr_wr;
1745
Tom Talpey3197d3092008-10-09 15:00:20 -04001746 /* Prepare FRMR WR */
1747 memset(&frmr_wr, 0, sizeof frmr_wr);
Chuck Lever0dbb4102014-07-29 17:24:09 -04001748 frmr_wr.wr_id = (unsigned long)(void *)mw;
Tom Talpey3197d3092008-10-09 15:00:20 -04001749 frmr_wr.opcode = IB_WR_FAST_REG_MR;
Tom Tucker5c635e02011-02-09 19:45:34 +00001750 frmr_wr.send_flags = IB_SEND_SIGNALED;
Steve Wise7a8b80eb2010-08-11 12:47:08 -04001751 frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma;
Chuck Lever0dbb4102014-07-29 17:24:09 -04001752 frmr_wr.wr.fast_reg.page_list = frmr->fr_pgl;
Tom Tucker9b781452012-02-20 13:07:57 -06001753 frmr_wr.wr.fast_reg.page_list_len = page_no;
Tom Talpey3197d3092008-10-09 15:00:20 -04001754 frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
Tom Tucker9b781452012-02-20 13:07:57 -06001755 frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
Chuck Leverc977dea2014-05-28 10:35:06 -04001756 if (frmr_wr.wr.fast_reg.length < len) {
Chuck Lever5fc83f42014-07-29 17:23:17 -04001757 rc = -EIO;
1758 goto out_err;
Chuck Leverc977dea2014-05-28 10:35:06 -04001759 }
1760
1761 /* Bump the key */
Chuck Lever0dbb4102014-07-29 17:24:09 -04001762 key = (u8)(mr->rkey & 0x000000FF);
1763 ib_update_fast_reg_key(mr, ++key);
Chuck Leverc977dea2014-05-28 10:35:06 -04001764
Tom Talpey3197d3092008-10-09 15:00:20 -04001765 frmr_wr.wr.fast_reg.access_flags = (writing ?
Vu Pham68743082009-05-26 14:51:00 -04001766 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
1767 IB_ACCESS_REMOTE_READ);
Chuck Lever0dbb4102014-07-29 17:24:09 -04001768 frmr_wr.wr.fast_reg.rkey = mr->rkey;
Tom Talpey3197d3092008-10-09 15:00:20 -04001769 DECR_CQCOUNT(&r_xprt->rx_ep);
1770
Tom Tucker5c635e02011-02-09 19:45:34 +00001771 rc = ib_post_send(ia->ri_id->qp, post_wr, &bad_wr);
Tom Talpey3197d3092008-10-09 15:00:20 -04001772
1773 if (rc) {
1774 dprintk("RPC: %s: failed ib_post_send for register,"
1775 " status %i\n", __func__, rc);
Chuck Leverc93e9862014-07-29 17:24:19 -04001776 ib_update_fast_reg_key(mr, --key);
Chuck Lever5fc83f42014-07-29 17:23:17 -04001777 goto out_err;
Tom Talpey3197d3092008-10-09 15:00:20 -04001778 } else {
Chuck Lever0dbb4102014-07-29 17:24:09 -04001779 seg1->mr_rkey = mr->rkey;
Tom Talpey3197d3092008-10-09 15:00:20 -04001780 seg1->mr_base = seg1->mr_dma + pageoff;
1781 seg1->mr_nsegs = i;
1782 seg1->mr_len = len;
1783 }
1784 *nsegs = i;
Chuck Lever5fc83f42014-07-29 17:23:17 -04001785 return 0;
1786out_err:
1787 while (i--)
1788 rpcrdma_unmap_one(ia, --seg);
Tom Talpey3197d3092008-10-09 15:00:20 -04001789 return rc;
1790}
1791
1792static int
1793rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
1794 struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
1795{
1796 struct rpcrdma_mr_seg *seg1 = seg;
1797 struct ib_send_wr invalidate_wr, *bad_wr;
1798 int rc;
1799
Tom Talpey3197d3092008-10-09 15:00:20 -04001800 memset(&invalidate_wr, 0, sizeof invalidate_wr);
Tom Tucker5c635e02011-02-09 19:45:34 +00001801 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
Tom Talpey3197d3092008-10-09 15:00:20 -04001802 invalidate_wr.opcode = IB_WR_LOCAL_INV;
Tom Tucker5c635e02011-02-09 19:45:34 +00001803 invalidate_wr.send_flags = IB_SEND_SIGNALED;
Tom Talpey3197d3092008-10-09 15:00:20 -04001804 invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1805 DECR_CQCOUNT(&r_xprt->rx_ep);
1806
Chuck Lever73806c82014-07-29 17:23:25 -04001807 read_lock(&ia->ri_qplock);
1808 while (seg1->mr_nsegs--)
1809 rpcrdma_unmap_one(ia, seg++);
Tom Talpey3197d3092008-10-09 15:00:20 -04001810 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
Chuck Lever73806c82014-07-29 17:23:25 -04001811 read_unlock(&ia->ri_qplock);
Tom Talpey3197d3092008-10-09 15:00:20 -04001812 if (rc)
1813 dprintk("RPC: %s: failed ib_post_send for invalidate,"
1814 " status %i\n", __func__, rc);
1815 return rc;
1816}
1817
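/* Register a chunk of segments using an FMR (ib_map_phys_fmr).
 * Segments are DMA-mapped and their DMA addresses gathered into a
 * single array; a segment boundary that is not page aligned ends the
 * chunk, just as in the FRMR path above.
 */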
1818static int
Tom Talpey8d4ba032008-10-09 14:59:49 -04001819rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
1820 int *nsegs, int writing, struct rpcrdma_ia *ia)
1821{
1822 struct rpcrdma_mr_seg *seg1 = seg;
1823 u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
1824 int len, pageoff, i, rc;
1825
1826 pageoff = offset_in_page(seg1->mr_offset);
1827 seg1->mr_offset -= pageoff; /* start of page */
1828 seg1->mr_len += pageoff;
1829 len = -pageoff;
1830 if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
1831 *nsegs = RPCRDMA_MAX_DATA_SEGS;
1832 for (i = 0; i < *nsegs;) {
1833 rpcrdma_map_one(ia, seg, writing);
1834 physaddrs[i] = seg->mr_dma;
1835 len += seg->mr_len;
1836 ++seg;
1837 ++i;
1838 /* Check for holes */
1839 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1840 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1841 break;
1842 }
1843 rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
1844 physaddrs, i, seg1->mr_dma);
1845 if (rc) {
1846 dprintk("RPC: %s: failed ib_map_phys_fmr "
1847 "%u@0x%llx+%i (%d)... status %i\n", __func__,
1848 len, (unsigned long long)seg1->mr_dma,
1849 pageoff, i, rc);
1850 while (i--)
1851 rpcrdma_unmap_one(ia, --seg);
1852 } else {
1853 seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
1854 seg1->mr_base = seg1->mr_dma + pageoff;
1855 seg1->mr_nsegs = i;
1856 seg1->mr_len = len;
1857 }
1858 *nsegs = i;
1859 return rc;
1860}
1861
1862static int
1863rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
1864 struct rpcrdma_ia *ia)
1865{
1866 struct rpcrdma_mr_seg *seg1 = seg;
1867 LIST_HEAD(l);
1868 int rc;
1869
1870 list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l);
1871 rc = ib_unmap_fmr(&l);
Chuck Lever73806c82014-07-29 17:23:25 -04001872 read_lock(&ia->ri_qplock);
Tom Talpey8d4ba032008-10-09 14:59:49 -04001873 while (seg1->mr_nsegs--)
1874 rpcrdma_unmap_one(ia, seg++);
Chuck Lever73806c82014-07-29 17:23:25 -04001875 read_unlock(&ia->ri_qplock);
Tom Talpey8d4ba032008-10-09 14:59:49 -04001876 if (rc)
1877 dprintk("RPC: %s: failed ib_unmap_fmr,"
1878 " status %i\n", __func__, rc);
1879 return rc;
1880}
1881
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001882int
1883rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
1884 int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
1885{
1886 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001887 int rc = 0;
1888
1889 switch (ia->ri_memreg_strategy) {
1890
1891#if RPCRDMA_PERSISTENT_REGISTRATION
1892 case RPCRDMA_ALLPHYSICAL:
1893 rpcrdma_map_one(ia, seg, writing);
1894 seg->mr_rkey = ia->ri_bind_mem->rkey;
1895 seg->mr_base = seg->mr_dma;
1896 seg->mr_nsegs = 1;
1897 nsegs = 1;
1898 break;
1899#endif
1900
Tom Talpey3197d3092008-10-09 15:00:20 -04001901 /* Registration using frmr registration */
1902 case RPCRDMA_FRMR:
1903 rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
1904 break;
1905
Tom Talpey8d4ba032008-10-09 14:59:49 -04001906 /* Registration using fmr memory registration */
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001907 case RPCRDMA_MTHCAFMR:
Tom Talpey8d4ba032008-10-09 14:59:49 -04001908 rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001909 break;
1910
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001911 default:
Chuck Lever0ac531c2014-05-28 10:32:43 -04001912 return -1;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001913 }
1914 if (rc)
1915 return -1;
1916
1917 return nsegs;
1918}
1919
1920int
1921rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
Chuck Lever13c9ff82014-05-28 10:33:08 -04001922 struct rpcrdma_xprt *r_xprt)
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001923{
1924 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001925 int nsegs = seg->mr_nsegs, rc;
1926
1927 switch (ia->ri_memreg_strategy) {
1928
1929#if RPCRDMA_PERSISTENT_REGISTRATION
1930 case RPCRDMA_ALLPHYSICAL:
Chuck Lever73806c82014-07-29 17:23:25 -04001931 read_lock(&ia->ri_qplock);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001932 rpcrdma_unmap_one(ia, seg);
Chuck Lever73806c82014-07-29 17:23:25 -04001933 read_unlock(&ia->ri_qplock);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001934 break;
1935#endif
1936
Tom Talpey3197d3092008-10-09 15:00:20 -04001937 case RPCRDMA_FRMR:
1938 rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
1939 break;
1940
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001941 case RPCRDMA_MTHCAFMR:
Tom Talpey8d4ba032008-10-09 14:59:49 -04001942 rc = rpcrdma_deregister_fmr_external(seg, ia);
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001943 break;
1944
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001945 default:
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001946 break;
1947 }
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001948 return nsegs;
1949}
1950
1951/*
1952 * Prepost any receive buffer, then post send.
1953 *
1954 * Receive buffer is donated to hardware, reclaimed upon recv completion.
1955 */
1956int
1957rpcrdma_ep_post(struct rpcrdma_ia *ia,
1958 struct rpcrdma_ep *ep,
1959 struct rpcrdma_req *req)
1960{
1961 struct ib_send_wr send_wr, *send_wr_fail;
1962 struct rpcrdma_rep *rep = req->rl_reply;
1963 int rc;
1964
1965 if (rep) {
1966 rc = rpcrdma_ep_post_recv(ia, ep, rep);
1967 if (rc)
1968 goto out;
1969 req->rl_reply = NULL;
1970 }
1971
1972 send_wr.next = NULL;
1973 send_wr.wr_id = 0ULL; /* no send cookie */
1974 send_wr.sg_list = req->rl_send_iov;
1975 send_wr.num_sge = req->rl_niovs;
1976 send_wr.opcode = IB_WR_SEND;
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04001977 if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */
1978 ib_dma_sync_single_for_device(ia->ri_id->device,
1979 req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
1980 DMA_TO_DEVICE);
1981 ib_dma_sync_single_for_device(ia->ri_id->device,
1982 req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
1983 DMA_TO_DEVICE);
1984 ib_dma_sync_single_for_device(ia->ri_id->device,
1985 req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
1986 DMA_TO_DEVICE);
1987
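	/* Most sends are posted unsignaled.  DECR_CQCOUNT counts them
	 * down; once the budget is spent, one signaled send is posted
	 * and the counter is reset, so the provider still generates a
	 * send completion every so often and can reclaim SQ resources.
	 */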
1988 if (DECR_CQCOUNT(ep) > 0)
1989 send_wr.send_flags = 0;
1990 else { /* Provider must take a send completion every now and then */
1991 INIT_CQCOUNT(ep);
1992 send_wr.send_flags = IB_SEND_SIGNALED;
1993 }
1994
1995 rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
1996 if (rc)
1997 dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
1998 rc);
1999out:
2000 return rc;
2001}
2002
2003/*
2004 * (Re)post a receive buffer.
2005 */
2006int
2007rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
2008 struct rpcrdma_ep *ep,
2009 struct rpcrdma_rep *rep)
2010{
2011 struct ib_recv_wr recv_wr, *recv_wr_fail;
2012 int rc;
2013
2014 recv_wr.next = NULL;
2015 recv_wr.wr_id = (u64) (unsigned long) rep;
2016 recv_wr.sg_list = &rep->rr_iov;
2017 recv_wr.num_sge = 1;
2018
2019 ib_dma_sync_single_for_cpu(ia->ri_id->device,
2020 rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);
2021
\"Talpey, Thomas\c56c65f2007-09-10 13:51:18 -04002022 rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
2023
2024 if (rc)
2025 dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
2026 rc);
2027 return rc;
2028}
Chuck Lever43e95982014-07-29 17:23:34 -04002029
2030/* Physical mapping means one Read/Write list entry per-page.
2031 * All list entries must fit within an inline buffer
2032 *
2033 * NB: The server must return a Write list for NFS READ,
2034 * which has the same constraint. Factor in the inline
2035 * rsize as well.
2036 */
2037static size_t
2038rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt)
2039{
2040 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
2041 unsigned int inline_size, pages;
2042
2043 inline_size = min_t(unsigned int,
2044 cdata->inline_wsize, cdata->inline_rsize);
2045 inline_size -= RPCRDMA_HDRLEN_MIN;
2046 pages = inline_size / sizeof(struct rpcrdma_segment);
2047 return pages << PAGE_SHIFT;
2048}
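/* Worked example (numbers are illustrative; the real values come from
 * the negotiated inline sizes and the header definitions): with
 * 1024-byte inline buffers, a 28-byte minimal RPC-over-RDMA header,
 * and 16-byte struct rpcrdma_segment entries,
 *
 *	inline_size = 1024 - 28 = 996
 *	pages       = 996 / 16  = 62
 *	max payload = 62 << PAGE_SHIFT = 253952 bytes (4KB pages)
 */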
2049
2050static size_t
2051rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt)
2052{
2053 return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
2054}
2055
2056size_t
2057rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt)
2058{
2059 size_t result;
2060
2061 switch (r_xprt->rx_ia.ri_memreg_strategy) {
2062 case RPCRDMA_ALLPHYSICAL:
2063 result = rpcrdma_physical_max_payload(r_xprt);
2064 break;
2065 default:
2066 result = rpcrdma_mr_max_payload(r_xprt);
2067 }
2068 return result;
2069}