blob: 5c7d235672fa666b8d21a34b96199b66bb27906b [file] [log] [blame]
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -04001/*
2 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 */
39
40/*
41 * transport.c
42 *
43 * This file contains the top-level implementation of an RPC RDMA
44 * transport.
45 *
46 * Naming convention: functions beginning with xprt_ are part of the
47 * transport switch. All others are RPC RDMA internal.
48 */
49
50#include <linux/module.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090051#include <linux/slab.h>
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040052#include <linux/seq_file.h>
Jeff Layton59766872013-02-04 12:50:00 -050053#include <linux/sunrpc/addr.h>
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040054
55#include "xprt_rdma.h"
56
Jeff Laytonf895b252014-11-17 16:58:04 -050057#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040058# define RPCDBG_FACILITY RPCDBG_TRANS
59#endif
60
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040061/*
62 * tunables
63 */
64
65static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
Chuck Lever5d252f92016-01-07 14:50:10 -050066unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040067static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
68static unsigned int xprt_rdma_inline_write_padding;
Tom Talpey3197d3092008-10-09 15:00:20 -040069static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
Chuck Leverd5440e22014-11-08 20:14:53 -050070 int xprt_rdma_pad_optimize = 1;
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040071
Jeff Laytonf895b252014-11-17 16:58:04 -050072#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040073
74static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE;
75static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE;
76static unsigned int zero;
77static unsigned int max_padding = PAGE_SIZE;
78static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
79static unsigned int max_memreg = RPCRDMA_LAST - 1;
80
81static struct ctl_table_header *sunrpc_table_header;
82
Joe Perchesfe2c6332013-06-11 23:04:25 -070083static struct ctl_table xr_tunables_table[] = {
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040084 {
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040085 .procname = "rdma_slot_table_entries",
86 .data = &xprt_rdma_slot_table_entries,
87 .maxlen = sizeof(unsigned int),
88 .mode = 0644,
Eric W. Biederman6d456112009-11-16 03:11:48 -080089 .proc_handler = proc_dointvec_minmax,
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040090 .extra1 = &min_slot_table_size,
91 .extra2 = &max_slot_table_size
92 },
93 {
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040094 .procname = "rdma_max_inline_read",
95 .data = &xprt_rdma_max_inline_read,
96 .maxlen = sizeof(unsigned int),
97 .mode = 0644,
Eric W. Biederman6d456112009-11-16 03:11:48 -080098 .proc_handler = proc_dointvec,
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -040099 },
100 {
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400101 .procname = "rdma_max_inline_write",
102 .data = &xprt_rdma_max_inline_write,
103 .maxlen = sizeof(unsigned int),
104 .mode = 0644,
Eric W. Biederman6d456112009-11-16 03:11:48 -0800105 .proc_handler = proc_dointvec,
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400106 },
107 {
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400108 .procname = "rdma_inline_write_padding",
109 .data = &xprt_rdma_inline_write_padding,
110 .maxlen = sizeof(unsigned int),
111 .mode = 0644,
Eric W. Biederman6d456112009-11-16 03:11:48 -0800112 .proc_handler = proc_dointvec_minmax,
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400113 .extra1 = &zero,
114 .extra2 = &max_padding,
115 },
116 {
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400117 .procname = "rdma_memreg_strategy",
118 .data = &xprt_rdma_memreg_strategy,
119 .maxlen = sizeof(unsigned int),
120 .mode = 0644,
Eric W. Biederman6d456112009-11-16 03:11:48 -0800121 .proc_handler = proc_dointvec_minmax,
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400122 .extra1 = &min_memreg,
123 .extra2 = &max_memreg,
124 },
125 {
Tom Talpey9191ca32008-10-09 15:01:11 -0400126 .procname = "rdma_pad_optimize",
127 .data = &xprt_rdma_pad_optimize,
128 .maxlen = sizeof(unsigned int),
129 .mode = 0644,
Eric W. Biederman6d456112009-11-16 03:11:48 -0800130 .proc_handler = proc_dointvec,
Tom Talpey9191ca32008-10-09 15:01:11 -0400131 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -0800132 { },
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400133};
134
Joe Perchesfe2c6332013-06-11 23:04:25 -0700135static struct ctl_table sunrpc_table[] = {
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400136 {
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400137 .procname = "sunrpc",
138 .mode = 0555,
139 .child = xr_tunables_table
140 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -0800141 { },
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400142};
143
144#endif
145
Chuck Lever5d252f92016-01-07 14:50:10 -0500146static struct rpc_xprt_ops xprt_rdma_procs; /*forward reference */
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400147
148static void
Chuck Lever0dd39ca2015-03-30 14:33:43 -0400149xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap)
150{
151 struct sockaddr_in *sin = (struct sockaddr_in *)sap;
152 char buf[20];
153
154 snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
155 xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
156
157 xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA;
158}
159
160static void
161xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap)
162{
163 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
164 char buf[40];
165
166 snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr);
167 xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
168
169 xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6;
170}
171
Chuck Lever5d252f92016-01-07 14:50:10 -0500172void
Chuck Lever5231eb92015-08-03 13:02:41 -0400173xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap)
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400174{
Chuck Lever0dd39ca2015-03-30 14:33:43 -0400175 char buf[128];
176
177 switch (sap->sa_family) {
178 case AF_INET:
179 xprt_rdma_format_addresses4(xprt, sap);
180 break;
181 case AF_INET6:
182 xprt_rdma_format_addresses6(xprt, sap);
183 break;
184 default:
185 pr_err("rpcrdma: Unrecognized address family\n");
186 return;
187 }
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400188
Chuck Leverc877b842009-08-09 15:09:36 -0400189 (void)rpc_ntop(sap, buf, sizeof(buf));
190 xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400191
Joe Perches81160e662010-03-08 12:15:59 -0800192 snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
Chuck Leverc877b842009-08-09 15:09:36 -0400193 xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400194
Joe Perches81160e662010-03-08 12:15:59 -0800195 snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
Chuck Leverc877b842009-08-09 15:09:36 -0400196 xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400197
Chuck Lever0dd39ca2015-03-30 14:33:43 -0400198 xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400199}
200
Chuck Lever5d252f92016-01-07 14:50:10 -0500201void
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400202xprt_rdma_free_addresses(struct rpc_xprt *xprt)
203{
Chuck Lever33e01dc2008-01-14 12:32:20 -0500204 unsigned int i;
205
206 for (i = 0; i < RPC_DISPLAY_MAX; i++)
207 switch (i) {
208 case RPC_DISPLAY_PROTO:
209 case RPC_DISPLAY_NETID:
210 continue;
211 default:
212 kfree(xprt->address_strings[i]);
213 }
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400214}
215
216static void
217xprt_rdma_connect_worker(struct work_struct *work)
218{
Chuck Lever5abefb82015-01-21 11:02:37 -0500219 struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt,
220 rx_connect_worker.work);
221 struct rpc_xprt *xprt = &r_xprt->rx_xprt;
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400222 int rc = 0;
223
Trond Myklebustd19751e2012-09-11 17:21:25 -0400224 xprt_clear_connected(xprt);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400225
Trond Myklebustd19751e2012-09-11 17:21:25 -0400226 dprintk("RPC: %s: %sconnect\n", __func__,
227 r_xprt->rx_ep.rep_connected != 0 ? "re" : "");
228 rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
229 if (rc)
230 xprt_wake_pending_tasks(xprt, rc);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400231
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400232 dprintk("RPC: %s: exit\n", __func__);
233 xprt_clear_connecting(xprt);
234}
235
Chuck Lever4a068252015-05-11 14:02:25 -0400236static void
237xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
238{
239 struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt,
240 rx_xprt);
241
242 pr_info("rpcrdma: injecting transport disconnect on xprt=%p\n", xprt);
243 rdma_disconnect(r_xprt->rx_ia.ri_id);
244}
245
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400246/*
247 * xprt_rdma_destroy
248 *
249 * Destroy the xprt.
250 * Free all memory associated with the object, including its own.
251 * NOTE: none of the *destroy methods free memory for their top-level
252 * objects, even though they may have allocated it (they do free
253 * private memory). It's up to the caller to handle it. In this
254 * case (RDMA transport), all structure memory is inlined with the
255 * struct rpcrdma_xprt.
256 */
257static void
258xprt_rdma_destroy(struct rpc_xprt *xprt)
259{
260 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400261
262 dprintk("RPC: %s: called\n", __func__);
263
Chuck Lever5abefb82015-01-21 11:02:37 -0500264 cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400265
266 xprt_clear_connected(xprt);
267
Chuck Lever7f1d5412014-05-28 10:33:16 -0400268 rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
Steve Wise72c02172015-09-21 12:24:23 -0500269 rpcrdma_buffer_destroy(&r_xprt->rx_buf);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400270 rpcrdma_ia_close(&r_xprt->rx_ia);
271
272 xprt_rdma_free_addresses(xprt);
273
Pavel Emelyanove204e622010-09-29 16:03:13 +0400274 xprt_free(xprt);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400275
276 dprintk("RPC: %s: returning\n", __func__);
277
278 module_put(THIS_MODULE);
279}
280
Trond Myklebust2881ae72007-12-20 16:03:54 -0500281static const struct rpc_timeout xprt_rdma_default_timeout = {
282 .to_initval = 60 * HZ,
283 .to_maxval = 60 * HZ,
284};
285
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400286/**
287 * xprt_setup_rdma - Set up transport to use RDMA
288 *
289 * @args: rpc transport arguments
290 */
291static struct rpc_xprt *
292xprt_setup_rdma(struct xprt_create *args)
293{
294 struct rpcrdma_create_data_internal cdata;
295 struct rpc_xprt *xprt;
296 struct rpcrdma_xprt *new_xprt;
297 struct rpcrdma_ep *new_ep;
Chuck Lever5231eb92015-08-03 13:02:41 -0400298 struct sockaddr *sap;
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400299 int rc;
300
301 if (args->addrlen > sizeof(xprt->addr)) {
302 dprintk("RPC: %s: address too large\n", __func__);
303 return ERR_PTR(-EBADF);
304 }
305
Pavel Emelyanov37aa2132010-09-29 16:05:43 +0400306 xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt),
Trond Myklebustd9ba1312011-07-17 18:11:30 -0400307 xprt_rdma_slot_table_entries,
Pavel Emelyanovbd1722d2010-09-29 16:02:43 +0400308 xprt_rdma_slot_table_entries);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400309 if (xprt == NULL) {
310 dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
311 __func__);
312 return ERR_PTR(-ENOMEM);
313 }
314
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400315 /* 60 second timeout, no retries */
Trond Myklebustba7392b2007-12-20 16:03:55 -0500316 xprt->timeout = &xprt_rdma_default_timeout;
Chuck Leverbfaee092014-05-28 10:34:32 -0400317 xprt->bind_timeout = RPCRDMA_BIND_TO;
318 xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
319 xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400320
321 xprt->resvport = 0; /* privileged port not needed */
322 xprt->tsh_size = 0; /* RPC-RDMA handles framing */
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400323 xprt->ops = &xprt_rdma_procs;
324
325 /*
326 * Set up RDMA-specific connect data.
327 */
328
Chuck Lever5231eb92015-08-03 13:02:41 -0400329 sap = (struct sockaddr *)&cdata.addr;
330 memcpy(sap, args->dstaddr, args->addrlen);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400331
332 /* Ensure xprt->addr holds valid server TCP (not RDMA)
333 * address, for any side protocols which peek at it */
334 xprt->prot = IPPROTO_TCP;
335 xprt->addrlen = args->addrlen;
Chuck Lever5231eb92015-08-03 13:02:41 -0400336 memcpy(&xprt->addr, sap, xprt->addrlen);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400337
Chuck Lever5231eb92015-08-03 13:02:41 -0400338 if (rpc_get_port(sap))
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400339 xprt_set_bound(xprt);
340
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400341 cdata.max_requests = xprt->max_reqs;
342
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400343 cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */
344 cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */
345
346 cdata.inline_wsize = xprt_rdma_max_inline_write;
347 if (cdata.inline_wsize > cdata.wsize)
348 cdata.inline_wsize = cdata.wsize;
349
350 cdata.inline_rsize = xprt_rdma_max_inline_read;
351 if (cdata.inline_rsize > cdata.rsize)
352 cdata.inline_rsize = cdata.rsize;
353
354 cdata.padding = xprt_rdma_inline_write_padding;
355
356 /*
357 * Create new transport instance, which includes initialized
358 * o ia
359 * o endpoint
360 * o buffers
361 */
362
363 new_xprt = rpcx_to_rdmax(xprt);
364
Chuck Lever5231eb92015-08-03 13:02:41 -0400365 rc = rpcrdma_ia_open(new_xprt, sap, xprt_rdma_memreg_strategy);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400366 if (rc)
367 goto out1;
368
369 /*
370 * initialize and create ep
371 */
372 new_xprt->rx_data = cdata;
373 new_ep = &new_xprt->rx_ep;
374 new_ep->rep_remote_addr = cdata.addr;
375
376 rc = rpcrdma_ep_create(&new_xprt->rx_ep,
377 &new_xprt->rx_ia, &new_xprt->rx_data);
378 if (rc)
379 goto out2;
380
381 /*
382 * Allocate pre-registered send and receive buffers for headers and
383 * any inline data. Also specify any padding which will be provided
384 * from a preregistered zero buffer.
385 */
Chuck Leverac920d02015-01-21 11:03:44 -0500386 rc = rpcrdma_buffer_create(new_xprt);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400387 if (rc)
388 goto out3;
389
390 /*
391 * Register a callback for connection events. This is necessary because
392 * connection loss notification is async. We also catch connection loss
393 * when reaping receives.
394 */
Chuck Lever5abefb82015-01-21 11:02:37 -0500395 INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
396 xprt_rdma_connect_worker);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400397
Chuck Lever5231eb92015-08-03 13:02:41 -0400398 xprt_rdma_format_addresses(xprt, sap);
Chuck Lever1c9351e2015-03-30 14:34:30 -0400399 xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt);
400 if (xprt->max_payload == 0)
401 goto out4;
402 xprt->max_payload <<= PAGE_SHIFT;
Chuck Lever43e95982014-07-29 17:23:34 -0400403 dprintk("RPC: %s: transport data payload maximum: %zu bytes\n",
404 __func__, xprt->max_payload);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400405
406 if (!try_module_get(THIS_MODULE))
407 goto out4;
408
Chuck Lever5231eb92015-08-03 13:02:41 -0400409 dprintk("RPC: %s: %s:%s\n", __func__,
410 xprt->address_strings[RPC_DISPLAY_ADDR],
411 xprt->address_strings[RPC_DISPLAY_PORT]);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400412 return xprt;
413
414out4:
415 xprt_rdma_free_addresses(xprt);
416 rc = -EINVAL;
417out3:
Chuck Lever7f1d5412014-05-28 10:33:16 -0400418 rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400419out2:
420 rpcrdma_ia_close(&new_xprt->rx_ia);
421out1:
Pavel Emelyanove204e622010-09-29 16:03:13 +0400422 xprt_free(xprt);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400423 return ERR_PTR(rc);
424}
425
426/*
427 * Close a connection, during shutdown or timeout/reconnect
428 */
429static void
430xprt_rdma_close(struct rpc_xprt *xprt)
431{
432 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
433
434 dprintk("RPC: %s: closing\n", __func__);
Tom Talpey08ca0dc2008-10-10 11:32:34 -0400435 if (r_xprt->rx_ep.rep_connected > 0)
436 xprt->reestablish_timeout = 0;
Trond Myklebust62da3b22007-11-06 18:44:20 -0500437 xprt_disconnect_done(xprt);
Chuck Lever282191c2014-07-29 17:25:55 -0400438 rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400439}
440
441static void
442xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
443{
444 struct sockaddr_in *sap;
445
446 sap = (struct sockaddr_in *)&xprt->addr;
447 sap->sin_port = htons(port);
448 sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr;
449 sap->sin_port = htons(port);
450 dprintk("RPC: %s: %u\n", __func__, port);
451}
452
453static void
Trond Myklebust1b092092013-01-08 09:26:49 -0500454xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400455{
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400456 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
457
Trond Myklebust0b9e7942010-04-16 16:41:57 -0400458 if (r_xprt->rx_ep.rep_connected != 0) {
459 /* Reconnect */
Chuck Lever5abefb82015-01-21 11:02:37 -0500460 schedule_delayed_work(&r_xprt->rx_connect_worker,
461 xprt->reestablish_timeout);
Trond Myklebust0b9e7942010-04-16 16:41:57 -0400462 xprt->reestablish_timeout <<= 1;
Chuck Leverbfaee092014-05-28 10:34:32 -0400463 if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO)
464 xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO;
465 else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO)
466 xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
Trond Myklebust0b9e7942010-04-16 16:41:57 -0400467 } else {
Chuck Lever5abefb82015-01-21 11:02:37 -0500468 schedule_delayed_work(&r_xprt->rx_connect_worker, 0);
Trond Myklebust0b9e7942010-04-16 16:41:57 -0400469 if (!RPC_IS_ASYNC(task))
Chuck Lever5abefb82015-01-21 11:02:37 -0500470 flush_delayed_work(&r_xprt->rx_connect_worker);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400471 }
472}
473
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400474/*
475 * The RDMA allocate/free functions need the task structure as a place
476 * to hide the struct rpcrdma_req, which is necessary for the actual send/recv
Chuck Lever0ca77dc2015-01-21 11:04:08 -0500477 * sequence.
478 *
479 * The RPC layer allocates both send and receive buffers in the same call
480 * (rq_send_buf and rq_rcv_buf are both part of a single contiguous buffer).
481 * We may register rq_rcv_buf when using reply chunks.
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400482 */
483static void *
484xprt_rdma_allocate(struct rpc_task *task, size_t size)
485{
Trond Myklebusta4f08352013-01-08 09:10:21 -0500486 struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
Chuck Lever0ca77dc2015-01-21 11:04:08 -0500487 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
488 struct rpcrdma_regbuf *rb;
489 struct rpcrdma_req *req;
490 size_t min_size;
Chuck Levera0a1d502015-01-26 17:11:47 -0500491 gfp_t flags;
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400492
Chuck Lever0ca77dc2015-01-21 11:04:08 -0500493 req = rpcrdma_buffer_get(&r_xprt->rx_buf);
Chuck Leverc977dea2014-05-28 10:35:06 -0400494 if (req == NULL)
495 return NULL;
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400496
Chuck Lever5d252f92016-01-07 14:50:10 -0500497 flags = RPCRDMA_DEF_GFP;
Chuck Levera0a1d502015-01-26 17:11:47 -0500498 if (RPC_IS_SWAPPER(task))
499 flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
500
Chuck Lever85275c82015-01-21 11:04:16 -0500501 if (req->rl_rdmabuf == NULL)
502 goto out_rdmabuf;
Chuck Lever0ca77dc2015-01-21 11:04:08 -0500503 if (req->rl_sendbuf == NULL)
504 goto out_sendbuf;
505 if (size > req->rl_sendbuf->rg_size)
506 goto out_sendbuf;
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400507
Chuck Lever0ca77dc2015-01-21 11:04:08 -0500508out:
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400509 dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
Tom Talpey575448b2008-10-09 15:00:40 -0400510 req->rl_connect_cookie = 0; /* our reserved value */
Chuck Lever0ca77dc2015-01-21 11:04:08 -0500511 return req->rl_sendbuf->rg_base;
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400512
Chuck Lever85275c82015-01-21 11:04:16 -0500513out_rdmabuf:
514 min_size = RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp);
515 rb = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, min_size, flags);
516 if (IS_ERR(rb))
517 goto out_fail;
518 req->rl_rdmabuf = rb;
519
Chuck Lever0ca77dc2015-01-21 11:04:08 -0500520out_sendbuf:
521 /* XDR encoding and RPC/RDMA marshaling of this request has not
522 * yet occurred. Thus a lower bound is needed to prevent buffer
523 * overrun during marshaling.
524 *
525 * RPC/RDMA marshaling may choose to send payload bearing ops
526 * inline, if the result is smaller than the inline threshold.
527 * The value of the "size" argument accounts for header
528 * requirements but not for the payload in these cases.
529 *
530 * Likewise, allocate enough space to receive a reply up to the
531 * size of the inline threshold.
532 *
533 * It's unlikely that both the send header and the received
534 * reply will be large, but slush is provided here to allow
535 * flexibility when marshaling.
536 */
537 min_size = RPCRDMA_INLINE_READ_THRESHOLD(task->tk_rqstp);
538 min_size += RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp);
539 if (size < min_size)
540 size = min_size;
541
542 rb = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, size, flags);
543 if (IS_ERR(rb))
544 goto out_fail;
545 rb->rg_owner = req;
546
547 r_xprt->rx_stats.hardway_register_count += size;
548 rpcrdma_free_regbuf(&r_xprt->rx_ia, req->rl_sendbuf);
549 req->rl_sendbuf = rb;
550 goto out;
551
552out_fail:
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400553 rpcrdma_buffer_put(req);
Chuck Lever0ca77dc2015-01-21 11:04:08 -0500554 r_xprt->rx_stats.failed_marshal_count++;
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400555 return NULL;
556}
557
558/*
559 * This function returns all RDMA resources to the pool.
560 */
561static void
562xprt_rdma_free(void *buffer)
563{
564 struct rpcrdma_req *req;
565 struct rpcrdma_xprt *r_xprt;
Chuck Lever0ca77dc2015-01-21 11:04:08 -0500566 struct rpcrdma_regbuf *rb;
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400567 int i;
568
569 if (buffer == NULL)
570 return;
571
Chuck Lever0ca77dc2015-01-21 11:04:08 -0500572 rb = container_of(buffer, struct rpcrdma_regbuf, rg_base[0]);
573 req = rb->rg_owner;
574 r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400575
Chuck Lever0ca77dc2015-01-21 11:04:08 -0500576 dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400577
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400578 for (i = 0; req->rl_nchunks;) {
579 --req->rl_nchunks;
Chuck Lever6814bae2015-03-30 14:34:48 -0400580 i += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt,
581 &req->rl_segments[i]);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400582 }
583
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400584 rpcrdma_buffer_put(req);
585}
586
587/*
588 * send_request invokes the meat of RPC RDMA. It must do the following:
589 * 1. Marshal the RPC request into an RPC RDMA request, which means
590 * putting a header in front of data, and creating IOVs for RDMA
591 * from those in the request.
592 * 2. In marshaling, detect opportunities for RDMA, and use them.
593 * 3. Post a recv message to set up asynch completion, then send
594 * the request (rpcrdma_ep_post).
595 * 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP).
596 */
597
598static int
599xprt_rdma_send_request(struct rpc_task *task)
600{
601 struct rpc_rqst *rqst = task->tk_rqstp;
Trond Myklebusta4f08352013-01-08 09:10:21 -0500602 struct rpc_xprt *xprt = rqst->rq_xprt;
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400603 struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
604 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
Chuck Lever6ab59942014-07-29 17:23:43 -0400605 int rc = 0;
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400606
Chuck Levere2377942015-03-30 14:33:53 -0400607 rc = rpcrdma_marshal_req(rqst);
Chuck Lever6ab59942014-07-29 17:23:43 -0400608 if (rc < 0)
609 goto failed_marshal;
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400610
611 if (req->rl_reply == NULL) /* e.g. reconnection */
612 rpcrdma_recv_buffer_get(req);
613
Tom Talpey575448b2008-10-09 15:00:40 -0400614 /* Must suppress retransmit to maintain credits */
615 if (req->rl_connect_cookie == xprt->connect_cookie)
616 goto drop_connection;
617 req->rl_connect_cookie = xprt->connect_cookie;
618
619 if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
620 goto drop_connection;
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400621
Trond Myklebustd60dbb22010-05-13 12:51:49 -0400622 rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400623 rqst->rq_bytes_sent = 0;
624 return 0;
Tom Talpey575448b2008-10-09 15:00:40 -0400625
Chuck Leverc93c6222014-05-28 10:35:14 -0400626failed_marshal:
627 r_xprt->rx_stats.failed_marshal_count++;
628 dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n",
629 __func__, rc);
630 if (rc == -EIO)
631 return -EIO;
Tom Talpey575448b2008-10-09 15:00:40 -0400632drop_connection:
633 xprt_disconnect_done(xprt);
634 return -ENOTCONN; /* implies disconnect */
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400635}
636
Chuck Lever5d252f92016-01-07 14:50:10 -0500637void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400638{
639 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
640 long idle_time = 0;
641
642 if (xprt_connected(xprt))
643 idle_time = (long)(jiffies - xprt->last_used) / HZ;
644
Chuck Lever763f7e42015-08-03 13:04:36 -0400645 seq_puts(seq, "\txprt:\trdma ");
646 seq_printf(seq, "%u %lu %lu %lu %ld %lu %lu %lu %llu %llu ",
647 0, /* need a local port? */
648 xprt->stat.bind_count,
649 xprt->stat.connect_count,
650 xprt->stat.connect_time,
651 idle_time,
652 xprt->stat.sends,
653 xprt->stat.recvs,
654 xprt->stat.bad_xids,
655 xprt->stat.req_u,
656 xprt->stat.bklog_u);
Chuck Lever860477d2015-08-03 13:04:45 -0400657 seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %lu %lu %lu %lu\n",
Chuck Lever763f7e42015-08-03 13:04:36 -0400658 r_xprt->rx_stats.read_chunk_count,
659 r_xprt->rx_stats.write_chunk_count,
660 r_xprt->rx_stats.reply_chunk_count,
661 r_xprt->rx_stats.total_rdma_request,
662 r_xprt->rx_stats.total_rdma_reply,
663 r_xprt->rx_stats.pullup_copy_count,
664 r_xprt->rx_stats.fixup_copy_count,
665 r_xprt->rx_stats.hardway_register_count,
666 r_xprt->rx_stats.failed_marshal_count,
Chuck Lever860477d2015-08-03 13:04:45 -0400667 r_xprt->rx_stats.bad_reply_count,
668 r_xprt->rx_stats.nomsg_call_count);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400669}
670
/* Enabling swap needs no work on this transport; always reports success. */
static int
xprt_rdma_enable_swap(struct rpc_xprt *xprt)
{
	return 0;
}
676
/* Nothing to undo: xprt_rdma_enable_swap() acquires nothing. */
static void
xprt_rdma_disable_swap(struct rpc_xprt *xprt)
{
}
681
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400682/*
683 * Plumbing for rpc transport switch and kernel module
684 */
685
686static struct rpc_xprt_ops xprt_rdma_procs = {
Chuck Levere7ce7102014-05-28 10:34:57 -0400687 .reserve_xprt = xprt_reserve_xprt_cong,
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400688 .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */
Trond Myklebustf39c1bf2012-09-07 11:08:50 -0400689 .alloc_slot = xprt_alloc_slot,
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400690 .release_request = xprt_release_rqst_cong, /* ditto */
691 .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */
692 .rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */
693 .set_port = xprt_rdma_set_port,
694 .connect = xprt_rdma_connect,
695 .buf_alloc = xprt_rdma_allocate,
696 .buf_free = xprt_rdma_free,
697 .send_request = xprt_rdma_send_request,
698 .close = xprt_rdma_close,
699 .destroy = xprt_rdma_destroy,
Jeff Laytond67fa4d2015-06-03 16:14:29 -0400700 .print_stats = xprt_rdma_print_stats,
701 .enable_swap = xprt_rdma_enable_swap,
702 .disable_swap = xprt_rdma_disable_swap,
Chuck Leverf531a5d2015-10-24 17:27:43 -0400703 .inject_disconnect = xprt_rdma_inject_disconnect,
704#if defined(CONFIG_SUNRPC_BACKCHANNEL)
705 .bc_setup = xprt_rdma_bc_setup,
Chuck Lever76566772015-10-24 17:28:32 -0400706 .bc_up = xprt_rdma_bc_up,
Chuck Leverf531a5d2015-10-24 17:27:43 -0400707 .bc_free_rqst = xprt_rdma_bc_free_rqst,
708 .bc_destroy = xprt_rdma_bc_destroy,
709#endif
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400710};
711
712static struct xprt_class xprt_rdma = {
713 .list = LIST_HEAD_INIT(xprt_rdma.list),
714 .name = "rdma",
715 .owner = THIS_MODULE,
716 .ident = XPRT_TRANSPORT_RDMA,
717 .setup = xprt_setup_rdma,
718};
719
Chuck Leverffe1f0d2015-06-04 11:21:42 -0400720void xprt_rdma_cleanup(void)
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400721{
722 int rc;
723
Chuck Lever3a0799a2014-03-12 12:51:39 -0400724 dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n");
Jeff Laytonf895b252014-11-17 16:58:04 -0500725#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400726 if (sunrpc_table_header) {
727 unregister_sysctl_table(sunrpc_table_header);
728 sunrpc_table_header = NULL;
729 }
730#endif
731 rc = xprt_unregister_transport(&xprt_rdma);
732 if (rc)
733 dprintk("RPC: %s: xprt_unregister returned %i\n",
734 __func__, rc);
Chuck Lever951e7212015-05-26 11:52:25 -0400735
Chuck Leverfe97b472015-10-24 17:27:10 -0400736 rpcrdma_destroy_wq();
Chuck Lever951e7212015-05-26 11:52:25 -0400737 frwr_destroy_recovery_wq();
Chuck Lever5d252f92016-01-07 14:50:10 -0500738
739 rc = xprt_unregister_transport(&xprt_rdma_bc);
740 if (rc)
741 dprintk("RPC: %s: xprt_unregister(bc) returned %i\n",
742 __func__, rc);
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400743}
744
Chuck Leverffe1f0d2015-06-04 11:21:42 -0400745int xprt_rdma_init(void)
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400746{
747 int rc;
748
Chuck Lever951e7212015-05-26 11:52:25 -0400749 rc = frwr_alloc_recovery_wq();
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400750 if (rc)
751 return rc;
752
Chuck Leverfe97b472015-10-24 17:27:10 -0400753 rc = rpcrdma_alloc_wq();
754 if (rc) {
755 frwr_destroy_recovery_wq();
756 return rc;
757 }
758
Chuck Lever951e7212015-05-26 11:52:25 -0400759 rc = xprt_register_transport(&xprt_rdma);
760 if (rc) {
Chuck Leverfe97b472015-10-24 17:27:10 -0400761 rpcrdma_destroy_wq();
Chuck Lever951e7212015-05-26 11:52:25 -0400762 frwr_destroy_recovery_wq();
763 return rc;
764 }
765
Chuck Lever5d252f92016-01-07 14:50:10 -0500766 rc = xprt_register_transport(&xprt_rdma_bc);
767 if (rc) {
768 xprt_unregister_transport(&xprt_rdma);
769 rpcrdma_destroy_wq();
770 frwr_destroy_recovery_wq();
771 return rc;
772 }
773
Chuck Lever3a0799a2014-03-12 12:51:39 -0400774 dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400775
Chuck Lever3a0799a2014-03-12 12:51:39 -0400776 dprintk("Defaults:\n");
777 dprintk("\tSlots %d\n"
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400778 "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
779 xprt_rdma_slot_table_entries,
780 xprt_rdma_max_inline_read, xprt_rdma_max_inline_write);
Chuck Lever3a0799a2014-03-12 12:51:39 -0400781 dprintk("\tPadding %d\n\tMemreg %d\n",
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400782 xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy);
783
Jeff Laytonf895b252014-11-17 16:58:04 -0500784#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
\"Talpey, Thomas\f58851e2007-09-10 13:50:12 -0400785 if (!sunrpc_table_header)
786 sunrpc_table_header = register_sysctl_table(sunrpc_table);
787#endif
788 return 0;
789}