/*
 * Copyright (c) 2015 Oracle. All rights reserved.
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 */

/* Lightweight memory registration using Fast Memory Regions (FMR).
 * Referred to sometimes as MTHCAFMR mode.
 *
 * FMR uses synchronous memory registration and deregistration.
 * FMR registration is known to be fast, but FMR deregistration
 * can take tens of usecs to complete.
 */

/* Normal operation
 *
 * A Memory Region is prepared for RDMA READ or WRITE using the
 * ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is
 * finished, the Memory Region is unmapped using the ib_unmap_fmr
 * verb (fmr_op_unmap_sync).
 */

#include "xprt_rdma.h"

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

/* Maximum scatter/gather per FMR */
#define RPCRDMA_MAX_FMR_SGES	(64)

/* Access mode of externally registered pages */
enum {
	RPCRDMA_FMR_ACCESS_FLAGS	= IB_ACCESS_REMOTE_WRITE |
					  IB_ACCESS_REMOTE_READ,
};

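/* fmr_is_supported - check that the device provides the FMR verbs
 *
 * FMR mode relies on the provider implementing the ib_alloc_fmr verb;
 * without it this registration mode cannot be used.
 */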
bool
fmr_is_supported(struct rpcrdma_ia *ia)
{
	if (!ia->ri_device->alloc_fmr) {
		pr_info("rpcrdma: 'fmr' mode is not supported by device %s\n",
			ia->ri_device->name);
		return false;
	}
	return true;
}

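/* Allocate the per-MW resources needed for FMR mode: the physical
 * address array and scatterlist handed to ib_map_phys_fmr(), and the
 * ib_fmr object itself (limited to one map per unmap).
 */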
static int
__fmr_init(struct rpcrdma_mw *mw, struct ib_pd *pd)
{
	static struct ib_fmr_attr fmr_attr = {
		.max_pages	= RPCRDMA_MAX_FMR_SGES,
		.max_maps	= 1,
		.page_shift	= PAGE_SHIFT
	};

	mw->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
				       sizeof(u64), GFP_KERNEL);
	if (!mw->fmr.fm_physaddrs)
		goto out_free;

	mw->mw_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
			    sizeof(*mw->mw_sg), GFP_KERNEL);
	if (!mw->mw_sg)
		goto out_free;

	sg_init_table(mw->mw_sg, RPCRDMA_MAX_FMR_SGES);

	mw->fmr.fm_mr = ib_alloc_fmr(pd, RPCRDMA_FMR_ACCESS_FLAGS,
				     &fmr_attr);
	if (IS_ERR(mw->fmr.fm_mr))
		goto out_fmr_err;

	return 0;

out_fmr_err:
	dprintk("RPC:       %s: ib_alloc_fmr returned %ld\n", __func__,
		PTR_ERR(mw->fmr.fm_mr));

out_free:
	kfree(mw->mw_sg);
	kfree(mw->fmr.fm_physaddrs);
	return -ENOMEM;
}

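/* Unmap a single FMR. ib_unmap_fmr() takes a list of FMRs, so the
 * FMR is placed on a temporary one-entry list for the call.
 */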
static int
__fmr_unmap(struct rpcrdma_mw *mw)
{
	LIST_HEAD(l);
	int rc;

	list_add(&mw->fmr.fm_mr->list, &l);
	rc = ib_unmap_fmr(&l);
	list_del_init(&mw->fmr.fm_mr->list);
	return rc;
}

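/* Release all resources held by one MW: free the arrays allocated
 * by __fmr_init(), unmap the FMR in case it was left mapped, then
 * deallocate the FMR itself.
 */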
static void
__fmr_release(struct rpcrdma_mw *r)
{
	LIST_HEAD(unmap_list);
	int rc;

	kfree(r->fmr.fm_physaddrs);
	kfree(r->mw_sg);

	/* In case this one was left mapped, try to unmap it
	 * to prevent dealloc_fmr from failing with EBUSY
	 */
	rc = __fmr_unmap(r);
	if (rc)
		pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
		       r, rc);

	rc = ib_dealloc_fmr(r->fmr.fm_mr);
	if (rc)
		pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n",
		       r, rc);
}

/* Reset of a single FMR.
 *
 * There's no recovery if this fails. The FMR is abandoned, but
 * remains in rb_all. It will be cleaned up when the transport is
 * destroyed.
 */
static void
fmr_op_recover_mr(struct rpcrdma_mw *mw)
{
	struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
	int rc;

	/* ORDER: invalidate first */
	rc = __fmr_unmap(mw);

	/* ORDER: then DMA unmap */
	ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
			mw->mw_sg, mw->mw_nents, mw->mw_dir);
	if (rc) {
		pr_err("rpcrdma: FMR reset status %d, %p orphaned\n",
		       rc, mw);
		r_xprt->rx_stats.mrs_orphaned++;
		return;
	}

	rpcrdma_put_mw(r_xprt, mw);
	r_xprt->rx_stats.mrs_recovered++;
}

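/* FMR mode needs at most RPCRDMA_MAX_DATA_SEGS / RPCRDMA_MAX_FMR_SGES
 * chunk segments to convey the largest payload, so size the
 * RPC-over-RDMA headers accordingly at transport setup time.
 */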
static int
fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
	    struct rpcrdma_create_data_internal *cdata)
{
	rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1,
						      RPCRDMA_MAX_DATA_SEGS /
						      RPCRDMA_MAX_FMR_SGES));
	return 0;
}

/* FMR mode conveys up to 64 pages of payload per chunk segment.
 */
static size_t
fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
{
	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
		     RPCRDMA_MAX_HDR_SEGS * RPCRDMA_MAX_FMR_SGES);
}

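/* Allocate and initialize this transport's pool of rpcrdma_mw
 * structures: enough FMRs for the largest payload, plus one each
 * for the head and tail chunks, for every RPC slot.
 */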
static int
fmr_op_init(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
	struct rpcrdma_mw *r;
	int i, rc;

	spin_lock_init(&buf->rb_mwlock);
	INIT_LIST_HEAD(&buf->rb_mws);
	INIT_LIST_HEAD(&buf->rb_all);

	i = max_t(int, RPCRDMA_MAX_DATA_SEGS / RPCRDMA_MAX_FMR_SGES, 1);
	i += 2;				/* head + tail */
	i *= buf->rb_max_requests;	/* one set for each RPC slot */
	dprintk("RPC:       %s: initializing %d FMRs\n", __func__, i);

	while (i--) {
		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (!r)
			return -ENOMEM;

		rc = __fmr_init(r, pd);
		if (rc) {
			kfree(r);
			return rc;
		}

		r->mw_xprt = r_xprt;
		list_add(&r->mw_list, &buf->rb_mws);
		list_add(&r->mw_all, &buf->rb_all);
	}
	return 0;
}

/* Use the ib_map_phys_fmr() verb to register a memory region
 * for remote access via RDMA READ or RDMA WRITE.
 */
static int
fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
	   int nsegs, bool writing)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	int len, pageoff, i, rc;
	struct rpcrdma_mw *mw;
	u64 *dma_pages;

	mw = seg1->rl_mw;
	seg1->rl_mw = NULL;
	if (mw)
		rpcrdma_defer_mr_recovery(mw);
	mw = rpcrdma_get_mw(r_xprt);
	if (!mw)
		return -ENOMEM;

	pageoff = offset_in_page(seg1->mr_offset);
	seg1->mr_offset -= pageoff;	/* start of page */
	seg1->mr_len += pageoff;
	len = -pageoff;
	if (nsegs > RPCRDMA_MAX_FMR_SGES)
		nsegs = RPCRDMA_MAX_FMR_SGES;
	for (i = 0; i < nsegs;) {
		if (seg->mr_page)
			sg_set_page(&mw->mw_sg[i],
				    seg->mr_page,
				    seg->mr_len,
				    offset_in_page(seg->mr_offset));
		else
			sg_set_buf(&mw->mw_sg[i], seg->mr_offset,
				   seg->mr_len);
		len += seg->mr_len;
		++seg;
		++i;
		/* Check for holes */
		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	mw->mw_nents = i;
	mw->mw_dir = rpcrdma_data_dir(writing);

	if (!ib_dma_map_sg(r_xprt->rx_ia.ri_device,
			   mw->mw_sg, mw->mw_nents, mw->mw_dir))
		goto out_dmamap_err;

	for (i = 0, dma_pages = mw->fmr.fm_physaddrs; i < mw->mw_nents; i++)
		dma_pages[i] = sg_dma_address(&mw->mw_sg[i]);
	rc = ib_map_phys_fmr(mw->fmr.fm_mr, dma_pages, mw->mw_nents,
			     dma_pages[0]);
	if (rc)
		goto out_maperr;

	seg1->rl_mw = mw;
	seg1->mr_rkey = mw->fmr.fm_mr->rkey;
	seg1->mr_base = dma_pages[0] + pageoff;
	seg1->mr_nsegs = mw->mw_nents;
	seg1->mr_len = len;
	return mw->mw_nents;

out_dmamap_err:
	pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n",
	       mw->mw_sg, mw->mw_nents);
	rpcrdma_defer_mr_recovery(mw);
	return -ENOMEM;

out_maperr:
	pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
	       len, (unsigned long long)dma_pages[0],
	       pageoff, mw->mw_nents, rc);
	rpcrdma_defer_mr_recovery(mw);
	return rc;
}

/* Invalidate all memory regions that were registered for "req".
 *
 * Sleeps until it is safe for the host CPU to access the
 * previously mapped memory regions.
 */
static void
fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
	struct rpcrdma_mr_seg *seg;
	unsigned int i, nchunks;
	struct rpcrdma_mw *mw;
	LIST_HEAD(unmap_list);
	int rc;

	dprintk("RPC:       %s: req %p\n", __func__, req);

	/* ORDER: Invalidate all of the req's MRs first
	 *
	 * ib_unmap_fmr() is slow, so use a single call instead
	 * of one call per mapped FMR.
	 */
	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
		seg = &req->rl_segments[i];
		mw = seg->rl_mw;

		list_add_tail(&mw->fmr.fm_mr->list, &unmap_list);

		i += seg->mr_nsegs;
	}
	rc = ib_unmap_fmr(&unmap_list);
	if (rc)
		goto out_reset;

	/* ORDER: Now DMA unmap all of the req's MRs, and return
	 * them to the free MW list.
	 */
	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
		seg = &req->rl_segments[i];
		mw = seg->rl_mw;

		list_del_init(&mw->fmr.fm_mr->list);
		ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
				mw->mw_sg, mw->mw_nents, mw->mw_dir);
		rpcrdma_put_mw(r_xprt, mw);

		i += seg->mr_nsegs;
		seg->mr_nsegs = 0;
		seg->rl_mw = NULL;
	}

	req->rl_nchunks = 0;
	return;

out_reset:
	pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);

	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
		seg = &req->rl_segments[i];
		mw = seg->rl_mw;

		list_del_init(&mw->fmr.fm_mr->list);
		fmr_op_recover_mr(mw);

		i += seg->mr_nsegs;
	}
}

/* Use a slow, safe mechanism to invalidate all memory regions
 * that were registered for "req".
 */
static void
fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
		  bool sync)
{
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mw *mw;
	unsigned int i;

	for (i = 0; req->rl_nchunks; req->rl_nchunks--) {
		seg = &req->rl_segments[i];
		mw = seg->rl_mw;

		if (sync)
			fmr_op_recover_mr(mw);
		else
			rpcrdma_defer_mr_recovery(mw);

		i += seg->mr_nsegs;
		seg->mr_nsegs = 0;
		seg->rl_mw = NULL;
	}
}

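/* Release every MW on the rb_all list, freeing its FMR and
 * associated resources, when the buffer pool is being torn down.
 */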
static void
fmr_op_destroy(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;

	while (!list_empty(&buf->rb_all)) {
		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
		list_del(&r->mw_all);
		__fmr_release(r);
		kfree(r);
	}
}

const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
	.ro_map				= fmr_op_map,
	.ro_unmap_sync			= fmr_op_unmap_sync,
	.ro_unmap_safe			= fmr_op_unmap_safe,
	.ro_recover_mr			= fmr_op_recover_mr,
	.ro_open			= fmr_op_open,
	.ro_maxpages			= fmr_op_maxpages,
	.ro_init			= fmr_op_init,
	.ro_destroy			= fmr_op_destroy,
	.ro_displayname			= "fmr",
};