/*
 * Copyright (c) 2015 Oracle. All rights reserved.
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 */

/* Lightweight memory registration using Fast Memory Regions (FMR).
 * Referred to sometimes as MTHCAFMR mode.
 *
 * FMR uses synchronous memory registration and deregistration.
 * FMR registration is known to be fast, but FMR deregistration
 * can take tens of usecs to complete.
 */

/* Normal operation
 *
 * A Memory Region is prepared for RDMA READ or WRITE using the
 * ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is
 * finished, the Memory Region is unmapped using the ib_unmap_fmr
 * verb (fmr_op_unmap_sync).
 */

#include "xprt_rdma.h"

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

/* Maximum scatter/gather per FMR */
#define RPCRDMA_MAX_FMR_SGES	(64)

/* Access mode of externally registered pages */
enum {
	RPCRDMA_FMR_ACCESS_FLAGS	= IB_ACCESS_REMOTE_WRITE |
					  IB_ACCESS_REMOTE_READ,
};

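/* Allocate the resources one MW needs in FMR mode: an array of
 * physical addresses, a scatterlist, and the FMR itself.
 */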
static int
__fmr_init(struct rpcrdma_mw *mw, struct ib_pd *pd)
{
	static struct ib_fmr_attr fmr_attr = {
		.max_pages	= RPCRDMA_MAX_FMR_SGES,
		.max_maps	= 1,
		.page_shift	= PAGE_SHIFT
	};

	mw->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
				       sizeof(u64), GFP_KERNEL);
	if (!mw->fmr.fm_physaddrs)
		goto out_free;

	mw->mw_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
			    sizeof(*mw->mw_sg), GFP_KERNEL);
	if (!mw->mw_sg)
		goto out_free;

	sg_init_table(mw->mw_sg, RPCRDMA_MAX_FMR_SGES);

	mw->fmr.fm_mr = ib_alloc_fmr(pd, RPCRDMA_FMR_ACCESS_FLAGS,
				     &fmr_attr);
	if (IS_ERR(mw->fmr.fm_mr))
		goto out_fmr_err;

	return 0;

out_fmr_err:
	dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__,
		PTR_ERR(mw->fmr.fm_mr));

out_free:
	kfree(mw->mw_sg);
	kfree(mw->fmr.fm_physaddrs);
	return -ENOMEM;
}

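/* Invalidate one FMR. ib_unmap_fmr() takes a list of FMRs, so this
 * puts the MW's FMR on a temporary single-entry list.
 */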
static int
__fmr_unmap(struct rpcrdma_mw *mw)
{
	LIST_HEAD(l);
	int rc;

	list_add(&mw->fmr.fm_mr->list, &l);
	rc = ib_unmap_fmr(&l);
	list_del_init(&mw->fmr.fm_mr->list);
	return rc;
}

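/* Release the FMR and the buffers owned by one MW. */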
static void
__fmr_release(struct rpcrdma_mw *r)
{
	LIST_HEAD(unmap_list);
	int rc;

	kfree(r->fmr.fm_physaddrs);
	kfree(r->mw_sg);

	/* In case this one was left mapped, try to unmap it
	 * to prevent dealloc_fmr from failing with EBUSY
	 */
	rc = __fmr_unmap(r);
	if (rc)
		pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
		       r, rc);

	rc = ib_dealloc_fmr(r->fmr.fm_mr);
	if (rc)
		pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n",
		       r, rc);
}

/* Reset of a single FMR.
 *
 * There's no recovery if this fails. The FMR is abandoned, but
 * remains in rb_all. It will be cleaned up when the transport is
 * destroyed.
 */
static void
fmr_op_recover_mr(struct rpcrdma_mw *mw)
{
	struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
	int rc;

	/* ORDER: invalidate first */
	rc = __fmr_unmap(mw);

	/* ORDER: then DMA unmap */
	ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
			mw->mw_sg, mw->mw_nents, mw->mw_dir);
	if (rc) {
		pr_err("rpcrdma: FMR reset status %d, %p orphaned\n",
		       rc, mw);
		r_xprt->rx_stats.mrs_orphaned++;
		return;
	}

	rpcrdma_put_mw(r_xprt, mw);
	r_xprt->rx_stats.mrs_recovered++;
}

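/* On transport open, report how many chunk segments FMR mode needs
 * to convey a maximum-size payload, so header size limits can be set.
 */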
static int
fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
	    struct rpcrdma_create_data_internal *cdata)
{
	rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1,
						      RPCRDMA_MAX_DATA_SEGS /
						      RPCRDMA_MAX_FMR_SGES));
	return 0;
}

/* FMR mode conveys up to 64 pages of payload per chunk segment.
 */
static size_t
fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
{
	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
		     RPCRDMA_MAX_HDR_SEGS * RPCRDMA_MAX_FMR_SGES);
}

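/* Allocate a pool of MWs for this transport: enough to register the
 * payload, head, and tail of every RPC slot.
 */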
static int
fmr_op_init(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
	struct rpcrdma_mw *r;
	int i, rc;

	spin_lock_init(&buf->rb_mwlock);
	INIT_LIST_HEAD(&buf->rb_mws);
	INIT_LIST_HEAD(&buf->rb_all);

	i = max_t(int, RPCRDMA_MAX_DATA_SEGS / RPCRDMA_MAX_FMR_SGES, 1);
	i += 2;				/* head + tail */
	i *= buf->rb_max_requests;	/* one set for each RPC slot */
	dprintk("RPC: %s: initializing %d FMRs\n", __func__, i);

	while (i--) {
		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (!r)
			return -ENOMEM;

		rc = __fmr_init(r, pd);
		if (rc) {
			kfree(r);
			return rc;
		}

		r->mw_xprt = r_xprt;
		list_add(&r->mw_list, &buf->rb_mws);
		list_add(&r->mw_all, &buf->rb_all);
	}
	return 0;
}

/* Use the ib_map_phys_fmr() verb to register a memory region
 * for remote access via RDMA READ or RDMA WRITE.
 */
static int
fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
	   int nsegs, bool writing)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	int len, pageoff, i, rc;
	struct rpcrdma_mw *mw;
	u64 *dma_pages;

	mw = seg1->rl_mw;
	seg1->rl_mw = NULL;
	if (mw)
		rpcrdma_defer_mr_recovery(mw);
	mw = rpcrdma_get_mw(r_xprt);
	if (!mw)
		return -ENOMEM;

	pageoff = offset_in_page(seg1->mr_offset);
	seg1->mr_offset -= pageoff;	/* start of page */
	seg1->mr_len += pageoff;
	len = -pageoff;
	if (nsegs > RPCRDMA_MAX_FMR_SGES)
		nsegs = RPCRDMA_MAX_FMR_SGES;
	for (i = 0; i < nsegs;) {
		if (seg->mr_page)
			sg_set_page(&mw->mw_sg[i],
				    seg->mr_page,
				    seg->mr_len,
				    offset_in_page(seg->mr_offset));
		else
			sg_set_buf(&mw->mw_sg[i], seg->mr_offset,
				   seg->mr_len);
		len += seg->mr_len;
		++seg;
		++i;
		/* Check for holes */
		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	mw->mw_nents = i;
	mw->mw_dir = rpcrdma_data_dir(writing);

	if (!ib_dma_map_sg(r_xprt->rx_ia.ri_device,
			   mw->mw_sg, mw->mw_nents, mw->mw_dir))
		goto out_dmamap_err;

	for (i = 0, dma_pages = mw->fmr.fm_physaddrs; i < mw->mw_nents; i++)
		dma_pages[i] = sg_dma_address(&mw->mw_sg[i]);
	rc = ib_map_phys_fmr(mw->fmr.fm_mr, dma_pages, mw->mw_nents,
			     dma_pages[0]);
	if (rc)
		goto out_maperr;

	seg1->rl_mw = mw;
	seg1->mr_rkey = mw->fmr.fm_mr->rkey;
	seg1->mr_base = dma_pages[0] + pageoff;
	seg1->mr_nsegs = mw->mw_nents;
	seg1->mr_len = len;
	return mw->mw_nents;

out_dmamap_err:
	pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n",
	       mw->mw_sg, mw->mw_nents);
	return -ENOMEM;

out_maperr:
	pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
	       len, (unsigned long long)dma_pages[0],
	       pageoff, mw->mw_nents, rc);
	rpcrdma_defer_mr_recovery(mw);
	return rc;
}

/* Invalidate all memory regions that were registered for "req".
 *
 * Sleeps until it is safe for the host CPU to access the
 * previously mapped memory regions.
 */
static void
fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
	struct rpcrdma_mr_seg *seg;
	unsigned int i, nchunks;
	struct rpcrdma_mw *mw;
	LIST_HEAD(unmap_list);
	int rc;

	dprintk("RPC: %s: req %p\n", __func__, req);

	/* ORDER: Invalidate all of the req's MRs first
	 *
	 * ib_unmap_fmr() is slow, so use a single call instead
	 * of one call per mapped FMR.
	 */
	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
		seg = &req->rl_segments[i];
		mw = seg->rl_mw;

		list_add_tail(&mw->fmr.fm_mr->list, &unmap_list);

		i += seg->mr_nsegs;
	}
	rc = ib_unmap_fmr(&unmap_list);
	if (rc)
		goto out_reset;

	/* ORDER: Now DMA unmap all of the req's MRs, and return
	 * them to the free MW list.
	 */
	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
		seg = &req->rl_segments[i];
		mw = seg->rl_mw;

		list_del_init(&mw->fmr.fm_mr->list);
		ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
				mw->mw_sg, mw->mw_nents, mw->mw_dir);
		rpcrdma_put_mw(r_xprt, mw);

		i += seg->mr_nsegs;
		seg->mr_nsegs = 0;
		seg->rl_mw = NULL;
	}

	req->rl_nchunks = 0;
	return;

out_reset:
	pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);

	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
		seg = &req->rl_segments[i];
		mw = seg->rl_mw;

		list_del_init(&mw->fmr.fm_mr->list);
		fmr_op_recover_mr(mw);

		i += seg->mr_nsegs;
	}
}

/* Use a slow, safe mechanism to invalidate all memory regions
 * that were registered for "req".
 */
static void
fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
		  bool sync)
{
	struct rpcrdma_mr_seg *seg;
	struct rpcrdma_mw *mw;
	unsigned int i;

	for (i = 0; req->rl_nchunks; req->rl_nchunks--) {
		seg = &req->rl_segments[i];
		mw = seg->rl_mw;

		if (sync)
			fmr_op_recover_mr(mw);
		else
			rpcrdma_defer_mr_recovery(mw);

		i += seg->mr_nsegs;
		seg->mr_nsegs = 0;
		seg->rl_mw = NULL;
	}
}

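/* Free every MW remaining on the transport's rb_all list. */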
static void
fmr_op_destroy(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;

	while (!list_empty(&buf->rb_all)) {
		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
		list_del(&r->mw_all);
		__fmr_release(r);
		kfree(r);
	}
}

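/* Registration strategy entry points used by the generic transport code. */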
const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
	.ro_map				= fmr_op_map,
	.ro_unmap_sync			= fmr_op_unmap_sync,
	.ro_unmap_safe			= fmr_op_unmap_safe,
	.ro_recover_mr			= fmr_op_recover_mr,
	.ro_open			= fmr_op_open,
	.ro_maxpages			= fmr_op_maxpages,
	.ro_init			= fmr_op_init,
	.ro_destroy			= fmr_op_destroy,
	.ro_displayname			= "fmr",
};