blob: 6df7a0cf566015378aa3f76c480115675454297d [file] [log] [blame]
/*
 * Device operations for the pnfs nfs4 file layout driver.
 *
 * Copyright (c) 2014, Primary Data, Inc. All rights reserved.
 *
 * Tao Peng <bergwolf@primarydata.com>
 */
8
9#include <linux/nfs_fs.h>
10#include <linux/vmalloc.h>
11#include <linux/module.h>
12#include <linux/sunrpc/addr.h>
13
14#include "../internal.h"
15#include "../nfs4session.h"
16#include "flexfilelayout.h"
17
18#define NFSDBG_FACILITY NFSDBG_PNFS_LD
19
Trond Myklebust15d03052016-08-16 11:08:22 -040020static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS;
21static unsigned int dataserver_retrans;
Tom Haynesd67ae822014-12-11 17:02:04 -050022
Fred Isaman65990d12016-09-30 14:37:41 -040023static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg);
24
Tom Haynesd67ae822014-12-11 17:02:04 -050025void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds)
26{
Fred Isaman65990d12016-09-30 14:37:41 -040027 if (!IS_ERR_OR_NULL(mirror_ds))
Tom Haynesd67ae822014-12-11 17:02:04 -050028 nfs4_put_deviceid_node(&mirror_ds->id_node);
29}
30
31void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds)
32{
33 nfs4_print_deviceid(&mirror_ds->id_node.deviceid);
34 nfs4_pnfs_ds_put(mirror_ds->ds);
Trond Myklebust84a80f62015-03-09 15:23:35 -040035 kfree_rcu(mirror_ds, id_node.rcu);
Tom Haynesd67ae822014-12-11 17:02:04 -050036}
37
38/* Decode opaque device data and construct new_ds using it */
39struct nfs4_ff_layout_ds *
40nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
41 gfp_t gfp_flags)
42{
43 struct xdr_stream stream;
44 struct xdr_buf buf;
45 struct page *scratch;
46 struct list_head dsaddrs;
47 struct nfs4_pnfs_ds_addr *da;
48 struct nfs4_ff_layout_ds *new_ds = NULL;
49 struct nfs4_ff_ds_version *ds_versions = NULL;
50 u32 mp_count;
51 u32 version_count;
52 __be32 *p;
53 int i, ret = -ENOMEM;
54
55 /* set up xdr stream */
56 scratch = alloc_page(gfp_flags);
57 if (!scratch)
58 goto out_err;
59
60 new_ds = kzalloc(sizeof(struct nfs4_ff_layout_ds), gfp_flags);
61 if (!new_ds)
62 goto out_scratch;
63
64 nfs4_init_deviceid_node(&new_ds->id_node,
65 server,
66 &pdev->dev_id);
67 INIT_LIST_HEAD(&dsaddrs);
68
69 xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen);
70 xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
71
72 /* multipath count */
73 p = xdr_inline_decode(&stream, 4);
74 if (unlikely(!p))
75 goto out_err_drain_dsaddrs;
76 mp_count = be32_to_cpup(p);
77 dprintk("%s: multipath ds count %d\n", __func__, mp_count);
78
79 for (i = 0; i < mp_count; i++) {
80 /* multipath ds */
81 da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net,
82 &stream, gfp_flags);
83 if (da)
84 list_add_tail(&da->da_node, &dsaddrs);
85 }
86 if (list_empty(&dsaddrs)) {
87 dprintk("%s: no suitable DS addresses found\n",
88 __func__);
89 ret = -ENOMEDIUM;
90 goto out_err_drain_dsaddrs;
91 }
92
93 /* version count */
94 p = xdr_inline_decode(&stream, 4);
95 if (unlikely(!p))
96 goto out_err_drain_dsaddrs;
97 version_count = be32_to_cpup(p);
98 dprintk("%s: version count %d\n", __func__, version_count);
99
100 ds_versions = kzalloc(version_count * sizeof(struct nfs4_ff_ds_version),
101 gfp_flags);
102 if (!ds_versions)
103 goto out_scratch;
104
105 for (i = 0; i < version_count; i++) {
106 /* 20 = version(4) + minor_version(4) + rsize(4) + wsize(4) +
107 * tightly_coupled(4) */
108 p = xdr_inline_decode(&stream, 20);
109 if (unlikely(!p))
110 goto out_err_drain_dsaddrs;
111 ds_versions[i].version = be32_to_cpup(p++);
112 ds_versions[i].minor_version = be32_to_cpup(p++);
113 ds_versions[i].rsize = nfs_block_size(be32_to_cpup(p++), NULL);
114 ds_versions[i].wsize = nfs_block_size(be32_to_cpup(p++), NULL);
115 ds_versions[i].tightly_coupled = be32_to_cpup(p);
116
117 if (ds_versions[i].rsize > NFS_MAX_FILE_IO_SIZE)
118 ds_versions[i].rsize = NFS_MAX_FILE_IO_SIZE;
119 if (ds_versions[i].wsize > NFS_MAX_FILE_IO_SIZE)
120 ds_versions[i].wsize = NFS_MAX_FILE_IO_SIZE;
121
Tigran Mkrtchyana7878ca2017-04-04 15:12:51 +0200122 /*
123 * check for valid major/minor combination.
124 * currently we support dataserver which talk:
125 * v3, v4.0, v4.1, v4.2
126 */
127 if (!((ds_versions[i].version == 3 && ds_versions[i].minor_version == 0) ||
128 (ds_versions[i].version == 4 && ds_versions[i].minor_version < 3))) {
Tom Haynesd67ae822014-12-11 17:02:04 -0500129 dprintk("%s: [%d] unsupported ds version %d-%d\n", __func__,
130 i, ds_versions[i].version,
131 ds_versions[i].minor_version);
132 ret = -EPROTONOSUPPORT;
133 goto out_err_drain_dsaddrs;
134 }
135
136 dprintk("%s: [%d] vers %u minor_ver %u rsize %u wsize %u coupled %d\n",
137 __func__, i, ds_versions[i].version,
138 ds_versions[i].minor_version,
139 ds_versions[i].rsize,
140 ds_versions[i].wsize,
141 ds_versions[i].tightly_coupled);
142 }
143
144 new_ds->ds_versions = ds_versions;
145 new_ds->ds_versions_cnt = version_count;
146
147 new_ds->ds = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
148 if (!new_ds->ds)
149 goto out_err_drain_dsaddrs;
150
151 /* If DS was already in cache, free ds addrs */
152 while (!list_empty(&dsaddrs)) {
153 da = list_first_entry(&dsaddrs,
154 struct nfs4_pnfs_ds_addr,
155 da_node);
156 list_del_init(&da->da_node);
157 kfree(da->da_remotestr);
158 kfree(da);
159 }
160
161 __free_page(scratch);
162 return new_ds;
163
164out_err_drain_dsaddrs:
165 while (!list_empty(&dsaddrs)) {
166 da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr,
167 da_node);
168 list_del_init(&da->da_node);
169 kfree(da->da_remotestr);
170 kfree(da);
171 }
172
173 kfree(ds_versions);
174out_scratch:
175 __free_page(scratch);
176out_err:
177 kfree(new_ds);
178
179 dprintk("%s ERROR: returning %d\n", __func__, ret);
180 return NULL;
181}
182
Trond Myklebust889d94d2015-09-01 00:58:24 -0700183static void ff_layout_mark_devid_invalid(struct pnfs_layout_segment *lseg,
184 struct nfs4_deviceid_node *devid)
185{
Weston Andros Adamson1c48cee2016-12-14 16:31:55 -0500186 nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid);
Trond Myklebust889d94d2015-09-01 00:58:24 -0700187 if (!ff_layout_has_available_ds(lseg))
188 pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode,
189 lseg);
190}
191
192static bool ff_layout_mirror_valid(struct pnfs_layout_segment *lseg,
Fred Isaman65990d12016-09-30 14:37:41 -0400193 struct nfs4_ff_layout_mirror *mirror,
194 bool create)
Trond Myklebust889d94d2015-09-01 00:58:24 -0700195{
Fred Isaman65990d12016-09-30 14:37:41 -0400196 if (mirror == NULL || IS_ERR(mirror->mirror_ds))
197 goto outerr;
198 if (mirror->mirror_ds == NULL) {
199 if (create) {
200 struct nfs4_deviceid_node *node;
201 struct pnfs_layout_hdr *lh = lseg->pls_layout;
202 struct nfs4_ff_layout_ds *mirror_ds = ERR_PTR(-ENODEV);
203
204 node = nfs4_find_get_deviceid(NFS_SERVER(lh->plh_inode),
205 &mirror->devid, lh->plh_lc_cred,
206 GFP_KERNEL);
207 if (node)
208 mirror_ds = FF_LAYOUT_MIRROR_DS(node);
209
210 /* check for race with another call to this function */
211 if (cmpxchg(&mirror->mirror_ds, NULL, mirror_ds) &&
212 mirror_ds != ERR_PTR(-ENODEV))
213 nfs4_put_deviceid_node(node);
214 } else
215 goto outerr;
Trond Myklebust889d94d2015-09-01 00:58:24 -0700216 }
Tigran Mkrtchyanf17f8a12017-03-30 17:31:18 +0200217
218 if (IS_ERR(mirror->mirror_ds))
219 goto outerr;
220
Trond Myklebust889d94d2015-09-01 00:58:24 -0700221 if (mirror->mirror_ds->ds == NULL) {
222 struct nfs4_deviceid_node *devid;
223 devid = &mirror->mirror_ds->id_node;
224 ff_layout_mark_devid_invalid(lseg, devid);
225 return false;
226 }
227 return true;
Fred Isaman65990d12016-09-30 14:37:41 -0400228outerr:
229 pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg);
230 return false;
Trond Myklebust889d94d2015-09-01 00:58:24 -0700231}
232
Tom Haynesd67ae822014-12-11 17:02:04 -0500233static void extend_ds_error(struct nfs4_ff_layout_ds_err *err,
234 u64 offset, u64 length)
235{
236 u64 end;
237
Trond Myklebust17822b22016-10-25 12:24:25 -0400238 end = max_t(u64, pnfs_end_offset(err->offset, err->length),
239 pnfs_end_offset(offset, length));
Tom Haynesd67ae822014-12-11 17:02:04 -0500240 err->offset = min_t(u64, err->offset, offset);
241 err->length = end - err->offset;
242}
243
Trond Myklebustb819ed42016-01-21 14:48:49 -0500244static int
245ff_ds_error_match(const struct nfs4_ff_layout_ds_err *e1,
246 const struct nfs4_ff_layout_ds_err *e2)
Tom Haynesd67ae822014-12-11 17:02:04 -0500247{
Trond Myklebustb819ed42016-01-21 14:48:49 -0500248 int ret;
249
250 if (e1->opnum != e2->opnum)
251 return e1->opnum < e2->opnum ? -1 : 1;
252 if (e1->status != e2->status)
253 return e1->status < e2->status ? -1 : 1;
Trond Myklebust93b717f2016-05-16 17:42:43 -0400254 ret = memcmp(e1->stateid.data, e2->stateid.data,
255 sizeof(e1->stateid.data));
Trond Myklebustb819ed42016-01-21 14:48:49 -0500256 if (ret != 0)
257 return ret;
258 ret = memcmp(&e1->deviceid, &e2->deviceid, sizeof(e1->deviceid));
259 if (ret != 0)
260 return ret;
Trond Myklebust17822b22016-10-25 12:24:25 -0400261 if (pnfs_end_offset(e1->offset, e1->length) < e2->offset)
Trond Myklebustb819ed42016-01-21 14:48:49 -0500262 return -1;
Trond Myklebust17822b22016-10-25 12:24:25 -0400263 if (e1->offset > pnfs_end_offset(e2->offset, e2->length))
Trond Myklebustb819ed42016-01-21 14:48:49 -0500264 return 1;
265 /* If ranges overlap or are contiguous, they are the same */
266 return 0;
Tom Haynesd67ae822014-12-11 17:02:04 -0500267}
268
Trond Myklebustb819ed42016-01-21 14:48:49 -0500269static void
Tom Haynesd67ae822014-12-11 17:02:04 -0500270ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo,
271 struct nfs4_ff_layout_ds_err *dserr)
272{
Trond Myklebustb819ed42016-01-21 14:48:49 -0500273 struct nfs4_ff_layout_ds_err *err, *tmp;
274 struct list_head *head = &flo->error_list;
275 int match;
Tom Haynesd67ae822014-12-11 17:02:04 -0500276
Trond Myklebustb819ed42016-01-21 14:48:49 -0500277 /* Do insertion sort w/ merges */
278 list_for_each_entry_safe(err, tmp, &flo->error_list, list) {
279 match = ff_ds_error_match(err, dserr);
280 if (match < 0)
281 continue;
282 if (match > 0) {
283 /* Add entry "dserr" _before_ entry "err" */
284 head = &err->list;
Tom Haynesd67ae822014-12-11 17:02:04 -0500285 break;
286 }
Trond Myklebustb819ed42016-01-21 14:48:49 -0500287 /* Entries match, so merge "err" into "dserr" */
288 extend_ds_error(dserr, err->offset, err->length);
Trond Myklebustcb067932016-12-06 12:00:51 -0500289 list_replace(&err->list, &dserr->list);
Trond Myklebustb819ed42016-01-21 14:48:49 -0500290 kfree(err);
Trond Myklebustcb067932016-12-06 12:00:51 -0500291 return;
Tom Haynesd67ae822014-12-11 17:02:04 -0500292 }
293
Trond Myklebustb819ed42016-01-21 14:48:49 -0500294 list_add_tail(&dserr->list, head);
Tom Haynesd67ae822014-12-11 17:02:04 -0500295}
296
297int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
298 struct nfs4_ff_layout_mirror *mirror, u64 offset,
299 u64 length, int status, enum nfs_opnum4 opnum,
300 gfp_t gfp_flags)
301{
302 struct nfs4_ff_layout_ds_err *dserr;
Tom Haynesd67ae822014-12-11 17:02:04 -0500303
304 if (status == 0)
305 return 0;
306
307 if (mirror->mirror_ds == NULL)
308 return -EINVAL;
309
Tom Haynesd67ae822014-12-11 17:02:04 -0500310 dserr = kmalloc(sizeof(*dserr), gfp_flags);
311 if (!dserr)
312 return -ENOMEM;
313
314 INIT_LIST_HEAD(&dserr->list);
315 dserr->offset = offset;
316 dserr->length = length;
317 dserr->status = status;
318 dserr->opnum = opnum;
319 nfs4_stateid_copy(&dserr->stateid, &mirror->stateid);
320 memcpy(&dserr->deviceid, &mirror->mirror_ds->id_node.deviceid,
321 NFS4_DEVICEID4_SIZE);
322
323 spin_lock(&flo->generic_hdr.plh_inode->i_lock);
Trond Myklebustb819ed42016-01-21 14:48:49 -0500324 ff_layout_add_ds_error_locked(flo, dserr);
Tom Haynesd67ae822014-12-11 17:02:04 -0500325 spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
Tom Haynesd67ae822014-12-11 17:02:04 -0500326
327 return 0;
328}
329
Jeff Layton57f3f4c2016-04-21 20:51:58 -0400330static struct rpc_cred *
331ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode)
332{
Jeff Layton3064b682016-04-21 20:52:00 -0400333 struct rpc_cred *cred, __rcu **pcred;
Jeff Layton57f3f4c2016-04-21 20:51:58 -0400334
Jeff Layton3064b682016-04-21 20:52:00 -0400335 if (iomode == IOMODE_READ)
336 pcred = &mirror->ro_cred;
337 else
338 pcred = &mirror->rw_cred;
Jeff Layton57f3f4c2016-04-21 20:51:58 -0400339
340 rcu_read_lock();
341 do {
342 cred = rcu_dereference(*pcred);
343 if (!cred)
344 break;
345
346 cred = get_rpccred_rcu(cred);
347 } while(!cred);
348 rcu_read_unlock();
349 return cred;
350}
351
Tom Haynesd67ae822014-12-11 17:02:04 -0500352struct nfs_fh *
353nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx)
354{
355 struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx);
356 struct nfs_fh *fh = NULL;
Tom Haynesd67ae822014-12-11 17:02:04 -0500357
Fred Isaman65990d12016-09-30 14:37:41 -0400358 if (!ff_layout_mirror_valid(lseg, mirror, false)) {
Trond Myklebust889d94d2015-09-01 00:58:24 -0700359 pr_err_ratelimited("NFS: %s: No data server for mirror offset index %d\n",
Tom Haynesd67ae822014-12-11 17:02:04 -0500360 __func__, mirror_idx);
Tom Haynesd67ae822014-12-11 17:02:04 -0500361 goto out;
362 }
363
364 /* FIXME: For now assume there is only 1 version available for the DS */
365 fh = &mirror->fh_versions[0];
366out:
367 return fh;
368}
369
Jeff Layton95e2b7e2016-05-17 12:28:44 -0400370/**
371 * nfs4_ff_layout_prepare_ds - prepare a DS connection for an RPC call
372 * @lseg: the layout segment we're operating on
373 * @ds_idx: index of the DS to use
374 * @fail_return: return layout on connect failure?
375 *
376 * Try to prepare a DS connection to accept an RPC call. This involves
377 * selecting a mirror to use and connecting the client to it if it's not
378 * already connected.
379 *
380 * Since we only need a single functioning mirror to satisfy a read, we don't
381 * want to return the layout if there is one. For writes though, any down
382 * mirror should result in a LAYOUTRETURN. @fail_return is how we distinguish
383 * between the two cases.
384 *
385 * Returns a pointer to a connected DS object on success or NULL on failure.
386 */
Tom Haynesd67ae822014-12-11 17:02:04 -0500387struct nfs4_pnfs_ds *
388nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
389 bool fail_return)
390{
391 struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
392 struct nfs4_pnfs_ds *ds = NULL;
393 struct nfs4_deviceid_node *devid;
394 struct inode *ino = lseg->pls_layout->plh_inode;
395 struct nfs_server *s = NFS_SERVER(ino);
396 unsigned int max_payload;
Weston Andros Adamsona33e4b02017-03-09 12:56:48 -0500397 int status;
Tom Haynesd67ae822014-12-11 17:02:04 -0500398
Fred Isaman65990d12016-09-30 14:37:41 -0400399 if (!ff_layout_mirror_valid(lseg, mirror, true)) {
Trond Myklebust889d94d2015-09-01 00:58:24 -0700400 pr_err_ratelimited("NFS: %s: No data server for offset index %d\n",
Tom Haynesd67ae822014-12-11 17:02:04 -0500401 __func__, ds_idx);
Tom Haynesd67ae822014-12-11 17:02:04 -0500402 goto out;
403 }
404
405 devid = &mirror->mirror_ds->id_node;
406 if (ff_layout_test_devid_unavailable(devid))
Trond Myklebust3dc14732016-08-29 15:12:54 -0400407 goto out_fail;
Tom Haynesd67ae822014-12-11 17:02:04 -0500408
409 ds = mirror->mirror_ds->ds;
410 /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
411 smp_rmb();
412 if (ds->ds_clp)
Jeff Layton90a0be02016-04-21 20:51:59 -0400413 goto out;
Tom Haynesd67ae822014-12-11 17:02:04 -0500414
415 /* FIXME: For now we assume the server sent only one version of NFS
416 * to use for the DS.
417 */
Weston Andros Adamsona33e4b02017-03-09 12:56:48 -0500418 status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
Tom Haynesd67ae822014-12-11 17:02:04 -0500419 dataserver_retrans,
420 mirror->mirror_ds->ds_versions[0].version,
Anna Schumaker7d38de32016-11-17 15:15:55 -0500421 mirror->mirror_ds->ds_versions[0].minor_version);
Tom Haynesd67ae822014-12-11 17:02:04 -0500422
423 /* connect success, check rsize/wsize limit */
Trond Myklebust260f32a2017-04-20 14:33:06 -0400424 if (!status) {
Tom Haynesd67ae822014-12-11 17:02:04 -0500425 max_payload =
426 nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient),
427 NULL);
428 if (mirror->mirror_ds->ds_versions[0].rsize > max_payload)
429 mirror->mirror_ds->ds_versions[0].rsize = max_payload;
430 if (mirror->mirror_ds->ds_versions[0].wsize > max_payload)
431 mirror->mirror_ds->ds_versions[0].wsize = max_payload;
Trond Myklebust3dc14732016-08-29 15:12:54 -0400432 goto out;
Tom Haynesd67ae822014-12-11 17:02:04 -0500433 }
Weston Andros Adamsonda066f32017-03-09 12:56:49 -0500434out_fail:
Trond Myklebust3dc14732016-08-29 15:12:54 -0400435 ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
436 mirror, lseg->pls_range.offset,
437 lseg->pls_range.length, NFS4ERR_NXIO,
438 OP_ILLEGAL, GFP_NOIO);
Trond Myklebust3dc14732016-08-29 15:12:54 -0400439 if (fail_return || !ff_layout_has_available_ds(lseg))
440 pnfs_error_mark_layout_for_return(ino, lseg);
441 ds = NULL;
Tom Haynesd67ae822014-12-11 17:02:04 -0500442out:
443 return ds;
444}
445
446struct rpc_cred *
447ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx,
448 struct rpc_cred *mdscred)
449{
450 struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
Jeff Layton547a6372016-04-21 20:51:57 -0400451 struct rpc_cred *cred;
Tom Haynesd67ae822014-12-11 17:02:04 -0500452
Jeff Layton57f3f4c2016-04-21 20:51:58 -0400453 if (mirror) {
454 cred = ff_layout_get_mirror_cred(mirror, lseg->pls_range.iomode);
455 if (!cred)
456 cred = get_rpccred(mdscred);
457 } else {
458 cred = get_rpccred(mdscred);
459 }
Tom Haynesd67ae822014-12-11 17:02:04 -0500460 return cred;
461}
462
463/**
464* Find or create a DS rpc client with th MDS server rpc client auth flavor
465* in the nfs_client cl_ds_clients list.
466*/
467struct rpc_clnt *
468nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, u32 ds_idx,
469 struct nfs_client *ds_clp, struct inode *inode)
470{
471 struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
472
473 switch (mirror->mirror_ds->ds_versions[0].version) {
474 case 3:
475 /* For NFSv3 DS, flavor is set when creating DS connections */
476 return ds_clp->cl_rpcclient;
477 case 4:
478 return nfs4_find_or_create_ds_client(ds_clp, inode);
479 default:
480 BUG();
481 }
482}
483
Trond Myklebust5b9b3c82016-12-02 16:15:05 -0500484void ff_layout_free_ds_ioerr(struct list_head *head)
Tom Haynesd67ae822014-12-11 17:02:04 -0500485{
Trond Myklebust5b9b3c82016-12-02 16:15:05 -0500486 struct nfs4_ff_layout_ds_err *err;
487
488 while (!list_empty(head)) {
489 err = list_first_entry(head,
490 struct nfs4_ff_layout_ds_err,
491 list);
492 list_del(&err->list);
493 kfree(err);
494 }
495}
496
497/* called with inode i_lock held */
498int ff_layout_encode_ds_ioerr(struct xdr_stream *xdr, const struct list_head *head)
499{
500 struct nfs4_ff_layout_ds_err *err;
Tom Haynesd67ae822014-12-11 17:02:04 -0500501 __be32 *p;
502
Trond Myklebust5b9b3c82016-12-02 16:15:05 -0500503 list_for_each_entry(err, head, list) {
Tom Haynesd67ae822014-12-11 17:02:04 -0500504 /* offset(8) + length(8) + stateid(NFS4_STATEID_SIZE)
Trond Myklebustd1354902015-08-27 20:37:39 -0400505 * + array length + deviceid(NFS4_DEVICEID4_SIZE)
506 * + status(4) + opnum(4)
Tom Haynesd67ae822014-12-11 17:02:04 -0500507 */
508 p = xdr_reserve_space(xdr,
Trond Myklebustd1354902015-08-27 20:37:39 -0400509 28 + NFS4_STATEID_SIZE + NFS4_DEVICEID4_SIZE);
Tom Haynesd67ae822014-12-11 17:02:04 -0500510 if (unlikely(!p))
511 return -ENOBUFS;
512 p = xdr_encode_hyper(p, err->offset);
513 p = xdr_encode_hyper(p, err->length);
514 p = xdr_encode_opaque_fixed(p, &err->stateid,
515 NFS4_STATEID_SIZE);
Trond Myklebustd1354902015-08-27 20:37:39 -0400516 /* Encode 1 error */
517 *p++ = cpu_to_be32(1);
Tom Haynesd67ae822014-12-11 17:02:04 -0500518 p = xdr_encode_opaque_fixed(p, &err->deviceid,
519 NFS4_DEVICEID4_SIZE);
520 *p++ = cpu_to_be32(err->status);
521 *p++ = cpu_to_be32(err->opnum);
Trond Myklebust5b9b3c82016-12-02 16:15:05 -0500522 dprintk("%s: offset %llu length %llu status %d op %d\n",
Tom Haynesd67ae822014-12-11 17:02:04 -0500523 __func__, err->offset, err->length, err->status,
Trond Myklebust5b9b3c82016-12-02 16:15:05 -0500524 err->opnum);
Tom Haynesd67ae822014-12-11 17:02:04 -0500525 }
526
527 return 0;
528}
529
Trond Myklebust5b9b3c82016-12-02 16:15:05 -0500530static
531unsigned int do_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo,
532 const struct pnfs_layout_range *range,
533 struct list_head *head,
534 unsigned int maxnum)
535{
536 struct nfs4_flexfile_layout *flo = FF_LAYOUT_FROM_HDR(lo);
537 struct inode *inode = lo->plh_inode;
538 struct nfs4_ff_layout_ds_err *err, *n;
539 unsigned int ret = 0;
540
541 spin_lock(&inode->i_lock);
542 list_for_each_entry_safe(err, n, &flo->error_list, list) {
543 if (!pnfs_is_range_intersecting(err->offset,
544 pnfs_end_offset(err->offset, err->length),
545 range->offset,
546 pnfs_end_offset(range->offset, range->length)))
547 continue;
548 if (!maxnum)
549 break;
550 list_move(&err->list, head);
551 maxnum--;
552 ret++;
553 }
554 spin_unlock(&inode->i_lock);
555 return ret;
556}
557
558unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo,
559 const struct pnfs_layout_range *range,
560 struct list_head *head,
561 unsigned int maxnum)
562{
563 unsigned int ret;
564
565 ret = do_layout_fetch_ds_ioerr(lo, range, head, maxnum);
566 /* If we're over the max, discard all remaining entries */
567 if (ret == maxnum) {
568 LIST_HEAD(discard);
569 do_layout_fetch_ds_ioerr(lo, range, &discard, -1);
570 ff_layout_free_ds_ioerr(&discard);
571 }
572 return ret;
573}
574
Trond Myklebust81d6dc82015-09-01 02:49:44 -0700575static bool ff_read_layout_has_available_ds(struct pnfs_layout_segment *lseg)
Tom Haynesd67ae822014-12-11 17:02:04 -0500576{
577 struct nfs4_ff_layout_mirror *mirror;
578 struct nfs4_deviceid_node *devid;
Trond Myklebust81d6dc82015-09-01 02:49:44 -0700579 u32 idx;
Tom Haynesd67ae822014-12-11 17:02:04 -0500580
581 for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) {
582 mirror = FF_LAYOUT_COMP(lseg, idx);
Fred Isaman65990d12016-09-30 14:37:41 -0400583 if (mirror) {
584 if (!mirror->mirror_ds)
585 return true;
586 if (IS_ERR(mirror->mirror_ds))
587 continue;
Tom Haynesd67ae822014-12-11 17:02:04 -0500588 devid = &mirror->mirror_ds->id_node;
589 if (!ff_layout_test_devid_unavailable(devid))
590 return true;
591 }
592 }
593
594 return false;
595}
596
Trond Myklebust81d6dc82015-09-01 02:49:44 -0700597static bool ff_rw_layout_has_available_ds(struct pnfs_layout_segment *lseg)
598{
599 struct nfs4_ff_layout_mirror *mirror;
600 struct nfs4_deviceid_node *devid;
601 u32 idx;
602
603 for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) {
604 mirror = FF_LAYOUT_COMP(lseg, idx);
Fred Isaman65990d12016-09-30 14:37:41 -0400605 if (!mirror || IS_ERR(mirror->mirror_ds))
Trond Myklebust81d6dc82015-09-01 02:49:44 -0700606 return false;
Fred Isaman65990d12016-09-30 14:37:41 -0400607 if (!mirror->mirror_ds)
608 continue;
Trond Myklebust81d6dc82015-09-01 02:49:44 -0700609 devid = &mirror->mirror_ds->id_node;
610 if (ff_layout_test_devid_unavailable(devid))
611 return false;
612 }
613
614 return FF_LAYOUT_MIRROR_COUNT(lseg) != 0;
615}
616
Fred Isaman65990d12016-09-30 14:37:41 -0400617static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg)
Trond Myklebust81d6dc82015-09-01 02:49:44 -0700618{
619 if (lseg->pls_range.iomode == IOMODE_READ)
620 return ff_read_layout_has_available_ds(lseg);
621 /* Note: RW layout needs all mirrors available */
622 return ff_rw_layout_has_available_ds(lseg);
623}
624
Tom Haynes3b13b4b2016-05-17 12:28:37 -0400625bool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg)
626{
627 return ff_layout_no_fallback_to_mds(lseg) ||
628 ff_layout_has_available_ds(lseg);
629}
630
Tom Haynesfb1084e2016-05-25 07:31:12 -0700631bool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg)
632{
633 return lseg->pls_range.iomode == IOMODE_RW &&
634 ff_layout_no_read_on_rw(lseg);
635}
636
Tom Haynesd67ae822014-12-11 17:02:04 -0500637module_param(dataserver_retrans, uint, 0644);
638MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client "
639 "retries a request before it attempts further "
640 " recovery action.");
641module_param(dataserver_timeo, uint, 0644);
642MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the "
643 "NFSv4.1 client waits for a response from a "
644 " data server before it retries an NFS request.");