blob: 00b384934c3226bc8a510c38b5263f8648e59cb6 [file] [log] [blame]
Benny Halevyc93407d2011-05-22 19:49:06 +03001/*
2 * pNFS Objects layout implementation over open-osd initiator library
3 *
4 * Copyright (C) 2009 Panasas Inc. [year of first publication]
5 * All rights reserved.
6 *
7 * Benny Halevy <bhalevy@panasas.com>
8 * Boaz Harrosh <bharrosh@panasas.com>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2
12 * See the file COPYING included with this distribution for more details.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 *
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 * 3. Neither the name of the Panasas company nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
28 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
29 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
30 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
34 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 */
39
40#include <linux/module.h>
Boaz Harroshaf4f5b52011-10-31 15:04:19 -070041#include <scsi/osd_ore.h>
Boaz Harrosh09f5bf42011-05-22 19:50:20 +030042
43#include "objlayout.h"
44
45#define NFSDBG_FACILITY NFSDBG_PNFS_LD
46
Boaz Harroshb6c05f12011-05-26 21:45:34 +030047struct objio_dev_ent {
48 struct nfs4_deviceid_node id_node;
Boaz Harroshaf4f5b52011-10-31 15:04:19 -070049 struct ore_dev od;
Boaz Harroshb6c05f12011-05-26 21:45:34 +030050};
51
52static void
53objio_free_deviceid_node(struct nfs4_deviceid_node *d)
54{
55 struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node);
56
Boaz Harroshaf4f5b52011-10-31 15:04:19 -070057 dprintk("%s: free od=%p\n", __func__, de->od.od);
58 osduld_put_device(de->od.od);
Boaz Harroshb6c05f12011-05-26 21:45:34 +030059 kfree(de);
60}
61
62static struct objio_dev_ent *_dev_list_find(const struct nfs_server *nfss,
63 const struct nfs4_deviceid *d_id)
64{
65 struct nfs4_deviceid_node *d;
66 struct objio_dev_ent *de;
67
68 d = nfs4_find_get_deviceid(nfss->pnfs_curr_ld, nfss->nfs_client, d_id);
69 if (!d)
70 return NULL;
71
72 de = container_of(d, struct objio_dev_ent, id_node);
73 return de;
74}
75
76static struct objio_dev_ent *
77_dev_list_add(const struct nfs_server *nfss,
78 const struct nfs4_deviceid *d_id, struct osd_dev *od,
79 gfp_t gfp_flags)
80{
81 struct nfs4_deviceid_node *d;
82 struct objio_dev_ent *de = kzalloc(sizeof(*de), gfp_flags);
83 struct objio_dev_ent *n;
84
85 if (!de) {
86 dprintk("%s: -ENOMEM od=%p\n", __func__, od);
87 return NULL;
88 }
89
90 dprintk("%s: Adding od=%p\n", __func__, od);
91 nfs4_init_deviceid_node(&de->id_node,
92 nfss->pnfs_curr_ld,
93 nfss->nfs_client,
94 d_id);
Boaz Harroshaf4f5b52011-10-31 15:04:19 -070095 de->od.od = od;
Boaz Harroshb6c05f12011-05-26 21:45:34 +030096
97 d = nfs4_insert_deviceid_node(&de->id_node);
98 n = container_of(d, struct objio_dev_ent, id_node);
99 if (n != de) {
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700100 dprintk("%s: Race with other n->od=%p\n", __func__, n->od.od);
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300101 objio_free_deviceid_node(&de->id_node);
102 de = n;
103 }
104
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300105 return de;
106}
107
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300108struct objio_segment {
109 struct pnfs_layout_segment lseg;
110
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700111 struct ore_layout layout;
112 struct ore_components oc;
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300113};
114
115static inline struct objio_segment *
116OBJIO_LSEG(struct pnfs_layout_segment *lseg)
117{
118 return container_of(lseg, struct objio_segment, lseg);
119}
120
Boaz Harrosh04f83452011-05-22 19:52:19 +0300121struct objio_state {
122 /* Generic layer */
Boaz Harroshe2e04352011-10-31 15:03:35 -0700123 struct objlayout_io_res oir;
Boaz Harrosh04f83452011-05-22 19:52:19 +0300124
Boaz Harrosh96218552011-10-31 14:47:32 -0700125 bool sync;
Boaz Harrosheecfc632011-10-31 15:15:38 -0700126 /*FIXME: Support for extra_bytes at ore_get_rw_state() */
127 struct ore_io_state *ios;
Boaz Harrosh04f83452011-05-22 19:52:19 +0300128};
129
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300130/* Send and wait for a get_device_info of devices in the layout,
131 then look them up with the osd_initiator library */
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700132static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay,
133 struct objio_segment *objio_seg, unsigned c, struct nfs4_deviceid *d_id,
134 gfp_t gfp_flags)
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300135{
136 struct pnfs_osd_deviceaddr *deviceaddr;
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300137 struct objio_dev_ent *ode;
138 struct osd_dev *od;
139 struct osd_dev_info odi;
140 int err;
141
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300142 ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id);
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700143 if (ode) {
144 objio_seg->oc.ods[c] = &ode->od; /* must use container_of */
145 return 0;
146 }
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300147
148 err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr, gfp_flags);
149 if (unlikely(err)) {
150 dprintk("%s: objlayout_get_deviceinfo dev(%llx:%llx) =>%d\n",
151 __func__, _DEVID_LO(d_id), _DEVID_HI(d_id), err);
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700152 return err;
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300153 }
154
155 odi.systemid_len = deviceaddr->oda_systemid.len;
156 if (odi.systemid_len > sizeof(odi.systemid)) {
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700157 dprintk("%s: odi.systemid_len > sizeof(systemid=%zd)\n",
158 __func__, sizeof(odi.systemid));
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300159 err = -EINVAL;
160 goto out;
161 } else if (odi.systemid_len)
162 memcpy(odi.systemid, deviceaddr->oda_systemid.data,
163 odi.systemid_len);
164 odi.osdname_len = deviceaddr->oda_osdname.len;
165 odi.osdname = (u8 *)deviceaddr->oda_osdname.data;
166
167 if (!odi.osdname_len && !odi.systemid_len) {
168 dprintk("%s: !odi.osdname_len && !odi.systemid_len\n",
169 __func__);
170 err = -ENODEV;
171 goto out;
172 }
173
174 od = osduld_info_lookup(&odi);
175 if (unlikely(IS_ERR(od))) {
176 err = PTR_ERR(od);
177 dprintk("%s: osduld_info_lookup => %d\n", __func__, err);
178 goto out;
179 }
180
181 ode = _dev_list_add(NFS_SERVER(pnfslay->plh_inode), d_id, od,
182 gfp_flags);
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700183 objio_seg->oc.ods[c] = &ode->od; /* must use container_of */
184 dprintk("Adding new dev_id(%llx:%llx)\n",
185 _DEVID_LO(d_id), _DEVID_HI(d_id));
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300186out:
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300187 objlayout_put_deviceinfo(deviceaddr);
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300188 return err;
189}
190
/* The layout geometry is checked by ore_verify_layout() in objio_alloc_lseg(). */
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300232
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700233static void copy_single_comp(struct ore_components *oc, unsigned c,
234 struct pnfs_osd_object_cred *src_comp)
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300235{
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700236 struct ore_comp *ocomp = &oc->comps[c];
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300237
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700238 WARN_ON(src_comp->oc_cap_key.cred_len > 0); /* libosd is NO_SEC only */
239 WARN_ON(src_comp->oc_cap.cred_len > sizeof(ocomp->cred));
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300240
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700241 ocomp->obj.partition = src_comp->oc_object_id.oid_partition_id;
242 ocomp->obj.id = src_comp->oc_object_id.oid_object_id;
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300243
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700244 memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred));
245}
246
247int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
248 struct objio_segment **pseg)
249{
250 struct __alloc_objio_segment {
251 struct objio_segment olseg;
252 struct ore_dev *ods[numdevs];
253 struct ore_comp comps[numdevs];
254 } *aolseg;
255
256 aolseg = kzalloc(sizeof(*aolseg), gfp_flags);
257 if (unlikely(!aolseg)) {
258 dprintk("%s: Faild allocation numdevs=%d size=%zd\n", __func__,
259 numdevs, sizeof(*aolseg));
260 return -ENOMEM;
261 }
262
263 aolseg->olseg.oc.numdevs = numdevs;
264 aolseg->olseg.oc.single_comp = EC_MULTPLE_COMPS;
265 aolseg->olseg.oc.comps = aolseg->comps;
266 aolseg->olseg.oc.ods = aolseg->ods;
267
268 *pseg = &aolseg->olseg;
269 return 0;
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300270}
271
272int objio_alloc_lseg(struct pnfs_layout_segment **outp,
273 struct pnfs_layout_hdr *pnfslay,
274 struct pnfs_layout_range *range,
275 struct xdr_stream *xdr,
276 gfp_t gfp_flags)
277{
278 struct objio_segment *objio_seg;
279 struct pnfs_osd_xdr_decode_layout_iter iter;
280 struct pnfs_osd_layout layout;
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700281 struct pnfs_osd_object_cred src_comp;
282 unsigned cur_comp;
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300283 int err;
284
285 err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr);
286 if (unlikely(err))
287 return err;
288
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700289 err = __alloc_objio_seg(layout.olo_num_comps, gfp_flags, &objio_seg);
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300290 if (unlikely(err))
291 return err;
292
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700293 objio_seg->layout.stripe_unit = layout.olo_map.odm_stripe_unit;
294 objio_seg->layout.group_width = layout.olo_map.odm_group_width;
295 objio_seg->layout.group_depth = layout.olo_map.odm_group_depth;
296 objio_seg->layout.mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1;
297 objio_seg->layout.raid_algorithm = layout.olo_map.odm_raid_algorithm;
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300298
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700299 err = ore_verify_layout(layout.olo_map.odm_num_comps,
300 &objio_seg->layout);
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300301 if (unlikely(err))
302 goto err;
303
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700304 objio_seg->oc.first_dev = layout.olo_comps_index;
305 cur_comp = 0;
306 while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) {
307 copy_single_comp(&objio_seg->oc, cur_comp, &src_comp);
308 err = objio_devices_lookup(pnfslay, objio_seg, cur_comp,
309 &src_comp.oc_object_id.oid_device_id,
310 gfp_flags);
311 if (err)
312 goto err;
313 ++cur_comp;
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300314 }
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700315 /* pnfs_osd_xdr_decode_layout_comp returns false on error */
316 if (unlikely(err))
317 goto err;
Boaz Harrosh93420772011-05-25 21:25:29 +0300318
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300319 *outp = &objio_seg->lseg;
320 return 0;
321
322err:
323 kfree(objio_seg);
324 dprintk("%s: Error: return %d\n", __func__, err);
325 *outp = NULL;
326 return err;
327}
328
329void objio_free_lseg(struct pnfs_layout_segment *lseg)
330{
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300331 int i;
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300332 struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
333
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700334 for (i = 0; i < objio_seg->oc.numdevs; i++) {
335 struct ore_dev *od = objio_seg->oc.ods[i];
336 struct objio_dev_ent *ode;
337
338 if (!od)
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300339 break;
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700340 ode = container_of(od, typeof(*ode), od);
341 nfs4_put_deviceid_node(&ode->id_node);
Boaz Harroshb6c05f12011-05-26 21:45:34 +0300342 }
Boaz Harrosh09f5bf42011-05-22 19:50:20 +0300343 kfree(objio_seg);
344}
345
Boaz Harrosh96218552011-10-31 14:47:32 -0700346static int
Boaz Harrosheecfc632011-10-31 15:15:38 -0700347objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, bool is_reading,
Boaz Harrosh96218552011-10-31 14:47:32 -0700348 struct pnfs_layout_segment *lseg, struct page **pages, unsigned pgbase,
349 loff_t offset, size_t count, void *rpcdata, gfp_t gfp_flags,
350 struct objio_state **outp)
Boaz Harrosh04f83452011-05-22 19:52:19 +0300351{
352 struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
Boaz Harrosheecfc632011-10-31 15:15:38 -0700353 struct ore_io_state *ios;
354 int ret;
Boaz Harrosh96218552011-10-31 14:47:32 -0700355 struct __alloc_objio_state {
356 struct objio_state objios;
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700357 struct pnfs_osd_ioerr ioerrs[objio_seg->oc.numdevs];
Boaz Harrosh96218552011-10-31 14:47:32 -0700358 } *aos;
Boaz Harrosh04f83452011-05-22 19:52:19 +0300359
Boaz Harrosh96218552011-10-31 14:47:32 -0700360 aos = kzalloc(sizeof(*aos), gfp_flags);
361 if (unlikely(!aos))
Boaz Harrosh04f83452011-05-22 19:52:19 +0300362 return -ENOMEM;
363
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700364 objlayout_init_ioerrs(&aos->objios.oir, objio_seg->oc.numdevs,
Boaz Harrosh96218552011-10-31 14:47:32 -0700365 aos->ioerrs, rpcdata, pnfs_layout_type);
366
Boaz Harrosheecfc632011-10-31 15:15:38 -0700367 ret = ore_get_rw_state(&objio_seg->layout, &objio_seg->oc, is_reading,
368 offset, count, &ios);
369 if (unlikely(ret)) {
370 kfree(aos);
371 return ret;
372 }
373
Boaz Harrosh96218552011-10-31 14:47:32 -0700374 ios->pages = pages;
375 ios->pgbase = pgbase;
Boaz Harrosheecfc632011-10-31 15:15:38 -0700376 ios->private = aos;
Boaz Harrosh96218552011-10-31 14:47:32 -0700377 BUG_ON(ios->nr_pages > (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT);
378
Boaz Harrosheecfc632011-10-31 15:15:38 -0700379 aos->objios.sync = 0;
380 aos->objios.ios = ios;
381 *outp = &aos->objios;
Boaz Harrosh04f83452011-05-22 19:52:19 +0300382 return 0;
383}
384
Boaz Harroshe2e04352011-10-31 15:03:35 -0700385void objio_free_result(struct objlayout_io_res *oir)
Boaz Harrosh04f83452011-05-22 19:52:19 +0300386{
Boaz Harrosheecfc632011-10-31 15:15:38 -0700387 struct objio_state *objios = container_of(oir, struct objio_state, oir);
Boaz Harrosh04f83452011-05-22 19:52:19 +0300388
Boaz Harrosheecfc632011-10-31 15:15:38 -0700389 ore_put_io_state(objios->ios);
390 kfree(objios);
Boaz Harrosh04f83452011-05-22 19:52:19 +0300391}
392
Boaz Harroshadb58532011-05-26 21:49:46 +0300393enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
394{
395 switch (oep) {
396 case OSD_ERR_PRI_NO_ERROR:
397 return (enum pnfs_osd_errno)0;
398
399 case OSD_ERR_PRI_CLEAR_PAGES:
400 BUG_ON(1);
401 return 0;
402
403 case OSD_ERR_PRI_RESOURCE:
404 return PNFS_OSD_ERR_RESOURCE;
405 case OSD_ERR_PRI_BAD_CRED:
406 return PNFS_OSD_ERR_BAD_CRED;
407 case OSD_ERR_PRI_NO_ACCESS:
408 return PNFS_OSD_ERR_NO_ACCESS;
409 case OSD_ERR_PRI_UNREACHABLE:
410 return PNFS_OSD_ERR_UNREACHABLE;
411 case OSD_ERR_PRI_NOT_FOUND:
412 return PNFS_OSD_ERR_NOT_FOUND;
413 case OSD_ERR_PRI_NO_SPACE:
414 return PNFS_OSD_ERR_NO_SPACE;
415 default:
416 WARN_ON(1);
417 /* fallthrough */
418 case OSD_ERR_PRI_EIO:
419 return PNFS_OSD_ERR_EIO;
420 }
421}
422
Boaz Harrosheecfc632011-10-31 15:15:38 -0700423static void __on_dev_error(struct ore_io_state *ios,
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700424 struct ore_dev *od, unsigned dev_index, enum osd_err_priority oep,
425 u64 dev_offset, u64 dev_len)
426{
427 struct objio_state *objios = ios->private;
428 struct pnfs_osd_objid pooid;
429 struct objio_dev_ent *ode = container_of(od, typeof(*ode), od);
430 /* FIXME: what to do with more-then-one-group layouts. We need to
431 * translate from ore_io_state index to oc->comps index
432 */
433 unsigned comp = dev_index;
434
435 pooid.oid_device_id = ode->id_node.deviceid;
436 pooid.oid_partition_id = ios->oc->comps[comp].obj.partition;
437 pooid.oid_object_id = ios->oc->comps[comp].obj.id;
438
439 objlayout_io_set_result(&objios->oir, comp,
440 &pooid, osd_pri_2_pnfs_err(oep),
Boaz Harrosheecfc632011-10-31 15:15:38 -0700441 dev_offset, dev_len, !ios->reading);
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700442}
443
/*
 * I/O state setup, execution and per-device error checking go through the
 * ORE library: see ore_get_rw_state(), ore_read() and ore_check_io() as used
 * by the functions in this file.
 */
Boaz Harrosh04f83452011-05-22 19:52:19 +0300764
765/*
766 * read
767 */
Boaz Harrosheecfc632011-10-31 15:15:38 -0700768static void _read_done(struct ore_io_state *ios, void *private)
Boaz Harrosh04f83452011-05-22 19:52:19 +0300769{
Boaz Harrosheecfc632011-10-31 15:15:38 -0700770 struct objio_state *objios = private;
Boaz Harrosh04f83452011-05-22 19:52:19 +0300771 ssize_t status;
Boaz Harrosheecfc632011-10-31 15:15:38 -0700772 int ret = ore_check_io(ios, &__on_dev_error);
Boaz Harrosh04f83452011-05-22 19:52:19 +0300773
Boaz Harrosheecfc632011-10-31 15:15:38 -0700774 /* FIXME: _io_free(ios) can we dealocate the libosd resources; */
Boaz Harrosh04f83452011-05-22 19:52:19 +0300775
776 if (likely(!ret))
777 status = ios->length;
778 else
779 status = ret;
780
Boaz Harrosheecfc632011-10-31 15:15:38 -0700781 objlayout_read_done(&objios->oir, status, objios->sync);
Boaz Harrosh04f83452011-05-22 19:52:19 +0300782}
783
/* Reads are issued with ore_read(); see objio_read_pagelist(). */
Boaz Harrosh04f83452011-05-22 19:52:19 +0300840
Boaz Harrosh96218552011-10-31 14:47:32 -0700841int objio_read_pagelist(struct nfs_read_data *rdata)
Boaz Harrosh04f83452011-05-22 19:52:19 +0300842{
Boaz Harrosheecfc632011-10-31 15:15:38 -0700843 struct objio_state *objios;
Boaz Harrosh04f83452011-05-22 19:52:19 +0300844 int ret;
845
Boaz Harrosheecfc632011-10-31 15:15:38 -0700846 ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, true,
Boaz Harrosh96218552011-10-31 14:47:32 -0700847 rdata->lseg, rdata->args.pages, rdata->args.pgbase,
848 rdata->args.offset, rdata->args.count, rdata,
Boaz Harrosheecfc632011-10-31 15:15:38 -0700849 GFP_KERNEL, &objios);
Boaz Harrosh96218552011-10-31 14:47:32 -0700850 if (unlikely(ret))
851 return ret;
852
Boaz Harrosheecfc632011-10-31 15:15:38 -0700853 objios->ios->done = _read_done;
854 dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
855 rdata->args.offset, rdata->args.count);
856 return ore_read(objios->ios);
Boaz Harrosh04f83452011-05-22 19:52:19 +0300857}
858
859/*
860 * write
861 */
Boaz Harrosheecfc632011-10-31 15:15:38 -0700862static void _write_done(struct ore_io_state *ios, void *private)
Boaz Harrosh04f83452011-05-22 19:52:19 +0300863{
Boaz Harrosheecfc632011-10-31 15:15:38 -0700864 struct objio_state *objios = private;
Boaz Harrosh04f83452011-05-22 19:52:19 +0300865 ssize_t status;
Boaz Harrosheecfc632011-10-31 15:15:38 -0700866 int ret = ore_check_io(ios, &__on_dev_error);
Boaz Harrosh04f83452011-05-22 19:52:19 +0300867
Boaz Harrosheecfc632011-10-31 15:15:38 -0700868 /* FIXME: _io_free(ios) can we dealocate the libosd resources; */
Boaz Harrosh04f83452011-05-22 19:52:19 +0300869
870 if (likely(!ret)) {
871 /* FIXME: should be based on the OSD's persistence model
872 * See OSD2r05 Section 4.13 Data persistence model */
Boaz Harrosheecfc632011-10-31 15:15:38 -0700873 objios->oir.committed = NFS_FILE_SYNC;
Boaz Harrosh04f83452011-05-22 19:52:19 +0300874 status = ios->length;
875 } else {
876 status = ret;
877 }
878
Boaz Harrosheecfc632011-10-31 15:15:38 -0700879 objlayout_write_done(&objios->oir, status, objios->sync);
Boaz Harrosh04f83452011-05-22 19:52:19 +0300880}
881
Boaz Harrosheecfc632011-10-31 15:15:38 -0700882#if 0
Boaz Harrosh04f83452011-05-22 19:52:19 +0300883static int _write_mirrors(struct objio_state *ios, unsigned cur_comp)
884{
885 struct _objio_per_comp *master_dev = &ios->per_dev[cur_comp];
886 unsigned dev = ios->per_dev[cur_comp].dev;
887 unsigned last_comp = cur_comp + ios->layout->mirrors_p1;
888 int ret;
889
890 for (; cur_comp < last_comp; ++cur_comp, ++dev) {
891 struct osd_request *or = NULL;
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700892 struct ore_comp *cred = &ios->oc->comps[cur_comp];
893 struct osd_obj_id obj = cred->obj;
Boaz Harrosh04f83452011-05-22 19:52:19 +0300894 struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp];
895 struct bio *bio;
896
897 or = osd_start_request(_io_od(ios, dev), GFP_NOFS);
898 if (unlikely(!or)) {
899 ret = -ENOMEM;
900 goto err;
901 }
902 per_dev->or = or;
903
904 if (per_dev != master_dev) {
905 bio = bio_kmalloc(GFP_NOFS,
906 master_dev->bio->bi_max_vecs);
907 if (unlikely(!bio)) {
908 dprintk("Faild to allocate BIO size=%u\n",
909 master_dev->bio->bi_max_vecs);
910 ret = -ENOMEM;
911 goto err;
912 }
913
914 __bio_clone(bio, master_dev->bio);
915 bio->bi_bdev = NULL;
916 bio->bi_next = NULL;
917 per_dev->bio = bio;
918 per_dev->dev = dev;
919 per_dev->length = master_dev->length;
920 per_dev->offset = master_dev->offset;
921 } else {
922 bio = master_dev->bio;
923 bio->bi_rw |= REQ_WRITE;
924 }
925
926 osd_req_write(or, &obj, per_dev->offset, bio, per_dev->length);
927
Boaz Harroshaf4f5b52011-10-31 15:04:19 -0700928 ret = osd_finalize_request(or, 0, cred->cred, NULL);
Boaz Harrosh04f83452011-05-22 19:52:19 +0300929 if (ret) {
930 dprintk("%s: Faild to osd_finalize_request() => %d\n",
931 __func__, ret);
932 goto err;
933 }
934
935 dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n",
936 __func__, cur_comp, dev, obj.id, _LLU(per_dev->offset),
937 per_dev->length);
938 }
939
940err:
941 return ret;
942}
943
Boaz Harroshe6c40fe2011-10-31 14:45:46 -0700944static int _write_exec(struct objio_state *ios)
Boaz Harrosh04f83452011-05-22 19:52:19 +0300945{
946 unsigned i;
947 int ret;
948
949 for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
950 if (!ios->per_dev[i].length)
951 continue;
952 ret = _write_mirrors(ios, i);
953 if (unlikely(ret))
954 goto err;
955 }
956
957 ios->done = _write_done;
Boaz Harroshe6c40fe2011-10-31 14:45:46 -0700958 return _io_exec(ios);
Boaz Harrosh04f83452011-05-22 19:52:19 +0300959
960err:
961 _io_free(ios);
962 return ret;
963}
Boaz Harrosheecfc632011-10-31 15:15:38 -0700964#endif
Boaz Harrosh04f83452011-05-22 19:52:19 +0300965
Boaz Harrosh96218552011-10-31 14:47:32 -0700966int objio_write_pagelist(struct nfs_write_data *wdata, int how)
Boaz Harrosh04f83452011-05-22 19:52:19 +0300967{
Boaz Harrosheecfc632011-10-31 15:15:38 -0700968 struct objio_state *objios;
Boaz Harrosh04f83452011-05-22 19:52:19 +0300969 int ret;
970
Boaz Harrosheecfc632011-10-31 15:15:38 -0700971 ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, false,
Boaz Harrosh96218552011-10-31 14:47:32 -0700972 wdata->lseg, wdata->args.pages, wdata->args.pgbase,
973 wdata->args.offset, wdata->args.count, wdata, GFP_NOFS,
Boaz Harrosheecfc632011-10-31 15:15:38 -0700974 &objios);
Boaz Harrosh96218552011-10-31 14:47:32 -0700975 if (unlikely(ret))
976 return ret;
977
Boaz Harrosheecfc632011-10-31 15:15:38 -0700978 objios->sync = 0 != (how & FLUSH_SYNC);
Boaz Harrosh96218552011-10-31 14:47:32 -0700979
Boaz Harrosheecfc632011-10-31 15:15:38 -0700980 if (!objios->sync)
981 objios->ios->done = _write_done;
982
983 dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
984 wdata->args.offset, wdata->args.count);
985 ret = ore_write(objios->ios);
Boaz Harrosh04f83452011-05-22 19:52:19 +0300986 if (unlikely(ret))
987 return ret;
988
Boaz Harrosheecfc632011-10-31 15:15:38 -0700989 if (objios->sync)
990 _write_done(objios->ios, objios);
991
992 return 0;
Boaz Harrosh04f83452011-05-22 19:52:19 +0300993}
994
Boaz Harrosh93420772011-05-25 21:25:29 +0300995static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
996 struct nfs_page *prev, struct nfs_page *req)
997{
998 if (!pnfs_generic_pg_test(pgio, prev, req))
999 return false;
1000
1001 return pgio->pg_count + req->wb_bytes <=
Boaz Harroshaf4f5b52011-10-31 15:04:19 -07001002 OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
Boaz Harrosh93420772011-05-25 21:25:29 +03001003}
1004
Trond Myklebust1751c362011-06-10 13:30:23 -04001005static const struct nfs_pageio_ops objio_pg_read_ops = {
Trond Myklebustd8007d42011-06-10 13:30:23 -04001006 .pg_init = pnfs_generic_pg_init_read,
Trond Myklebust1751c362011-06-10 13:30:23 -04001007 .pg_test = objio_pg_test,
Trond Myklebust493292d2011-07-13 15:58:28 -04001008 .pg_doio = pnfs_generic_pg_readpages,
Trond Myklebust1751c362011-06-10 13:30:23 -04001009};
1010
1011static const struct nfs_pageio_ops objio_pg_write_ops = {
Trond Myklebustd8007d42011-06-10 13:30:23 -04001012 .pg_init = pnfs_generic_pg_init_write,
Trond Myklebust1751c362011-06-10 13:30:23 -04001013 .pg_test = objio_pg_test,
Trond Myklebustdce81292011-07-13 15:59:19 -04001014 .pg_doio = pnfs_generic_pg_writepages,
Trond Myklebust1751c362011-06-10 13:30:23 -04001015};
1016
Benny Halevyc93407d2011-05-22 19:49:06 +03001017static struct pnfs_layoutdriver_type objlayout_type = {
1018 .id = LAYOUT_OSD2_OBJECTS,
1019 .name = "LAYOUT_OSD2_OBJECTS",
Benny Halevy8a1636c2010-07-14 15:43:57 -04001020 .flags = PNFS_LAYOUTRET_ON_SETATTR,
Boaz Harrosh09f5bf42011-05-22 19:50:20 +03001021
Benny Halevye51b8412011-05-22 19:51:48 +03001022 .alloc_layout_hdr = objlayout_alloc_layout_hdr,
1023 .free_layout_hdr = objlayout_free_layout_hdr,
1024
Boaz Harrosh09f5bf42011-05-22 19:50:20 +03001025 .alloc_lseg = objlayout_alloc_lseg,
1026 .free_lseg = objlayout_free_lseg,
Boaz Harroshb6c05f12011-05-26 21:45:34 +03001027
Boaz Harrosh04f83452011-05-22 19:52:19 +03001028 .read_pagelist = objlayout_read_pagelist,
1029 .write_pagelist = objlayout_write_pagelist,
Trond Myklebust1751c362011-06-10 13:30:23 -04001030 .pg_read_ops = &objio_pg_read_ops,
1031 .pg_write_ops = &objio_pg_write_ops,
Boaz Harrosh04f83452011-05-22 19:52:19 +03001032
Boaz Harroshb6c05f12011-05-26 21:45:34 +03001033 .free_deviceid_node = objio_free_deviceid_node,
Boaz Harroshadb58532011-05-26 21:49:46 +03001034
Boaz Harrosha0fe8bf2011-05-22 19:54:13 +03001035 .encode_layoutcommit = objlayout_encode_layoutcommit,
Boaz Harroshadb58532011-05-26 21:49:46 +03001036 .encode_layoutreturn = objlayout_encode_layoutreturn,
Benny Halevyc93407d2011-05-22 19:49:06 +03001037};
1038
1039MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects");
1040MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>");
1041MODULE_LICENSE("GPL");
1042
1043static int __init
1044objlayout_init(void)
1045{
1046 int ret = pnfs_register_layoutdriver(&objlayout_type);
1047
1048 if (ret)
1049 printk(KERN_INFO
1050 "%s: Registering OSD pNFS Layout Driver failed: error=%d\n",
1051 __func__, ret);
1052 else
1053 printk(KERN_INFO "%s: Registered OSD pNFS Layout Driver\n",
1054 __func__);
1055 return ret;
1056}
1057
1058static void __exit
1059objlayout_exit(void)
1060{
1061 pnfs_unregister_layoutdriver(&objlayout_type);
1062 printk(KERN_INFO "%s: Unregistered OSD pNFS Layout Driver\n",
1063 __func__);
1064}
1065
J. Bruce Fieldsf85ef692011-07-15 19:18:42 -04001066MODULE_ALIAS("nfs-layouttype4-2");
1067
Benny Halevyc93407d2011-05-22 19:49:06 +03001068module_init(objlayout_init);
1069module_exit(objlayout_exit);