/*
 *  pNFS Objects layout implementation over open-osd initiator library
 *
 *  Copyright (C) 2009 Panasas Inc. [year of first publication]
 *  All rights reserved.
 *
 *  Benny Halevy <bhalevy@panasas.com>
 *  Boaz Harrosh <bharrosh@panasas.com>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2
 *  See the file COPYING included with this distribution for more details.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *
 *  1. Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *  3. Neither the name of the Panasas company nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
 *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
| 39 | |
| 40 | #include <linux/module.h> |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 41 | #include <scsi/osd_ore.h> |
Boaz Harrosh | 09f5bf4 | 2011-05-22 19:50:20 +0300 | [diff] [blame] | 42 | |
| 43 | #include "objlayout.h" |
| 44 | |
| 45 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD |
| 46 | |
Boaz Harrosh | b6c05f1 | 2011-05-26 21:45:34 +0300 | [diff] [blame] | 47 | struct objio_dev_ent { |
| 48 | struct nfs4_deviceid_node id_node; |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 49 | struct ore_dev od; |
Boaz Harrosh | b6c05f1 | 2011-05-26 21:45:34 +0300 | [diff] [blame] | 50 | }; |
| 51 | |
| 52 | static void |
| 53 | objio_free_deviceid_node(struct nfs4_deviceid_node *d) |
| 54 | { |
| 55 | struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node); |
| 56 | |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 57 | dprintk("%s: free od=%p\n", __func__, de->od.od); |
| 58 | osduld_put_device(de->od.od); |
Boaz Harrosh | b6c05f1 | 2011-05-26 21:45:34 +0300 | [diff] [blame] | 59 | kfree(de); |
| 60 | } |
| 61 | |
| 62 | static struct objio_dev_ent *_dev_list_find(const struct nfs_server *nfss, |
| 63 | const struct nfs4_deviceid *d_id) |
| 64 | { |
| 65 | struct nfs4_deviceid_node *d; |
| 66 | struct objio_dev_ent *de; |
| 67 | |
| 68 | d = nfs4_find_get_deviceid(nfss->pnfs_curr_ld, nfss->nfs_client, d_id); |
| 69 | if (!d) |
| 70 | return NULL; |
| 71 | |
| 72 | de = container_of(d, struct objio_dev_ent, id_node); |
| 73 | return de; |
| 74 | } |
| 75 | |
| 76 | static struct objio_dev_ent * |
| 77 | _dev_list_add(const struct nfs_server *nfss, |
| 78 | const struct nfs4_deviceid *d_id, struct osd_dev *od, |
| 79 | gfp_t gfp_flags) |
| 80 | { |
| 81 | struct nfs4_deviceid_node *d; |
| 82 | struct objio_dev_ent *de = kzalloc(sizeof(*de), gfp_flags); |
| 83 | struct objio_dev_ent *n; |
| 84 | |
| 85 | if (!de) { |
| 86 | dprintk("%s: -ENOMEM od=%p\n", __func__, od); |
| 87 | return NULL; |
| 88 | } |
| 89 | |
| 90 | dprintk("%s: Adding od=%p\n", __func__, od); |
| 91 | nfs4_init_deviceid_node(&de->id_node, |
| 92 | nfss->pnfs_curr_ld, |
| 93 | nfss->nfs_client, |
| 94 | d_id); |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 95 | de->od.od = od; |
Boaz Harrosh | b6c05f1 | 2011-05-26 21:45:34 +0300 | [diff] [blame] | 96 | |
| 97 | d = nfs4_insert_deviceid_node(&de->id_node); |
| 98 | n = container_of(d, struct objio_dev_ent, id_node); |
| 99 | if (n != de) { |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 100 | dprintk("%s: Race with other n->od=%p\n", __func__, n->od.od); |
Boaz Harrosh | b6c05f1 | 2011-05-26 21:45:34 +0300 | [diff] [blame] | 101 | objio_free_deviceid_node(&de->id_node); |
| 102 | de = n; |
| 103 | } |
| 104 | |
Boaz Harrosh | b6c05f1 | 2011-05-26 21:45:34 +0300 | [diff] [blame] | 105 | return de; |
| 106 | } |
| 107 | |
Boaz Harrosh | 09f5bf4 | 2011-05-22 19:50:20 +0300 | [diff] [blame] | 108 | struct objio_segment { |
| 109 | struct pnfs_layout_segment lseg; |
| 110 | |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 111 | struct ore_layout layout; |
| 112 | struct ore_components oc; |
Boaz Harrosh | 09f5bf4 | 2011-05-22 19:50:20 +0300 | [diff] [blame] | 113 | }; |
| 114 | |
| 115 | static inline struct objio_segment * |
| 116 | OBJIO_LSEG(struct pnfs_layout_segment *lseg) |
| 117 | { |
| 118 | return container_of(lseg, struct objio_segment, lseg); |
| 119 | } |
| 120 | |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 121 | struct objio_state { |
| 122 | /* Generic layer */ |
Boaz Harrosh | e2e0435 | 2011-10-31 15:03:35 -0700 | [diff] [blame] | 123 | struct objlayout_io_res oir; |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 124 | |
Boaz Harrosh | 9621855 | 2011-10-31 14:47:32 -0700 | [diff] [blame] | 125 | bool sync; |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 126 | /*FIXME: Support for extra_bytes at ore_get_rw_state() */ |
| 127 | struct ore_io_state *ios; |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 128 | }; |
| 129 | |
Boaz Harrosh | b6c05f1 | 2011-05-26 21:45:34 +0300 | [diff] [blame] | 130 | /* Send and wait for a get_device_info of devices in the layout, |
| 131 | then look them up with the osd_initiator library */ |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 132 | static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay, |
| 133 | struct objio_segment *objio_seg, unsigned c, struct nfs4_deviceid *d_id, |
| 134 | gfp_t gfp_flags) |
Boaz Harrosh | b6c05f1 | 2011-05-26 21:45:34 +0300 | [diff] [blame] | 135 | { |
| 136 | struct pnfs_osd_deviceaddr *deviceaddr; |
Boaz Harrosh | b6c05f1 | 2011-05-26 21:45:34 +0300 | [diff] [blame] | 137 | struct objio_dev_ent *ode; |
| 138 | struct osd_dev *od; |
| 139 | struct osd_dev_info odi; |
| 140 | int err; |
| 141 | |
Boaz Harrosh | b6c05f1 | 2011-05-26 21:45:34 +0300 | [diff] [blame] | 142 | ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id); |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 143 | if (ode) { |
| 144 | objio_seg->oc.ods[c] = &ode->od; /* must use container_of */ |
| 145 | return 0; |
| 146 | } |
Boaz Harrosh | b6c05f1 | 2011-05-26 21:45:34 +0300 | [diff] [blame] | 147 | |
| 148 | err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr, gfp_flags); |
| 149 | if (unlikely(err)) { |
| 150 | dprintk("%s: objlayout_get_deviceinfo dev(%llx:%llx) =>%d\n", |
| 151 | __func__, _DEVID_LO(d_id), _DEVID_HI(d_id), err); |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 152 | return err; |
Boaz Harrosh | b6c05f1 | 2011-05-26 21:45:34 +0300 | [diff] [blame] | 153 | } |
| 154 | |
| 155 | odi.systemid_len = deviceaddr->oda_systemid.len; |
| 156 | if (odi.systemid_len > sizeof(odi.systemid)) { |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 157 | dprintk("%s: odi.systemid_len > sizeof(systemid=%zd)\n", |
| 158 | __func__, sizeof(odi.systemid)); |
Boaz Harrosh | b6c05f1 | 2011-05-26 21:45:34 +0300 | [diff] [blame] | 159 | err = -EINVAL; |
| 160 | goto out; |
| 161 | } else if (odi.systemid_len) |
| 162 | memcpy(odi.systemid, deviceaddr->oda_systemid.data, |
| 163 | odi.systemid_len); |
| 164 | odi.osdname_len = deviceaddr->oda_osdname.len; |
| 165 | odi.osdname = (u8 *)deviceaddr->oda_osdname.data; |
| 166 | |
| 167 | if (!odi.osdname_len && !odi.systemid_len) { |
| 168 | dprintk("%s: !odi.osdname_len && !odi.systemid_len\n", |
| 169 | __func__); |
| 170 | err = -ENODEV; |
| 171 | goto out; |
| 172 | } |
| 173 | |
| 174 | od = osduld_info_lookup(&odi); |
| 175 | if (unlikely(IS_ERR(od))) { |
| 176 | err = PTR_ERR(od); |
| 177 | dprintk("%s: osduld_info_lookup => %d\n", __func__, err); |
| 178 | goto out; |
| 179 | } |
| 180 | |
| 181 | ode = _dev_list_add(NFS_SERVER(pnfslay->plh_inode), d_id, od, |
| 182 | gfp_flags); |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 183 | objio_seg->oc.ods[c] = &ode->od; /* must use container_of */ |
| 184 | dprintk("Adding new dev_id(%llx:%llx)\n", |
| 185 | _DEVID_LO(d_id), _DEVID_HI(d_id)); |
Boaz Harrosh | b6c05f1 | 2011-05-26 21:45:34 +0300 | [diff] [blame] | 186 | out: |
Boaz Harrosh | b6c05f1 | 2011-05-26 21:45:34 +0300 | [diff] [blame] | 187 | objlayout_put_deviceinfo(deviceaddr); |
Boaz Harrosh | b6c05f1 | 2011-05-26 21:45:34 +0300 | [diff] [blame] | 188 | return err; |
| 189 | } |
| 190 | |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 191 | static void copy_single_comp(struct ore_components *oc, unsigned c, |
| 192 | struct pnfs_osd_object_cred *src_comp) |
Boaz Harrosh | 09f5bf4 | 2011-05-22 19:50:20 +0300 | [diff] [blame] | 193 | { |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 194 | struct ore_comp *ocomp = &oc->comps[c]; |
Boaz Harrosh | 09f5bf4 | 2011-05-22 19:50:20 +0300 | [diff] [blame] | 195 | |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 196 | WARN_ON(src_comp->oc_cap_key.cred_len > 0); /* libosd is NO_SEC only */ |
| 197 | WARN_ON(src_comp->oc_cap.cred_len > sizeof(ocomp->cred)); |
Boaz Harrosh | 09f5bf4 | 2011-05-22 19:50:20 +0300 | [diff] [blame] | 198 | |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 199 | ocomp->obj.partition = src_comp->oc_object_id.oid_partition_id; |
| 200 | ocomp->obj.id = src_comp->oc_object_id.oid_object_id; |
Boaz Harrosh | 09f5bf4 | 2011-05-22 19:50:20 +0300 | [diff] [blame] | 201 | |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 202 | memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred)); |
| 203 | } |
| 204 | |
| 205 | int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags, |
| 206 | struct objio_segment **pseg) |
| 207 | { |
| 208 | struct __alloc_objio_segment { |
| 209 | struct objio_segment olseg; |
| 210 | struct ore_dev *ods[numdevs]; |
| 211 | struct ore_comp comps[numdevs]; |
| 212 | } *aolseg; |
| 213 | |
| 214 | aolseg = kzalloc(sizeof(*aolseg), gfp_flags); |
| 215 | if (unlikely(!aolseg)) { |
| 216 | dprintk("%s: Faild allocation numdevs=%d size=%zd\n", __func__, |
| 217 | numdevs, sizeof(*aolseg)); |
| 218 | return -ENOMEM; |
| 219 | } |
| 220 | |
| 221 | aolseg->olseg.oc.numdevs = numdevs; |
| 222 | aolseg->olseg.oc.single_comp = EC_MULTPLE_COMPS; |
| 223 | aolseg->olseg.oc.comps = aolseg->comps; |
| 224 | aolseg->olseg.oc.ods = aolseg->ods; |
| 225 | |
| 226 | *pseg = &aolseg->olseg; |
| 227 | return 0; |
Boaz Harrosh | 09f5bf4 | 2011-05-22 19:50:20 +0300 | [diff] [blame] | 228 | } |
| 229 | |
| 230 | int objio_alloc_lseg(struct pnfs_layout_segment **outp, |
| 231 | struct pnfs_layout_hdr *pnfslay, |
| 232 | struct pnfs_layout_range *range, |
| 233 | struct xdr_stream *xdr, |
| 234 | gfp_t gfp_flags) |
| 235 | { |
| 236 | struct objio_segment *objio_seg; |
| 237 | struct pnfs_osd_xdr_decode_layout_iter iter; |
| 238 | struct pnfs_osd_layout layout; |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 239 | struct pnfs_osd_object_cred src_comp; |
| 240 | unsigned cur_comp; |
Boaz Harrosh | 09f5bf4 | 2011-05-22 19:50:20 +0300 | [diff] [blame] | 241 | int err; |
| 242 | |
| 243 | err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr); |
| 244 | if (unlikely(err)) |
| 245 | return err; |
| 246 | |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 247 | err = __alloc_objio_seg(layout.olo_num_comps, gfp_flags, &objio_seg); |
Boaz Harrosh | 09f5bf4 | 2011-05-22 19:50:20 +0300 | [diff] [blame] | 248 | if (unlikely(err)) |
| 249 | return err; |
| 250 | |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 251 | objio_seg->layout.stripe_unit = layout.olo_map.odm_stripe_unit; |
| 252 | objio_seg->layout.group_width = layout.olo_map.odm_group_width; |
| 253 | objio_seg->layout.group_depth = layout.olo_map.odm_group_depth; |
| 254 | objio_seg->layout.mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1; |
| 255 | objio_seg->layout.raid_algorithm = layout.olo_map.odm_raid_algorithm; |
Boaz Harrosh | 09f5bf4 | 2011-05-22 19:50:20 +0300 | [diff] [blame] | 256 | |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 257 | err = ore_verify_layout(layout.olo_map.odm_num_comps, |
| 258 | &objio_seg->layout); |
Boaz Harrosh | 09f5bf4 | 2011-05-22 19:50:20 +0300 | [diff] [blame] | 259 | if (unlikely(err)) |
| 260 | goto err; |
| 261 | |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 262 | objio_seg->oc.first_dev = layout.olo_comps_index; |
| 263 | cur_comp = 0; |
| 264 | while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) { |
| 265 | copy_single_comp(&objio_seg->oc, cur_comp, &src_comp); |
| 266 | err = objio_devices_lookup(pnfslay, objio_seg, cur_comp, |
| 267 | &src_comp.oc_object_id.oid_device_id, |
| 268 | gfp_flags); |
| 269 | if (err) |
| 270 | goto err; |
| 271 | ++cur_comp; |
Boaz Harrosh | 09f5bf4 | 2011-05-22 19:50:20 +0300 | [diff] [blame] | 272 | } |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 273 | /* pnfs_osd_xdr_decode_layout_comp returns false on error */ |
| 274 | if (unlikely(err)) |
| 275 | goto err; |
Boaz Harrosh | 9342077 | 2011-05-25 21:25:29 +0300 | [diff] [blame] | 276 | |
Boaz Harrosh | 09f5bf4 | 2011-05-22 19:50:20 +0300 | [diff] [blame] | 277 | *outp = &objio_seg->lseg; |
| 278 | return 0; |
| 279 | |
| 280 | err: |
| 281 | kfree(objio_seg); |
| 282 | dprintk("%s: Error: return %d\n", __func__, err); |
| 283 | *outp = NULL; |
| 284 | return err; |
| 285 | } |
| 286 | |
| 287 | void objio_free_lseg(struct pnfs_layout_segment *lseg) |
| 288 | { |
Boaz Harrosh | b6c05f1 | 2011-05-26 21:45:34 +0300 | [diff] [blame] | 289 | int i; |
Boaz Harrosh | 09f5bf4 | 2011-05-22 19:50:20 +0300 | [diff] [blame] | 290 | struct objio_segment *objio_seg = OBJIO_LSEG(lseg); |
| 291 | |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 292 | for (i = 0; i < objio_seg->oc.numdevs; i++) { |
| 293 | struct ore_dev *od = objio_seg->oc.ods[i]; |
| 294 | struct objio_dev_ent *ode; |
| 295 | |
| 296 | if (!od) |
Boaz Harrosh | b6c05f1 | 2011-05-26 21:45:34 +0300 | [diff] [blame] | 297 | break; |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 298 | ode = container_of(od, typeof(*ode), od); |
| 299 | nfs4_put_deviceid_node(&ode->id_node); |
Boaz Harrosh | b6c05f1 | 2011-05-26 21:45:34 +0300 | [diff] [blame] | 300 | } |
Boaz Harrosh | 09f5bf4 | 2011-05-22 19:50:20 +0300 | [diff] [blame] | 301 | kfree(objio_seg); |
| 302 | } |
| 303 | |
Boaz Harrosh | 9621855 | 2011-10-31 14:47:32 -0700 | [diff] [blame] | 304 | static int |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 305 | objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, bool is_reading, |
Boaz Harrosh | 9621855 | 2011-10-31 14:47:32 -0700 | [diff] [blame] | 306 | struct pnfs_layout_segment *lseg, struct page **pages, unsigned pgbase, |
| 307 | loff_t offset, size_t count, void *rpcdata, gfp_t gfp_flags, |
| 308 | struct objio_state **outp) |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 309 | { |
| 310 | struct objio_segment *objio_seg = OBJIO_LSEG(lseg); |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 311 | struct ore_io_state *ios; |
| 312 | int ret; |
Boaz Harrosh | 9621855 | 2011-10-31 14:47:32 -0700 | [diff] [blame] | 313 | struct __alloc_objio_state { |
| 314 | struct objio_state objios; |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 315 | struct pnfs_osd_ioerr ioerrs[objio_seg->oc.numdevs]; |
Boaz Harrosh | 9621855 | 2011-10-31 14:47:32 -0700 | [diff] [blame] | 316 | } *aos; |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 317 | |
Boaz Harrosh | 9621855 | 2011-10-31 14:47:32 -0700 | [diff] [blame] | 318 | aos = kzalloc(sizeof(*aos), gfp_flags); |
| 319 | if (unlikely(!aos)) |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 320 | return -ENOMEM; |
| 321 | |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 322 | objlayout_init_ioerrs(&aos->objios.oir, objio_seg->oc.numdevs, |
Boaz Harrosh | 9621855 | 2011-10-31 14:47:32 -0700 | [diff] [blame] | 323 | aos->ioerrs, rpcdata, pnfs_layout_type); |
| 324 | |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 325 | ret = ore_get_rw_state(&objio_seg->layout, &objio_seg->oc, is_reading, |
| 326 | offset, count, &ios); |
| 327 | if (unlikely(ret)) { |
| 328 | kfree(aos); |
| 329 | return ret; |
| 330 | } |
| 331 | |
Boaz Harrosh | 9621855 | 2011-10-31 14:47:32 -0700 | [diff] [blame] | 332 | ios->pages = pages; |
| 333 | ios->pgbase = pgbase; |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 334 | ios->private = aos; |
Boaz Harrosh | 9621855 | 2011-10-31 14:47:32 -0700 | [diff] [blame] | 335 | BUG_ON(ios->nr_pages > (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT); |
| 336 | |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 337 | aos->objios.sync = 0; |
| 338 | aos->objios.ios = ios; |
| 339 | *outp = &aos->objios; |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 340 | return 0; |
| 341 | } |
| 342 | |
Boaz Harrosh | e2e0435 | 2011-10-31 15:03:35 -0700 | [diff] [blame] | 343 | void objio_free_result(struct objlayout_io_res *oir) |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 344 | { |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 345 | struct objio_state *objios = container_of(oir, struct objio_state, oir); |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 346 | |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 347 | ore_put_io_state(objios->ios); |
| 348 | kfree(objios); |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 349 | } |
| 350 | |
Boaz Harrosh | adb5853 | 2011-05-26 21:49:46 +0300 | [diff] [blame] | 351 | enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) |
| 352 | { |
| 353 | switch (oep) { |
| 354 | case OSD_ERR_PRI_NO_ERROR: |
| 355 | return (enum pnfs_osd_errno)0; |
| 356 | |
| 357 | case OSD_ERR_PRI_CLEAR_PAGES: |
| 358 | BUG_ON(1); |
| 359 | return 0; |
| 360 | |
| 361 | case OSD_ERR_PRI_RESOURCE: |
| 362 | return PNFS_OSD_ERR_RESOURCE; |
| 363 | case OSD_ERR_PRI_BAD_CRED: |
| 364 | return PNFS_OSD_ERR_BAD_CRED; |
| 365 | case OSD_ERR_PRI_NO_ACCESS: |
| 366 | return PNFS_OSD_ERR_NO_ACCESS; |
| 367 | case OSD_ERR_PRI_UNREACHABLE: |
| 368 | return PNFS_OSD_ERR_UNREACHABLE; |
| 369 | case OSD_ERR_PRI_NOT_FOUND: |
| 370 | return PNFS_OSD_ERR_NOT_FOUND; |
| 371 | case OSD_ERR_PRI_NO_SPACE: |
| 372 | return PNFS_OSD_ERR_NO_SPACE; |
| 373 | default: |
| 374 | WARN_ON(1); |
| 375 | /* fallthrough */ |
| 376 | case OSD_ERR_PRI_EIO: |
| 377 | return PNFS_OSD_ERR_EIO; |
| 378 | } |
| 379 | } |
| 380 | |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 381 | static void __on_dev_error(struct ore_io_state *ios, |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 382 | struct ore_dev *od, unsigned dev_index, enum osd_err_priority oep, |
| 383 | u64 dev_offset, u64 dev_len) |
| 384 | { |
| 385 | struct objio_state *objios = ios->private; |
| 386 | struct pnfs_osd_objid pooid; |
| 387 | struct objio_dev_ent *ode = container_of(od, typeof(*ode), od); |
| 388 | /* FIXME: what to do with more-then-one-group layouts. We need to |
| 389 | * translate from ore_io_state index to oc->comps index |
| 390 | */ |
| 391 | unsigned comp = dev_index; |
| 392 | |
| 393 | pooid.oid_device_id = ode->id_node.deviceid; |
| 394 | pooid.oid_partition_id = ios->oc->comps[comp].obj.partition; |
| 395 | pooid.oid_object_id = ios->oc->comps[comp].obj.id; |
| 396 | |
| 397 | objlayout_io_set_result(&objios->oir, comp, |
| 398 | &pooid, osd_pri_2_pnfs_err(oep), |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 399 | dev_offset, dev_len, !ios->reading); |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 400 | } |
| 401 | |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 402 | /* |
| 403 | * read |
| 404 | */ |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 405 | static void _read_done(struct ore_io_state *ios, void *private) |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 406 | { |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 407 | struct objio_state *objios = private; |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 408 | ssize_t status; |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 409 | int ret = ore_check_io(ios, &__on_dev_error); |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 410 | |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 411 | /* FIXME: _io_free(ios) can we dealocate the libosd resources; */ |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 412 | |
| 413 | if (likely(!ret)) |
| 414 | status = ios->length; |
| 415 | else |
| 416 | status = ret; |
| 417 | |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 418 | objlayout_read_done(&objios->oir, status, objios->sync); |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 419 | } |
| 420 | |
Boaz Harrosh | 9621855 | 2011-10-31 14:47:32 -0700 | [diff] [blame] | 421 | int objio_read_pagelist(struct nfs_read_data *rdata) |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 422 | { |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 423 | struct objio_state *objios; |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 424 | int ret; |
| 425 | |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 426 | ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, true, |
Boaz Harrosh | 9621855 | 2011-10-31 14:47:32 -0700 | [diff] [blame] | 427 | rdata->lseg, rdata->args.pages, rdata->args.pgbase, |
| 428 | rdata->args.offset, rdata->args.count, rdata, |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 429 | GFP_KERNEL, &objios); |
Boaz Harrosh | 9621855 | 2011-10-31 14:47:32 -0700 | [diff] [blame] | 430 | if (unlikely(ret)) |
| 431 | return ret; |
| 432 | |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 433 | objios->ios->done = _read_done; |
| 434 | dprintk("%s: offset=0x%llx length=0x%x\n", __func__, |
| 435 | rdata->args.offset, rdata->args.count); |
| 436 | return ore_read(objios->ios); |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 437 | } |
| 438 | |
| 439 | /* |
| 440 | * write |
| 441 | */ |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 442 | static void _write_done(struct ore_io_state *ios, void *private) |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 443 | { |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 444 | struct objio_state *objios = private; |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 445 | ssize_t status; |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 446 | int ret = ore_check_io(ios, &__on_dev_error); |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 447 | |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 448 | /* FIXME: _io_free(ios) can we dealocate the libosd resources; */ |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 449 | |
| 450 | if (likely(!ret)) { |
| 451 | /* FIXME: should be based on the OSD's persistence model |
| 452 | * See OSD2r05 Section 4.13 Data persistence model */ |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 453 | objios->oir.committed = NFS_FILE_SYNC; |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 454 | status = ios->length; |
| 455 | } else { |
| 456 | status = ret; |
| 457 | } |
| 458 | |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 459 | objlayout_write_done(&objios->oir, status, objios->sync); |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 460 | } |
| 461 | |
Boaz Harrosh | 278c023 | 2011-10-31 15:16:54 -0700 | [diff] [blame] | 462 | static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) |
| 463 | { |
| 464 | struct objio_state *objios = priv; |
| 465 | struct nfs_write_data *wdata = objios->oir.rpcdata; |
| 466 | pgoff_t index = offset / PAGE_SIZE; |
| 467 | struct page *page = find_get_page(wdata->inode->i_mapping, index); |
| 468 | |
| 469 | if (!page) { |
| 470 | page = find_or_create_page(wdata->inode->i_mapping, |
| 471 | index, GFP_NOFS); |
| 472 | if (unlikely(!page)) { |
| 473 | dprintk("%s: grab_cache_page Failed index=0x%lx\n", |
| 474 | __func__, index); |
| 475 | return NULL; |
| 476 | } |
| 477 | unlock_page(page); |
| 478 | } |
| 479 | if (PageDirty(page) || PageWriteback(page)) |
| 480 | *uptodate = true; |
| 481 | else |
| 482 | *uptodate = PageUptodate(page); |
| 483 | dprintk("%s: index=0x%lx uptodate=%d\n", __func__, index, *uptodate); |
| 484 | return page; |
| 485 | } |
| 486 | |
| 487 | static void __r4w_put_page(void *priv, struct page *page) |
| 488 | { |
| 489 | dprintk("%s: index=0x%lx\n", __func__, page->index); |
| 490 | page_cache_release(page); |
| 491 | return; |
| 492 | } |
| 493 | |
| 494 | static const struct _ore_r4w_op _r4w_op = { |
| 495 | .get_page = &__r4w_get_page, |
| 496 | .put_page = &__r4w_put_page, |
| 497 | }; |
| 498 | |
Boaz Harrosh | 9621855 | 2011-10-31 14:47:32 -0700 | [diff] [blame] | 499 | int objio_write_pagelist(struct nfs_write_data *wdata, int how) |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 500 | { |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 501 | struct objio_state *objios; |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 502 | int ret; |
| 503 | |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 504 | ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, false, |
Boaz Harrosh | 9621855 | 2011-10-31 14:47:32 -0700 | [diff] [blame] | 505 | wdata->lseg, wdata->args.pages, wdata->args.pgbase, |
| 506 | wdata->args.offset, wdata->args.count, wdata, GFP_NOFS, |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 507 | &objios); |
Boaz Harrosh | 9621855 | 2011-10-31 14:47:32 -0700 | [diff] [blame] | 508 | if (unlikely(ret)) |
| 509 | return ret; |
| 510 | |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 511 | objios->sync = 0 != (how & FLUSH_SYNC); |
Boaz Harrosh | 278c023 | 2011-10-31 15:16:54 -0700 | [diff] [blame] | 512 | objios->ios->r4w = &_r4w_op; |
Boaz Harrosh | 9621855 | 2011-10-31 14:47:32 -0700 | [diff] [blame] | 513 | |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 514 | if (!objios->sync) |
| 515 | objios->ios->done = _write_done; |
| 516 | |
| 517 | dprintk("%s: offset=0x%llx length=0x%x\n", __func__, |
| 518 | wdata->args.offset, wdata->args.count); |
| 519 | ret = ore_write(objios->ios); |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 520 | if (unlikely(ret)) |
| 521 | return ret; |
| 522 | |
Boaz Harrosh | eecfc63 | 2011-10-31 15:15:38 -0700 | [diff] [blame] | 523 | if (objios->sync) |
| 524 | _write_done(objios->ios, objios); |
| 525 | |
| 526 | return 0; |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 527 | } |
| 528 | |
Boaz Harrosh | 9342077 | 2011-05-25 21:25:29 +0300 | [diff] [blame] | 529 | static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, |
| 530 | struct nfs_page *prev, struct nfs_page *req) |
| 531 | { |
| 532 | if (!pnfs_generic_pg_test(pgio, prev, req)) |
| 533 | return false; |
| 534 | |
| 535 | return pgio->pg_count + req->wb_bytes <= |
Boaz Harrosh | af4f5b5 | 2011-10-31 15:04:19 -0700 | [diff] [blame] | 536 | OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length; |
Boaz Harrosh | 9342077 | 2011-05-25 21:25:29 +0300 | [diff] [blame] | 537 | } |
| 538 | |
Trond Myklebust | 1751c36 | 2011-06-10 13:30:23 -0400 | [diff] [blame] | 539 | static const struct nfs_pageio_ops objio_pg_read_ops = { |
Trond Myklebust | d8007d4 | 2011-06-10 13:30:23 -0400 | [diff] [blame] | 540 | .pg_init = pnfs_generic_pg_init_read, |
Trond Myklebust | 1751c36 | 2011-06-10 13:30:23 -0400 | [diff] [blame] | 541 | .pg_test = objio_pg_test, |
Trond Myklebust | 493292d | 2011-07-13 15:58:28 -0400 | [diff] [blame] | 542 | .pg_doio = pnfs_generic_pg_readpages, |
Trond Myklebust | 1751c36 | 2011-06-10 13:30:23 -0400 | [diff] [blame] | 543 | }; |
| 544 | |
| 545 | static const struct nfs_pageio_ops objio_pg_write_ops = { |
Trond Myklebust | d8007d4 | 2011-06-10 13:30:23 -0400 | [diff] [blame] | 546 | .pg_init = pnfs_generic_pg_init_write, |
Trond Myklebust | 1751c36 | 2011-06-10 13:30:23 -0400 | [diff] [blame] | 547 | .pg_test = objio_pg_test, |
Trond Myklebust | dce8129 | 2011-07-13 15:59:19 -0400 | [diff] [blame] | 548 | .pg_doio = pnfs_generic_pg_writepages, |
Trond Myklebust | 1751c36 | 2011-06-10 13:30:23 -0400 | [diff] [blame] | 549 | }; |
| 550 | |
Benny Halevy | c93407d | 2011-05-22 19:49:06 +0300 | [diff] [blame] | 551 | static struct pnfs_layoutdriver_type objlayout_type = { |
| 552 | .id = LAYOUT_OSD2_OBJECTS, |
| 553 | .name = "LAYOUT_OSD2_OBJECTS", |
Boaz Harrosh | fe0fe83 | 2012-01-06 09:31:20 +0200 | [diff] [blame] | 554 | .flags = PNFS_LAYOUTRET_ON_SETATTR | |
| 555 | PNFS_LAYOUTRET_ON_ERROR, |
Boaz Harrosh | 09f5bf4 | 2011-05-22 19:50:20 +0300 | [diff] [blame] | 556 | |
Benny Halevy | e51b841 | 2011-05-22 19:51:48 +0300 | [diff] [blame] | 557 | .alloc_layout_hdr = objlayout_alloc_layout_hdr, |
| 558 | .free_layout_hdr = objlayout_free_layout_hdr, |
| 559 | |
Boaz Harrosh | 09f5bf4 | 2011-05-22 19:50:20 +0300 | [diff] [blame] | 560 | .alloc_lseg = objlayout_alloc_lseg, |
| 561 | .free_lseg = objlayout_free_lseg, |
Boaz Harrosh | b6c05f1 | 2011-05-26 21:45:34 +0300 | [diff] [blame] | 562 | |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 563 | .read_pagelist = objlayout_read_pagelist, |
| 564 | .write_pagelist = objlayout_write_pagelist, |
Trond Myklebust | 1751c36 | 2011-06-10 13:30:23 -0400 | [diff] [blame] | 565 | .pg_read_ops = &objio_pg_read_ops, |
| 566 | .pg_write_ops = &objio_pg_write_ops, |
Boaz Harrosh | 04f8345 | 2011-05-22 19:52:19 +0300 | [diff] [blame] | 567 | |
Boaz Harrosh | b6c05f1 | 2011-05-26 21:45:34 +0300 | [diff] [blame] | 568 | .free_deviceid_node = objio_free_deviceid_node, |
Boaz Harrosh | adb5853 | 2011-05-26 21:49:46 +0300 | [diff] [blame] | 569 | |
Boaz Harrosh | a0fe8bf | 2011-05-22 19:54:13 +0300 | [diff] [blame] | 570 | .encode_layoutcommit = objlayout_encode_layoutcommit, |
Boaz Harrosh | adb5853 | 2011-05-26 21:49:46 +0300 | [diff] [blame] | 571 | .encode_layoutreturn = objlayout_encode_layoutreturn, |
Benny Halevy | c93407d | 2011-05-22 19:49:06 +0300 | [diff] [blame] | 572 | }; |
| 573 | |
| 574 | MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects"); |
| 575 | MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>"); |
| 576 | MODULE_LICENSE("GPL"); |
| 577 | |
| 578 | static int __init |
| 579 | objlayout_init(void) |
| 580 | { |
| 581 | int ret = pnfs_register_layoutdriver(&objlayout_type); |
| 582 | |
| 583 | if (ret) |
| 584 | printk(KERN_INFO |
| 585 | "%s: Registering OSD pNFS Layout Driver failed: error=%d\n", |
| 586 | __func__, ret); |
| 587 | else |
| 588 | printk(KERN_INFO "%s: Registered OSD pNFS Layout Driver\n", |
| 589 | __func__); |
| 590 | return ret; |
| 591 | } |
| 592 | |
| 593 | static void __exit |
| 594 | objlayout_exit(void) |
| 595 | { |
| 596 | pnfs_unregister_layoutdriver(&objlayout_type); |
| 597 | printk(KERN_INFO "%s: Unregistered OSD pNFS Layout Driver\n", |
| 598 | __func__); |
| 599 | } |
| 600 | |
J. Bruce Fields | f85ef69 | 2011-07-15 19:18:42 -0400 | [diff] [blame] | 601 | MODULE_ALIAS("nfs-layouttype4-2"); |
| 602 | |
Benny Halevy | c93407d | 2011-05-22 19:49:06 +0300 | [diff] [blame] | 603 | module_init(objlayout_init); |
| 604 | module_exit(objlayout_exit); |