Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* volume.c: AFS volume management |
| 2 | * |
| 3 | * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. |
| 4 | * Written by David Howells (dhowells@redhat.com) |
| 5 | * |
| 6 | * This program is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU General Public License |
| 8 | * as published by the Free Software Foundation; either version |
| 9 | * 2 of the License, or (at your option) any later version. |
| 10 | */ |
| 11 | |
| 12 | #include <linux/kernel.h> |
| 13 | #include <linux/module.h> |
| 14 | #include <linux/init.h> |
| 15 | #include <linux/slab.h> |
| 16 | #include <linux/fs.h> |
| 17 | #include <linux/pagemap.h> |
| 18 | #include "volume.h" |
| 19 | #include "vnode.h" |
| 20 | #include "cell.h" |
| 21 | #include "cache.h" |
| 22 | #include "cmservice.h" |
| 23 | #include "fsclient.h" |
| 24 | #include "vlclient.h" |
| 25 | #include "internal.h" |
| 26 | |
#ifdef __KDEBUG
/*
 * printable names for the volume types, indexed by volume type
 * - only consulted by debugging output, so only built when __KDEBUG is defined
 * - neither the strings nor the table itself are ever modified, so both are
 *   declared const
 */
static const char *const afs_voltypes[] = { "R/W", "R/O", "BAK" };
#endif
| 30 | |
#ifdef AFS_CACHING_SUPPORT
/* cache index callbacks; both are defined at the bottom of this file */
static void afs_volume_cache_update(void *source, void *entry);
static cachefs_match_val_t afs_volume_cache_match(void *target,
						  const void *entry);

/*
 * definition of the "volume" cache index
 * - each entry holds one struct afs_cache_vhash
 * - entries are matched and refreshed through the two callbacks above
 */
struct cachefs_index_def afs_volume_cache_index_def = {
	.name		= "volume",
	.data_size	= sizeof(struct afs_cache_vhash),
	.keys		= {
		[0]	= { CACHEFS_INDEX_KEYS_BIN, 1 },
		[1]	= { CACHEFS_INDEX_KEYS_BIN, 1 },
	},
	.match		= afs_volume_cache_match,
	.update		= afs_volume_cache_update,
};
#endif
| 45 | |
| 46 | /*****************************************************************************/ |
| 47 | /* |
| 48 | * lookup a volume by name |
| 49 | * - this can be one of the following: |
| 50 | * "%[cell:]volume[.]" R/W volume |
| 51 | * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0), |
| 52 | * or R/W (rwparent=1) volume |
| 53 | * "%[cell:]volume.readonly" R/O volume |
| 54 | * "#[cell:]volume.readonly" R/O volume |
| 55 | * "%[cell:]volume.backup" Backup volume |
| 56 | * "#[cell:]volume.backup" Backup volume |
| 57 | * |
| 58 | * The cell name is optional, and defaults to the current cell. |
| 59 | * |
| 60 | * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin |
| 61 | * Guide |
| 62 | * - Rule 1: Explicit type suffix forces access of that type or nothing |
| 63 | * (no suffix, then use Rule 2 & 3) |
| 64 | * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W |
| 65 | * if not available |
| 66 | * - Rule 3: If parent volume is R/W, then only mount R/W volume unless |
| 67 | * explicitly told otherwise |
| 68 | */ |
| 69 | int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath, |
| 70 | struct afs_volume **_volume) |
| 71 | { |
| 72 | struct afs_vlocation *vlocation = NULL; |
| 73 | struct afs_volume *volume = NULL; |
| 74 | afs_voltype_t type; |
| 75 | const char *cellname, *volname, *suffix; |
| 76 | char srvtmask; |
| 77 | int force, ret, loop, cellnamesz, volnamesz; |
| 78 | |
| 79 | _enter("%s,,%d,", name, rwpath); |
| 80 | |
| 81 | if (!name || (name[0] != '%' && name[0] != '#') || !name[1]) { |
| 82 | printk("kAFS: unparsable volume name\n"); |
| 83 | return -EINVAL; |
| 84 | } |
| 85 | |
| 86 | /* determine the type of volume we're looking for */ |
| 87 | force = 0; |
| 88 | type = AFSVL_ROVOL; |
| 89 | |
| 90 | if (rwpath || name[0] == '%') { |
| 91 | type = AFSVL_RWVOL; |
| 92 | force = 1; |
| 93 | } |
| 94 | |
| 95 | suffix = strrchr(name, '.'); |
| 96 | if (suffix) { |
| 97 | if (strcmp(suffix, ".readonly") == 0) { |
| 98 | type = AFSVL_ROVOL; |
| 99 | force = 1; |
| 100 | } |
| 101 | else if (strcmp(suffix, ".backup") == 0) { |
| 102 | type = AFSVL_BACKVOL; |
| 103 | force = 1; |
| 104 | } |
| 105 | else if (suffix[1] == 0) { |
| 106 | } |
| 107 | else { |
| 108 | suffix = NULL; |
| 109 | } |
| 110 | } |
| 111 | |
| 112 | /* split the cell and volume names */ |
| 113 | name++; |
| 114 | volname = strchr(name, ':'); |
| 115 | if (volname) { |
| 116 | cellname = name; |
| 117 | cellnamesz = volname - name; |
| 118 | volname++; |
| 119 | } |
| 120 | else { |
| 121 | volname = name; |
| 122 | cellname = NULL; |
| 123 | cellnamesz = 0; |
| 124 | } |
| 125 | |
| 126 | volnamesz = suffix ? suffix - volname : strlen(volname); |
| 127 | |
| 128 | _debug("CELL:%*.*s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s", |
| 129 | cellnamesz, cellnamesz, cellname ?: "", cell, |
| 130 | volnamesz, volnamesz, volname, suffix ?: "-", |
| 131 | type, |
| 132 | force ? " FORCE" : ""); |
| 133 | |
| 134 | /* lookup the cell record */ |
| 135 | if (cellname || !cell) { |
| 136 | ret = afs_cell_lookup(cellname, cellnamesz, &cell); |
| 137 | if (ret<0) { |
| 138 | printk("kAFS: unable to lookup cell '%s'\n", |
| 139 | cellname ?: ""); |
| 140 | goto error; |
| 141 | } |
| 142 | } |
| 143 | else { |
| 144 | afs_get_cell(cell); |
| 145 | } |
| 146 | |
| 147 | /* lookup the volume location record */ |
| 148 | ret = afs_vlocation_lookup(cell, volname, volnamesz, &vlocation); |
| 149 | if (ret < 0) |
| 150 | goto error; |
| 151 | |
| 152 | /* make the final decision on the type we want */ |
| 153 | ret = -ENOMEDIUM; |
| 154 | if (force && !(vlocation->vldb.vidmask & (1 << type))) |
| 155 | goto error; |
| 156 | |
| 157 | srvtmask = 0; |
| 158 | for (loop = 0; loop < vlocation->vldb.nservers; loop++) |
| 159 | srvtmask |= vlocation->vldb.srvtmask[loop]; |
| 160 | |
| 161 | if (force) { |
| 162 | if (!(srvtmask & (1 << type))) |
| 163 | goto error; |
| 164 | } |
| 165 | else if (srvtmask & AFS_VOL_VTM_RO) { |
| 166 | type = AFSVL_ROVOL; |
| 167 | } |
| 168 | else if (srvtmask & AFS_VOL_VTM_RW) { |
| 169 | type = AFSVL_RWVOL; |
| 170 | } |
| 171 | else { |
| 172 | goto error; |
| 173 | } |
| 174 | |
| 175 | down_write(&cell->vl_sem); |
| 176 | |
| 177 | /* is the volume already active? */ |
| 178 | if (vlocation->vols[type]) { |
| 179 | /* yes - re-use it */ |
| 180 | volume = vlocation->vols[type]; |
| 181 | afs_get_volume(volume); |
| 182 | goto success; |
| 183 | } |
| 184 | |
| 185 | /* create a new volume record */ |
| 186 | _debug("creating new volume record"); |
| 187 | |
| 188 | ret = -ENOMEM; |
Panagiotis Issaris | f8314dc | 2006-09-27 01:49:37 -0700 | [diff] [blame] | 189 | volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 190 | if (!volume) |
| 191 | goto error_up; |
| 192 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 193 | atomic_set(&volume->usage, 1); |
| 194 | volume->type = type; |
| 195 | volume->type_force = force; |
| 196 | volume->cell = cell; |
| 197 | volume->vid = vlocation->vldb.vid[type]; |
| 198 | |
| 199 | init_rwsem(&volume->server_sem); |
| 200 | |
| 201 | /* look up all the applicable server records */ |
| 202 | for (loop = 0; loop < 8; loop++) { |
| 203 | if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) { |
| 204 | ret = afs_server_lookup( |
| 205 | volume->cell, |
| 206 | &vlocation->vldb.servers[loop], |
| 207 | &volume->servers[volume->nservers]); |
| 208 | if (ret < 0) |
| 209 | goto error_discard; |
| 210 | |
| 211 | volume->nservers++; |
| 212 | } |
| 213 | } |
| 214 | |
| 215 | /* attach the cache and volume location */ |
| 216 | #ifdef AFS_CACHING_SUPPORT |
| 217 | cachefs_acquire_cookie(vlocation->cache, |
| 218 | &afs_vnode_cache_index_def, |
| 219 | volume, |
| 220 | &volume->cache); |
| 221 | #endif |
| 222 | |
| 223 | afs_get_vlocation(vlocation); |
| 224 | volume->vlocation = vlocation; |
| 225 | |
| 226 | vlocation->vols[type] = volume; |
| 227 | |
| 228 | success: |
| 229 | _debug("kAFS selected %s volume %08x", |
| 230 | afs_voltypes[volume->type], volume->vid); |
| 231 | *_volume = volume; |
| 232 | ret = 0; |
| 233 | |
| 234 | /* clean up */ |
| 235 | error_up: |
| 236 | up_write(&cell->vl_sem); |
| 237 | error: |
| 238 | afs_put_vlocation(vlocation); |
| 239 | afs_put_cell(cell); |
| 240 | |
| 241 | _leave(" = %d (%p)", ret, volume); |
| 242 | return ret; |
| 243 | |
| 244 | error_discard: |
| 245 | up_write(&cell->vl_sem); |
| 246 | |
| 247 | for (loop = volume->nservers - 1; loop >= 0; loop--) |
| 248 | afs_put_server(volume->servers[loop]); |
| 249 | |
| 250 | kfree(volume); |
| 251 | goto error; |
| 252 | } /* end afs_volume_lookup() */ |
| 253 | |
| 254 | /*****************************************************************************/ |
| 255 | /* |
| 256 | * destroy a volume record |
| 257 | */ |
| 258 | void afs_put_volume(struct afs_volume *volume) |
| 259 | { |
| 260 | struct afs_vlocation *vlocation; |
| 261 | int loop; |
| 262 | |
| 263 | if (!volume) |
| 264 | return; |
| 265 | |
| 266 | _enter("%p", volume); |
| 267 | |
| 268 | vlocation = volume->vlocation; |
| 269 | |
| 270 | /* sanity check */ |
| 271 | BUG_ON(atomic_read(&volume->usage) <= 0); |
| 272 | |
| 273 | /* to prevent a race, the decrement and the dequeue must be effectively |
| 274 | * atomic */ |
| 275 | down_write(&vlocation->cell->vl_sem); |
| 276 | |
| 277 | if (likely(!atomic_dec_and_test(&volume->usage))) { |
| 278 | up_write(&vlocation->cell->vl_sem); |
| 279 | _leave(""); |
| 280 | return; |
| 281 | } |
| 282 | |
| 283 | vlocation->vols[volume->type] = NULL; |
| 284 | |
| 285 | up_write(&vlocation->cell->vl_sem); |
| 286 | |
| 287 | /* finish cleaning up the volume */ |
| 288 | #ifdef AFS_CACHING_SUPPORT |
| 289 | cachefs_relinquish_cookie(volume->cache, 0); |
| 290 | #endif |
| 291 | afs_put_vlocation(vlocation); |
| 292 | |
| 293 | for (loop = volume->nservers - 1; loop >= 0; loop--) |
| 294 | afs_put_server(volume->servers[loop]); |
| 295 | |
| 296 | kfree(volume); |
| 297 | |
| 298 | _leave(" [destroyed]"); |
| 299 | } /* end afs_put_volume() */ |
| 300 | |
| 301 | /*****************************************************************************/ |
| 302 | /* |
| 303 | * pick a server to use to try accessing this volume |
| 304 | * - returns with an elevated usage count on the server chosen |
| 305 | */ |
| 306 | int afs_volume_pick_fileserver(struct afs_volume *volume, |
| 307 | struct afs_server **_server) |
| 308 | { |
| 309 | struct afs_server *server; |
| 310 | int ret, state, loop; |
| 311 | |
| 312 | _enter("%s", volume->vlocation->vldb.name); |
| 313 | |
| 314 | down_read(&volume->server_sem); |
| 315 | |
| 316 | /* handle the no-server case */ |
| 317 | if (volume->nservers == 0) { |
| 318 | ret = volume->rjservers ? -ENOMEDIUM : -ESTALE; |
| 319 | up_read(&volume->server_sem); |
| 320 | _leave(" = %d [no servers]", ret); |
| 321 | return ret; |
| 322 | } |
| 323 | |
| 324 | /* basically, just search the list for the first live server and use |
| 325 | * that */ |
| 326 | ret = 0; |
| 327 | for (loop = 0; loop < volume->nservers; loop++) { |
| 328 | server = volume->servers[loop]; |
| 329 | state = server->fs_state; |
| 330 | |
| 331 | switch (state) { |
| 332 | /* found an apparently healthy server */ |
| 333 | case 0: |
| 334 | afs_get_server(server); |
| 335 | up_read(&volume->server_sem); |
| 336 | *_server = server; |
| 337 | _leave(" = 0 (picked %08x)", |
| 338 | ntohl(server->addr.s_addr)); |
| 339 | return 0; |
| 340 | |
| 341 | case -ENETUNREACH: |
| 342 | if (ret == 0) |
| 343 | ret = state; |
| 344 | break; |
| 345 | |
| 346 | case -EHOSTUNREACH: |
| 347 | if (ret == 0 || |
| 348 | ret == -ENETUNREACH) |
| 349 | ret = state; |
| 350 | break; |
| 351 | |
| 352 | case -ECONNREFUSED: |
| 353 | if (ret == 0 || |
| 354 | ret == -ENETUNREACH || |
| 355 | ret == -EHOSTUNREACH) |
| 356 | ret = state; |
| 357 | break; |
| 358 | |
| 359 | default: |
| 360 | case -EREMOTEIO: |
| 361 | if (ret == 0 || |
| 362 | ret == -ENETUNREACH || |
| 363 | ret == -EHOSTUNREACH || |
| 364 | ret == -ECONNREFUSED) |
| 365 | ret = state; |
| 366 | break; |
| 367 | } |
| 368 | } |
| 369 | |
| 370 | /* no available servers |
| 371 | * - TODO: handle the no active servers case better |
| 372 | */ |
| 373 | up_read(&volume->server_sem); |
| 374 | _leave(" = %d", ret); |
| 375 | return ret; |
| 376 | } /* end afs_volume_pick_fileserver() */ |
| 377 | |
| 378 | /*****************************************************************************/ |
| 379 | /* |
| 380 | * release a server after use |
| 381 | * - releases the ref on the server struct that was acquired by picking |
| 382 | * - records result of using a particular server to access a volume |
| 383 | * - return 0 to try again, 1 if okay or to issue error |
| 384 | */ |
| 385 | int afs_volume_release_fileserver(struct afs_volume *volume, |
| 386 | struct afs_server *server, |
| 387 | int result) |
| 388 | { |
| 389 | unsigned loop; |
| 390 | |
| 391 | _enter("%s,%08x,%d", |
| 392 | volume->vlocation->vldb.name, ntohl(server->addr.s_addr), |
| 393 | result); |
| 394 | |
| 395 | switch (result) { |
| 396 | /* success */ |
| 397 | case 0: |
| 398 | server->fs_act_jif = jiffies; |
| 399 | break; |
| 400 | |
| 401 | /* the fileserver denied all knowledge of the volume */ |
| 402 | case -ENOMEDIUM: |
| 403 | server->fs_act_jif = jiffies; |
| 404 | down_write(&volume->server_sem); |
| 405 | |
| 406 | /* first, find where the server is in the active list (if it |
| 407 | * is) */ |
| 408 | for (loop = 0; loop < volume->nservers; loop++) |
| 409 | if (volume->servers[loop] == server) |
| 410 | goto present; |
| 411 | |
| 412 | /* no longer there - may have been discarded by another op */ |
| 413 | goto try_next_server_upw; |
| 414 | |
| 415 | present: |
| 416 | volume->nservers--; |
| 417 | memmove(&volume->servers[loop], |
| 418 | &volume->servers[loop + 1], |
| 419 | sizeof(volume->servers[loop]) * |
| 420 | (volume->nservers - loop)); |
| 421 | volume->servers[volume->nservers] = NULL; |
| 422 | afs_put_server(server); |
| 423 | volume->rjservers++; |
| 424 | |
| 425 | if (volume->nservers > 0) |
| 426 | /* another server might acknowledge its existence */ |
| 427 | goto try_next_server_upw; |
| 428 | |
| 429 | /* handle the case where all the fileservers have rejected the |
| 430 | * volume |
| 431 | * - TODO: try asking the fileservers for volume information |
| 432 | * - TODO: contact the VL server again to see if the volume is |
| 433 | * no longer registered |
| 434 | */ |
| 435 | up_write(&volume->server_sem); |
| 436 | afs_put_server(server); |
| 437 | _leave(" [completely rejected]"); |
| 438 | return 1; |
| 439 | |
| 440 | /* problem reaching the server */ |
| 441 | case -ENETUNREACH: |
| 442 | case -EHOSTUNREACH: |
| 443 | case -ECONNREFUSED: |
| 444 | case -ETIMEDOUT: |
| 445 | case -EREMOTEIO: |
| 446 | /* mark the server as dead |
| 447 | * TODO: vary dead timeout depending on error |
| 448 | */ |
| 449 | spin_lock(&server->fs_lock); |
| 450 | if (!server->fs_state) { |
| 451 | server->fs_dead_jif = jiffies + HZ * 10; |
| 452 | server->fs_state = result; |
| 453 | printk("kAFS: SERVER DEAD state=%d\n", result); |
| 454 | } |
| 455 | spin_unlock(&server->fs_lock); |
| 456 | goto try_next_server; |
| 457 | |
| 458 | /* miscellaneous error */ |
| 459 | default: |
| 460 | server->fs_act_jif = jiffies; |
| 461 | case -ENOMEM: |
| 462 | case -ENONET: |
| 463 | break; |
| 464 | } |
| 465 | |
| 466 | /* tell the caller to accept the result */ |
| 467 | afs_put_server(server); |
| 468 | _leave(""); |
| 469 | return 1; |
| 470 | |
| 471 | /* tell the caller to loop around and try the next server */ |
| 472 | try_next_server_upw: |
| 473 | up_write(&volume->server_sem); |
| 474 | try_next_server: |
| 475 | afs_put_server(server); |
| 476 | _leave(" [try next server]"); |
| 477 | return 0; |
| 478 | |
| 479 | } /* end afs_volume_release_fileserver() */ |
| 480 | |
/*****************************************************************************/
/*
 * decide whether a volume hash record stored in the cache belongs to a volume
 * - target is the volume being looked for, entry is the cached record
 * - the record matches when its volume type equals the volume's type
 */
#ifdef AFS_CACHING_SUPPORT
static cachefs_match_val_t afs_volume_cache_match(void *target,
						  const void *entry)
{
	struct afs_volume *wanted = target;
	const struct afs_cache_vhash *record = entry;

	_enter("{%u},{%u}", wanted->type, record->vtype);

	if (wanted->type != record->vtype) {
		_leave(" = FAILED");
		return CACHEFS_MATCH_FAILED;
	}

	_leave(" = SUCCESS");
	return CACHEFS_MATCH_SUCCESS;
} /* end afs_volume_cache_match() */
#endif
| 503 | |
/*****************************************************************************/
/*
 * refresh a volume hash record stored in the cache
 * - source is the volume, entry is the cached record to be filled in
 * - the volume's type is the only thing copied into the record
 */
#ifdef AFS_CACHING_SUPPORT
static void afs_volume_cache_update(void *source, void *entry)
{
	struct afs_volume *vol = source;
	struct afs_cache_vhash *record = entry;

	_enter("");

	record->vtype = vol->type;

} /* end afs_volume_cache_update() */
#endif