Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* |
| 2 | * linux/fs/sysv/itree.c |
| 3 | * |
| 4 | * Handling of indirect blocks' trees. |
| 5 | * AV, Sep--Dec 2000 |
| 6 | */ |
| 7 | |
| 8 | #include <linux/buffer_head.h> |
| 9 | #include <linux/mount.h> |
| 10 | #include <linux/string.h> |
| 11 | #include "sysv.h" |
| 12 | |
| 13 | enum {DIRECT = 10, DEPTH = 4}; /* Have triple indirect */ |
| 14 | |
| 15 | static inline void dirty_indirect(struct buffer_head *bh, struct inode *inode) |
| 16 | { |
| 17 | mark_buffer_dirty_inode(bh, inode); |
| 18 | if (IS_SYNC(inode)) |
| 19 | sync_dirty_buffer(bh); |
| 20 | } |
| 21 | |
| 22 | static int block_to_path(struct inode *inode, long block, int offsets[DEPTH]) |
| 23 | { |
| 24 | struct super_block *sb = inode->i_sb; |
| 25 | struct sysv_sb_info *sbi = SYSV_SB(sb); |
| 26 | int ptrs_bits = sbi->s_ind_per_block_bits; |
| 27 | unsigned long indirect_blocks = sbi->s_ind_per_block, |
| 28 | double_blocks = sbi->s_ind_per_block_2; |
| 29 | int n = 0; |
| 30 | |
| 31 | if (block < 0) { |
| 32 | printk("sysv_block_map: block < 0\n"); |
| 33 | } else if (block < DIRECT) { |
| 34 | offsets[n++] = block; |
| 35 | } else if ( (block -= DIRECT) < indirect_blocks) { |
| 36 | offsets[n++] = DIRECT; |
| 37 | offsets[n++] = block; |
| 38 | } else if ((block -= indirect_blocks) < double_blocks) { |
| 39 | offsets[n++] = DIRECT+1; |
| 40 | offsets[n++] = block >> ptrs_bits; |
| 41 | offsets[n++] = block & (indirect_blocks - 1); |
| 42 | } else if (((block -= double_blocks) >> (ptrs_bits * 2)) < indirect_blocks) { |
| 43 | offsets[n++] = DIRECT+2; |
| 44 | offsets[n++] = block >> (ptrs_bits * 2); |
| 45 | offsets[n++] = (block >> ptrs_bits) & (indirect_blocks - 1); |
| 46 | offsets[n++] = block & (indirect_blocks - 1); |
| 47 | } else { |
| 48 | /* nothing */; |
| 49 | } |
| 50 | return n; |
| 51 | } |
| 52 | |
| 53 | static inline int block_to_cpu(struct sysv_sb_info *sbi, sysv_zone_t nr) |
| 54 | { |
| 55 | return sbi->s_block_base + fs32_to_cpu(sbi, nr); |
| 56 | } |
| 57 | |
| 58 | typedef struct { |
| 59 | sysv_zone_t *p; |
| 60 | sysv_zone_t key; |
| 61 | struct buffer_head *bh; |
| 62 | } Indirect; |
| 63 | |
| 64 | static DEFINE_RWLOCK(pointers_lock); |
| 65 | |
| 66 | static inline void add_chain(Indirect *p, struct buffer_head *bh, sysv_zone_t *v) |
| 67 | { |
| 68 | p->key = *(p->p = v); |
| 69 | p->bh = bh; |
| 70 | } |
| 71 | |
| 72 | static inline int verify_chain(Indirect *from, Indirect *to) |
| 73 | { |
| 74 | while (from <= to && from->key == *from->p) |
| 75 | from++; |
| 76 | return (from > to); |
| 77 | } |
| 78 | |
| 79 | static inline sysv_zone_t *block_end(struct buffer_head *bh) |
| 80 | { |
| 81 | return (sysv_zone_t*)((char*)bh->b_data + bh->b_size); |
| 82 | } |
| 83 | |
| 84 | /* |
| 85 | * Requires read_lock(&pointers_lock) or write_lock(&pointers_lock) |
| 86 | */ |
| 87 | static Indirect *get_branch(struct inode *inode, |
| 88 | int depth, |
| 89 | int offsets[], |
| 90 | Indirect chain[], |
| 91 | int *err) |
| 92 | { |
| 93 | struct super_block *sb = inode->i_sb; |
| 94 | Indirect *p = chain; |
| 95 | struct buffer_head *bh; |
| 96 | |
| 97 | *err = 0; |
| 98 | add_chain(chain, NULL, SYSV_I(inode)->i_data + *offsets); |
| 99 | if (!p->key) |
| 100 | goto no_block; |
| 101 | while (--depth) { |
| 102 | int block = block_to_cpu(SYSV_SB(sb), p->key); |
| 103 | bh = sb_bread(sb, block); |
| 104 | if (!bh) |
| 105 | goto failure; |
| 106 | if (!verify_chain(chain, p)) |
| 107 | goto changed; |
| 108 | add_chain(++p, bh, (sysv_zone_t*)bh->b_data + *++offsets); |
| 109 | if (!p->key) |
| 110 | goto no_block; |
| 111 | } |
| 112 | return NULL; |
| 113 | |
| 114 | changed: |
| 115 | brelse(bh); |
| 116 | *err = -EAGAIN; |
| 117 | goto no_block; |
| 118 | failure: |
| 119 | *err = -EIO; |
| 120 | no_block: |
| 121 | return p; |
| 122 | } |
| 123 | |
| 124 | static int alloc_branch(struct inode *inode, |
| 125 | int num, |
| 126 | int *offsets, |
| 127 | Indirect *branch) |
| 128 | { |
| 129 | int blocksize = inode->i_sb->s_blocksize; |
| 130 | int n = 0; |
| 131 | int i; |
| 132 | |
| 133 | branch[0].key = sysv_new_block(inode->i_sb); |
| 134 | if (branch[0].key) for (n = 1; n < num; n++) { |
| 135 | struct buffer_head *bh; |
| 136 | int parent; |
| 137 | /* Allocate the next block */ |
| 138 | branch[n].key = sysv_new_block(inode->i_sb); |
| 139 | if (!branch[n].key) |
| 140 | break; |
| 141 | /* |
| 142 | * Get buffer_head for parent block, zero it out and set |
| 143 | * the pointer to new one, then send parent to disk. |
| 144 | */ |
| 145 | parent = block_to_cpu(SYSV_SB(inode->i_sb), branch[n-1].key); |
| 146 | bh = sb_getblk(inode->i_sb, parent); |
| 147 | lock_buffer(bh); |
| 148 | memset(bh->b_data, 0, blocksize); |
| 149 | branch[n].bh = bh; |
| 150 | branch[n].p = (sysv_zone_t*) bh->b_data + offsets[n]; |
| 151 | *branch[n].p = branch[n].key; |
| 152 | set_buffer_uptodate(bh); |
| 153 | unlock_buffer(bh); |
| 154 | dirty_indirect(bh, inode); |
| 155 | } |
| 156 | if (n == num) |
| 157 | return 0; |
| 158 | |
| 159 | /* Allocation failed, free what we already allocated */ |
| 160 | for (i = 1; i < n; i++) |
| 161 | bforget(branch[i].bh); |
| 162 | for (i = 0; i < n; i++) |
| 163 | sysv_free_block(inode->i_sb, branch[i].key); |
| 164 | return -ENOSPC; |
| 165 | } |
| 166 | |
| 167 | static inline int splice_branch(struct inode *inode, |
| 168 | Indirect chain[], |
| 169 | Indirect *where, |
| 170 | int num) |
| 171 | { |
| 172 | int i; |
| 173 | |
| 174 | /* Verify that place we are splicing to is still there and vacant */ |
| 175 | write_lock(&pointers_lock); |
| 176 | if (!verify_chain(chain, where-1) || *where->p) |
| 177 | goto changed; |
| 178 | *where->p = where->key; |
| 179 | write_unlock(&pointers_lock); |
| 180 | |
| 181 | inode->i_ctime = CURRENT_TIME_SEC; |
| 182 | |
| 183 | /* had we spliced it onto indirect block? */ |
| 184 | if (where->bh) |
| 185 | dirty_indirect(where->bh, inode); |
| 186 | |
| 187 | if (IS_SYNC(inode)) |
| 188 | sysv_sync_inode(inode); |
| 189 | else |
| 190 | mark_inode_dirty(inode); |
| 191 | return 0; |
| 192 | |
| 193 | changed: |
| 194 | write_unlock(&pointers_lock); |
| 195 | for (i = 1; i < num; i++) |
| 196 | bforget(where[i].bh); |
| 197 | for (i = 0; i < num; i++) |
| 198 | sysv_free_block(inode->i_sb, where[i].key); |
| 199 | return -EAGAIN; |
| 200 | } |
| 201 | |
| 202 | static int get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) |
| 203 | { |
| 204 | int err = -EIO; |
| 205 | int offsets[DEPTH]; |
| 206 | Indirect chain[DEPTH]; |
| 207 | struct super_block *sb = inode->i_sb; |
| 208 | Indirect *partial; |
| 209 | int left; |
| 210 | int depth = block_to_path(inode, iblock, offsets); |
| 211 | |
| 212 | if (depth == 0) |
| 213 | goto out; |
| 214 | |
| 215 | reread: |
| 216 | read_lock(&pointers_lock); |
| 217 | partial = get_branch(inode, depth, offsets, chain, &err); |
| 218 | read_unlock(&pointers_lock); |
| 219 | |
| 220 | /* Simplest case - block found, no allocation needed */ |
| 221 | if (!partial) { |
| 222 | got_it: |
| 223 | map_bh(bh_result, sb, block_to_cpu(SYSV_SB(sb), |
| 224 | chain[depth-1].key)); |
| 225 | /* Clean up and exit */ |
| 226 | partial = chain+depth-1; /* the whole chain */ |
| 227 | goto cleanup; |
| 228 | } |
| 229 | |
| 230 | /* Next simple case - plain lookup or failed read of indirect block */ |
| 231 | if (!create || err == -EIO) { |
| 232 | cleanup: |
| 233 | while (partial > chain) { |
| 234 | brelse(partial->bh); |
| 235 | partial--; |
| 236 | } |
| 237 | out: |
| 238 | return err; |
| 239 | } |
| 240 | |
| 241 | /* |
| 242 | * Indirect block might be removed by truncate while we were |
| 243 | * reading it. Handling of that case (forget what we've got and |
| 244 | * reread) is taken out of the main path. |
| 245 | */ |
| 246 | if (err == -EAGAIN) |
| 247 | goto changed; |
| 248 | |
| 249 | left = (chain + depth) - partial; |
| 250 | err = alloc_branch(inode, left, offsets+(partial-chain), partial); |
| 251 | if (err) |
| 252 | goto cleanup; |
| 253 | |
| 254 | if (splice_branch(inode, chain, partial, left) < 0) |
| 255 | goto changed; |
| 256 | |
| 257 | set_buffer_new(bh_result); |
| 258 | goto got_it; |
| 259 | |
| 260 | changed: |
| 261 | while (partial > chain) { |
| 262 | brelse(partial->bh); |
| 263 | partial--; |
| 264 | } |
| 265 | goto reread; |
| 266 | } |
| 267 | |
| 268 | static inline int all_zeroes(sysv_zone_t *p, sysv_zone_t *q) |
| 269 | { |
| 270 | while (p < q) |
| 271 | if (*p++) |
| 272 | return 0; |
| 273 | return 1; |
| 274 | } |
| 275 | |
| 276 | static Indirect *find_shared(struct inode *inode, |
| 277 | int depth, |
| 278 | int offsets[], |
| 279 | Indirect chain[], |
| 280 | sysv_zone_t *top) |
| 281 | { |
| 282 | Indirect *partial, *p; |
| 283 | int k, err; |
| 284 | |
| 285 | *top = 0; |
| 286 | for (k = depth; k > 1 && !offsets[k-1]; k--) |
| 287 | ; |
| 288 | |
| 289 | write_lock(&pointers_lock); |
| 290 | partial = get_branch(inode, k, offsets, chain, &err); |
| 291 | if (!partial) |
| 292 | partial = chain + k-1; |
| 293 | /* |
| 294 | * If the branch acquired continuation since we've looked at it - |
| 295 | * fine, it should all survive and (new) top doesn't belong to us. |
| 296 | */ |
| 297 | if (!partial->key && *partial->p) { |
| 298 | write_unlock(&pointers_lock); |
| 299 | goto no_top; |
| 300 | } |
| 301 | for (p=partial; p>chain && all_zeroes((sysv_zone_t*)p->bh->b_data,p->p); p--) |
| 302 | ; |
| 303 | /* |
| 304 | * OK, we've found the last block that must survive. The rest of our |
| 305 | * branch should be detached before unlocking. However, if that rest |
| 306 | * of branch is all ours and does not grow immediately from the inode |
| 307 | * it's easier to cheat and just decrement partial->p. |
| 308 | */ |
| 309 | if (p == chain + k - 1 && p > chain) { |
| 310 | p->p--; |
| 311 | } else { |
| 312 | *top = *p->p; |
| 313 | *p->p = 0; |
| 314 | } |
| 315 | write_unlock(&pointers_lock); |
| 316 | |
| 317 | while (partial > p) { |
| 318 | brelse(partial->bh); |
| 319 | partial--; |
| 320 | } |
| 321 | no_top: |
| 322 | return partial; |
| 323 | } |
| 324 | |
| 325 | static inline void free_data(struct inode *inode, sysv_zone_t *p, sysv_zone_t *q) |
| 326 | { |
| 327 | for ( ; p < q ; p++) { |
| 328 | sysv_zone_t nr = *p; |
| 329 | if (nr) { |
| 330 | *p = 0; |
| 331 | sysv_free_block(inode->i_sb, nr); |
| 332 | mark_inode_dirty(inode); |
| 333 | } |
| 334 | } |
| 335 | } |
| 336 | |
| 337 | static void free_branches(struct inode *inode, sysv_zone_t *p, sysv_zone_t *q, int depth) |
| 338 | { |
| 339 | struct buffer_head * bh; |
| 340 | struct super_block *sb = inode->i_sb; |
| 341 | |
| 342 | if (depth--) { |
| 343 | for ( ; p < q ; p++) { |
| 344 | int block; |
| 345 | sysv_zone_t nr = *p; |
| 346 | if (!nr) |
| 347 | continue; |
| 348 | *p = 0; |
| 349 | block = block_to_cpu(SYSV_SB(sb), nr); |
| 350 | bh = sb_bread(sb, block); |
| 351 | if (!bh) |
| 352 | continue; |
| 353 | free_branches(inode, (sysv_zone_t*)bh->b_data, |
| 354 | block_end(bh), depth); |
| 355 | bforget(bh); |
| 356 | sysv_free_block(sb, nr); |
| 357 | mark_inode_dirty(inode); |
| 358 | } |
| 359 | } else |
| 360 | free_data(inode, p, q); |
| 361 | } |
| 362 | |
| 363 | void sysv_truncate (struct inode * inode) |
| 364 | { |
| 365 | sysv_zone_t *i_data = SYSV_I(inode)->i_data; |
| 366 | int offsets[DEPTH]; |
| 367 | Indirect chain[DEPTH]; |
| 368 | Indirect *partial; |
| 369 | sysv_zone_t nr = 0; |
| 370 | int n; |
| 371 | long iblock; |
| 372 | unsigned blocksize; |
| 373 | |
| 374 | if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
| 375 | S_ISLNK(inode->i_mode))) |
| 376 | return; |
| 377 | |
| 378 | blocksize = inode->i_sb->s_blocksize; |
| 379 | iblock = (inode->i_size + blocksize-1) |
| 380 | >> inode->i_sb->s_blocksize_bits; |
| 381 | |
| 382 | block_truncate_page(inode->i_mapping, inode->i_size, get_block); |
| 383 | |
| 384 | n = block_to_path(inode, iblock, offsets); |
| 385 | if (n == 0) |
| 386 | return; |
| 387 | |
| 388 | if (n == 1) { |
| 389 | free_data(inode, i_data+offsets[0], i_data + DIRECT); |
| 390 | goto do_indirects; |
| 391 | } |
| 392 | |
| 393 | partial = find_shared(inode, n, offsets, chain, &nr); |
| 394 | /* Kill the top of shared branch (already detached) */ |
| 395 | if (nr) { |
| 396 | if (partial == chain) |
| 397 | mark_inode_dirty(inode); |
| 398 | else |
| 399 | dirty_indirect(partial->bh, inode); |
| 400 | free_branches(inode, &nr, &nr+1, (chain+n-1) - partial); |
| 401 | } |
| 402 | /* Clear the ends of indirect blocks on the shared branch */ |
| 403 | while (partial > chain) { |
| 404 | free_branches(inode, partial->p + 1, block_end(partial->bh), |
| 405 | (chain+n-1) - partial); |
| 406 | dirty_indirect(partial->bh, inode); |
| 407 | brelse (partial->bh); |
| 408 | partial--; |
| 409 | } |
| 410 | do_indirects: |
| 411 | /* Kill the remaining (whole) subtrees (== subtrees deeper than...) */ |
| 412 | while (n < DEPTH) { |
| 413 | nr = i_data[DIRECT + n - 1]; |
| 414 | if (nr) { |
| 415 | i_data[DIRECT + n - 1] = 0; |
| 416 | mark_inode_dirty(inode); |
| 417 | free_branches(inode, &nr, &nr+1, n); |
| 418 | } |
| 419 | n++; |
| 420 | } |
| 421 | inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; |
| 422 | if (IS_SYNC(inode)) |
| 423 | sysv_sync_inode (inode); |
| 424 | else |
| 425 | mark_inode_dirty(inode); |
| 426 | } |
| 427 | |
| 428 | static unsigned sysv_nblocks(struct super_block *s, loff_t size) |
| 429 | { |
| 430 | struct sysv_sb_info *sbi = SYSV_SB(s); |
| 431 | int ptrs_bits = sbi->s_ind_per_block_bits; |
| 432 | unsigned blocks, res, direct = DIRECT, i = DEPTH; |
| 433 | blocks = (size + s->s_blocksize - 1) >> s->s_blocksize_bits; |
| 434 | res = blocks; |
| 435 | while (--i && blocks > direct) { |
| 436 | blocks = ((blocks - direct - 1) >> ptrs_bits) + 1; |
| 437 | res += blocks; |
| 438 | direct = 1; |
| 439 | } |
| 440 | return blocks; |
| 441 | } |
| 442 | |
| 443 | int sysv_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) |
| 444 | { |
Al Viro | d8c9584 | 2011-12-07 18:16:57 -0500 | [diff] [blame] | 445 | struct super_block *s = dentry->d_sb; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 446 | generic_fillattr(dentry->d_inode, stat); |
| 447 | stat->blocks = (s->s_blocksize / 512) * sysv_nblocks(s, stat->size); |
| 448 | stat->blksize = s->s_blocksize; |
| 449 | return 0; |
| 450 | } |
| 451 | |
| 452 | static int sysv_writepage(struct page *page, struct writeback_control *wbc) |
| 453 | { |
| 454 | return block_write_full_page(page,get_block,wbc); |
| 455 | } |
Nick Piggin | 26a6441 | 2007-10-16 01:25:21 -0700 | [diff] [blame] | 456 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 457 | static int sysv_readpage(struct file *file, struct page *page) |
| 458 | { |
| 459 | return block_read_full_page(page,get_block); |
| 460 | } |
Nick Piggin | 26a6441 | 2007-10-16 01:25:21 -0700 | [diff] [blame] | 461 | |
Christoph Hellwig | f4e420d | 2010-06-04 11:29:56 +0200 | [diff] [blame] | 462 | int sysv_prepare_chunk(struct page *page, loff_t pos, unsigned len) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 463 | { |
Christoph Hellwig | 6e1db88 | 2010-06-04 11:29:57 +0200 | [diff] [blame] | 464 | return __block_write_begin(page, pos, len, get_block); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 465 | } |
Nick Piggin | 26a6441 | 2007-10-16 01:25:21 -0700 | [diff] [blame] | 466 | |
Marco Stornelli | fa4d62a | 2012-12-15 11:45:58 +0100 | [diff] [blame] | 467 | static void sysv_write_failed(struct address_space *mapping, loff_t to) |
| 468 | { |
| 469 | struct inode *inode = mapping->host; |
| 470 | |
| 471 | if (to > inode->i_size) { |
Kirill A. Shutemov | 7caef26 | 2013-09-12 15:13:56 -0700 | [diff] [blame] | 472 | truncate_pagecache(inode, inode->i_size); |
Marco Stornelli | fa4d62a | 2012-12-15 11:45:58 +0100 | [diff] [blame] | 473 | sysv_truncate(inode); |
| 474 | } |
| 475 | } |
| 476 | |
Nick Piggin | 26a6441 | 2007-10-16 01:25:21 -0700 | [diff] [blame] | 477 | static int sysv_write_begin(struct file *file, struct address_space *mapping, |
| 478 | loff_t pos, unsigned len, unsigned flags, |
| 479 | struct page **pagep, void **fsdata) |
| 480 | { |
Christoph Hellwig | 155130a | 2010-06-04 11:29:58 +0200 | [diff] [blame] | 481 | int ret; |
| 482 | |
| 483 | ret = block_write_begin(mapping, pos, len, flags, pagep, get_block); |
Marco Stornelli | fa4d62a | 2012-12-15 11:45:58 +0100 | [diff] [blame] | 484 | if (unlikely(ret)) |
| 485 | sysv_write_failed(mapping, pos + len); |
Christoph Hellwig | 155130a | 2010-06-04 11:29:58 +0200 | [diff] [blame] | 486 | |
| 487 | return ret; |
Nick Piggin | 26a6441 | 2007-10-16 01:25:21 -0700 | [diff] [blame] | 488 | } |
| 489 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 490 | static sector_t sysv_bmap(struct address_space *mapping, sector_t block) |
| 491 | { |
| 492 | return generic_block_bmap(mapping,block,get_block); |
| 493 | } |
Nick Piggin | 26a6441 | 2007-10-16 01:25:21 -0700 | [diff] [blame] | 494 | |
Christoph Hellwig | f5e54d6 | 2006-06-28 04:26:44 -0700 | [diff] [blame] | 495 | const struct address_space_operations sysv_aops = { |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 496 | .readpage = sysv_readpage, |
| 497 | .writepage = sysv_writepage, |
Nick Piggin | 26a6441 | 2007-10-16 01:25:21 -0700 | [diff] [blame] | 498 | .write_begin = sysv_write_begin, |
| 499 | .write_end = generic_write_end, |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 500 | .bmap = sysv_bmap |
| 501 | }; |