#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/bio.h>
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/page-flags.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include <linux/swap.h>
#include <linux/version.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include "extent_io.h"
#include "extent_map.h"
#include "compat.h"
#include "ctree.h"
#include "btrfs_inode.h"

/* temporary define until extent_map moves out of btrfs */
struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
				       unsigned long extra_flags,
				       void (*ctor)(void *, struct kmem_cache *,
						    unsigned long));

static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;

static LIST_HEAD(buffers);
static LIST_HEAD(states);
static spinlock_t leak_lock = SPIN_LOCK_UNLOCKED;

#define BUFFER_LRU_MAX 64

struct tree_entry {
	u64 start;
	u64 end;
	struct rb_node rb_node;
};

struct extent_page_data {
	struct bio *bio;
	struct extent_io_tree *tree;
	get_extent_t *get_extent;
};

int __init extent_io_init(void)
{
	extent_state_cache = btrfs_cache_create("extent_state",
					sizeof(struct extent_state), 0,
					NULL);
	if (!extent_state_cache)
		return -ENOMEM;

	extent_buffer_cache = btrfs_cache_create("extent_buffers",
					sizeof(struct extent_buffer), 0,
					NULL);
	if (!extent_buffer_cache)
		goto free_state_cache;
	return 0;

free_state_cache:
	kmem_cache_destroy(extent_state_cache);
	return -ENOMEM;
}

void extent_io_exit(void)
{
	struct extent_state *state;
	struct extent_buffer *eb;

	while (!list_empty(&states)) {
		state = list_entry(states.next, struct extent_state, leak_list);
		printk("state leak: start %Lu end %Lu state %lu in tree %p refs %d\n", state->start, state->end, state->state, state->tree, atomic_read(&state->refs));
		list_del(&state->leak_list);
		kmem_cache_free(extent_state_cache, state);

	}

	while (!list_empty(&buffers)) {
		eb = list_entry(buffers.next, struct extent_buffer, leak_list);
		printk("buffer leak start %Lu len %lu refs %d\n", eb->start, eb->len, atomic_read(&eb->refs));
		list_del(&eb->leak_list);
		kmem_cache_free(extent_buffer_cache, eb);
	}
	if (extent_state_cache)
		kmem_cache_destroy(extent_state_cache);
	if (extent_buffer_cache)
		kmem_cache_destroy(extent_buffer_cache);
}

void extent_io_tree_init(struct extent_io_tree *tree,
			  struct address_space *mapping, gfp_t mask)
{
	tree->state.rb_node = NULL;
	tree->buffer.rb_node = NULL;
	tree->ops = NULL;
	tree->dirty_bytes = 0;
	spin_lock_init(&tree->lock);
	spin_lock_init(&tree->buffer_lock);
	tree->mapping = mapping;
}
EXPORT_SYMBOL(extent_io_tree_init);
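
/*
 * Example (illustrative sketch only): a typical caller embeds an
 * extent_io_tree in its per-inode structure and initializes it once,
 * before any extent state is tracked.  The 'inode' below is hypothetical.
 *
 *	extent_io_tree_init(&BTRFS_I(inode)->io_tree,
 *			    inode->i_mapping, GFP_NOFS);
 */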

struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;
	unsigned long flags;

	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state)
		return state;
	state->state = 0;
	state->private = 0;
	state->tree = NULL;
	spin_lock_irqsave(&leak_lock, flags);
	list_add(&state->leak_list, &states);
	spin_unlock_irqrestore(&leak_lock, flags);

	atomic_set(&state->refs, 1);
	init_waitqueue_head(&state->wq);
	return state;
}
EXPORT_SYMBOL(alloc_extent_state);

void free_extent_state(struct extent_state *state)
{
	if (!state)
		return;
	if (atomic_dec_and_test(&state->refs)) {
		unsigned long flags;
		WARN_ON(state->tree);
		spin_lock_irqsave(&leak_lock, flags);
		list_del(&state->leak_list);
		spin_unlock_irqrestore(&leak_lock, flags);
		kmem_cache_free(extent_state_cache, state);
	}
}
EXPORT_SYMBOL(free_extent_state);
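
/*
 * Reference counting sketch (illustrative): extent_state objects are
 * refcounted, so code that hands a state to a sleeper takes a reference
 * first and drops it with free_extent_state() afterwards, as the wait
 * helpers later in this file do.
 *
 *	atomic_inc(&state->refs);
 *	wait_on_state(tree, state);	/# sleeps, drops and retakes tree->lock #/
 *	free_extent_state(state);	/# drops the reference taken above #/
 */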

static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
				   struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct tree_entry *entry;

	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct tree_entry, rb_node);

		if (offset < entry->start)
			p = &(*p)->rb_left;
		else if (offset > entry->end)
			p = &(*p)->rb_right;
		else
			return parent;
	}

	entry = rb_entry(node, struct tree_entry, rb_node);
	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}

static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
				      struct rb_node **prev_ret,
				      struct rb_node **next_ret)
{
	struct rb_root *root = &tree->state;
	struct rb_node *n = root->rb_node;
	struct rb_node *prev = NULL;
	struct rb_node *orig_prev = NULL;
	struct tree_entry *entry;
	struct tree_entry *prev_entry = NULL;

	while (n) {
		entry = rb_entry(n, struct tree_entry, rb_node);
		prev = n;
		prev_entry = entry;

		if (offset < entry->start)
			n = n->rb_left;
		else if (offset > entry->end)
			n = n->rb_right;
		else {
			return n;
		}
	}

	if (prev_ret) {
		orig_prev = prev;
		while (prev && offset > prev_entry->end) {
			prev = rb_next(prev);
			prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		}
		*prev_ret = prev;
		prev = orig_prev;
	}

	if (next_ret) {
		prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		while (prev && offset < prev_entry->start) {
			prev = rb_prev(prev);
			prev_entry = rb_entry(prev, struct tree_entry, rb_node);
		}
		*next_ret = prev;
	}
	return NULL;
}

static inline struct rb_node *tree_search(struct extent_io_tree *tree,
					  u64 offset)
{
	struct rb_node *prev = NULL;
	struct rb_node *ret;

	ret = __etree_search(tree, offset, &prev, NULL);
	if (!ret) {
		return prev;
	}
	return ret;
}

static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
						u64 offset, struct rb_node *node)
{
	struct rb_root *root = &tree->buffer;
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct extent_buffer *eb;

	while (*p) {
		parent = *p;
		eb = rb_entry(parent, struct extent_buffer, rb_node);

		if (offset < eb->start)
			p = &(*p)->rb_left;
		else if (offset > eb->start)
			p = &(*p)->rb_right;
		else
			return eb;
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}

static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
					   u64 offset)
{
	struct rb_root *root = &tree->buffer;
	struct rb_node *n = root->rb_node;
	struct extent_buffer *eb;

	while (n) {
		eb = rb_entry(n, struct extent_buffer, rb_node);
		if (offset < eb->start)
			n = n->rb_left;
		else if (offset > eb->start)
			n = n->rb_right;
		else
			return eb;
	}
	return NULL;
}

/*
 * utility function to look for merge candidates inside a given range.
 * Any extents with matching state are merged together into a single
 * extent in the tree.  Extents with EXTENT_IO in their state field
 * are not merged because the end_io handlers need to be able to do
 * operations on them without sleeping (or doing allocations/splits).
 *
 * This should be called with the tree lock held.
 */
static int merge_state(struct extent_io_tree *tree,
		       struct extent_state *state)
{
	struct extent_state *other;
	struct rb_node *other_node;

	if (state->state & EXTENT_IOBITS)
		return 0;

	other_node = rb_prev(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->end == state->start - 1 &&
		    other->state == state->state) {
			state->start = other->start;
			other->tree = NULL;
			rb_erase(&other->rb_node, &tree->state);
			free_extent_state(other);
		}
	}
	other_node = rb_next(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->start == state->end + 1 &&
		    other->state == state->state) {
			other->start = state->start;
			state->tree = NULL;
			rb_erase(&state->rb_node, &tree->state);
			free_extent_state(state);
		}
	}
	return 0;
}

static void set_state_cb(struct extent_io_tree *tree,
			 struct extent_state *state,
			 unsigned long bits)
{
	if (tree->ops && tree->ops->set_bit_hook) {
		tree->ops->set_bit_hook(tree->mapping->host, state->start,
					state->end, state->state, bits);
	}
}

static void clear_state_cb(struct extent_io_tree *tree,
			   struct extent_state *state,
			   unsigned long bits)
{
	if (tree->ops && tree->ops->clear_bit_hook) {
		tree->ops->clear_bit_hook(tree->mapping->host, state->start,
					  state->end, state->state, bits);
	}
}

/*
 * insert an extent_state struct into the tree.  'bits' are set on the
 * struct before it is inserted.
 *
 * This may return -EEXIST if the extent is already there, in which case the
 * state struct is freed.
 *
 * The tree lock is not taken internally.  This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
static int insert_state(struct extent_io_tree *tree,
			struct extent_state *state, u64 start, u64 end,
			int bits)
{
	struct rb_node *node;

	if (end < start) {
		printk("end < start %Lu %Lu\n", end, start);
		WARN_ON(1);
	}
	if (bits & EXTENT_DIRTY)
		tree->dirty_bytes += end - start + 1;
	set_state_cb(tree, state, bits);
	state->state |= bits;
	state->start = start;
	state->end = end;
	node = tree_insert(&tree->state, end, &state->rb_node);
	if (node) {
		struct extent_state *found;
		found = rb_entry(node, struct extent_state, rb_node);
		printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end);
		free_extent_state(state);
		return -EEXIST;
	}
	state->tree = tree;
	merge_state(tree, state);
	return 0;
}

/*
 * split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created second half.  'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling,
 * the tree has 'orig' at [orig->start, orig->end].  After calling, there
 * are two extent state structs in the tree:
 * prealloc: [orig->start, split - 1]
 * orig: [ split, orig->end ]
 *
 * The tree locks are not taken by this function.  They need to be held
 * by the caller.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	struct rb_node *node;
	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	orig->start = split;

	node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
	if (node) {
		struct extent_state *found;
		found = rb_entry(node, struct extent_state, rb_node);
		printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end);
		free_extent_state(prealloc);
		return -EEXIST;
	}
	prealloc->tree = tree;
	return 0;
}

/*
 * utility function to clear some bits in an extent state struct.
 * it will optionally wake up any one waiting on this state (wake == 1), or
 * forcibly remove the state from the tree (delete == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree
 */
static int clear_state_bit(struct extent_io_tree *tree,
			   struct extent_state *state, int bits, int wake,
			   int delete)
{
	int ret = state->state & bits;

	if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
		u64 range = state->end - state->start + 1;
		WARN_ON(range > tree->dirty_bytes);
		tree->dirty_bytes -= range;
	}
	clear_state_cb(tree, state, bits);
	state->state &= ~bits;
	if (wake)
		wake_up(&state->wq);
	if (delete || state->state == 0) {
		if (state->tree) {
			clear_state_cb(tree, state, state->state);
			rb_erase(&state->rb_node, &tree->state);
			state->tree = NULL;
			free_extent_state(state);
		} else {
			WARN_ON(1);
		}
	} else {
		merge_state(tree, state);
	}
	return ret;
}

/*
 * clear some bits on a range in the tree.  This may require splitting
 * or inserting elements in the tree, so the gfp mask is used to
 * indicate which allocations or sleeping are allowed.
 *
 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
 * the given range from the tree regardless of state (ie for truncate).
 *
 * the range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns < 0 on error, > 0 if any of the
 * bits were already set, or zero if none of the bits were already set.
 */
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		     int bits, int wake, int delete, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	unsigned long flags;
	int err;
	int set = 0;

again:
	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);
		if (!prealloc)
			return -ENOMEM;
	}

	spin_lock_irqsave(&tree->lock, flags);
	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = tree_search(tree, start);
	if (!node)
		goto out;
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start > end)
		goto out;
	WARN_ON(state->end < start);

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip
	 * bits on second half.
	 *
	 * If the extent we found extends past our range, we
	 * just split and search again.  It'll get split again
	 * the next time though.
	 *
	 * If the extent we found is inside our range, we clear
	 * the desired bit on it.
	 */

	if (state->start < start) {
		if (!prealloc)
			prealloc = alloc_extent_state(GFP_ATOMIC);
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			start = state->end + 1;
			set |= clear_state_bit(tree, state, bits,
					       wake, delete);
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		if (!prealloc)
			prealloc = alloc_extent_state(GFP_ATOMIC);
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		if (wake)
			wake_up(&state->wq);
		set |= clear_state_bit(tree, prealloc, bits,
				       wake, delete);
		prealloc = NULL;
		goto out;
	}

	start = state->end + 1;
	set |= clear_state_bit(tree, state, bits, wake, delete);
	goto search_again;

out:
	spin_unlock_irqrestore(&tree->lock, flags);
	if (prealloc)
		free_extent_state(prealloc);

	return set;

search_again:
	if (start > end)
		goto out;
	spin_unlock_irqrestore(&tree->lock, flags);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
}
EXPORT_SYMBOL(clear_extent_bit);
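
/*
 * Usage sketch (illustrative, values hypothetical): dropping delalloc
 * and dirty tracking for a range that is about to be truncated.  The
 * 'delete' argument forces removal even if other bits remain set.
 *
 *	clear_extent_bit(tree, start, end,
 *			 EXTENT_DIRTY | EXTENT_DELALLOC,
 *			 1, 1, GFP_NOFS);
 */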

static int wait_on_state(struct extent_io_tree *tree,
			 struct extent_state *state)
{
	DEFINE_WAIT(wait);
	prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
	spin_unlock_irq(&tree->lock);
	schedule();
	spin_lock_irq(&tree->lock);
	finish_wait(&state->wq, &wait);
	return 0;
}

/*
 * waits for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function
 */
int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
{
	struct extent_state *state;
	struct rb_node *node;

	spin_lock_irq(&tree->lock);
again:
	while (1) {
		/*
		 * this search will find all the extents that end after
		 * our range starts
		 */
		node = tree_search(tree, start);
		if (!node)
			break;

		state = rb_entry(node, struct extent_state, rb_node);

		if (state->start > end)
			goto out;

		if (state->state & bits) {
			start = state->start;
			atomic_inc(&state->refs);
			wait_on_state(tree, state);
			free_extent_state(state);
			goto again;
		}
		start = state->end + 1;

		if (start > end)
			break;

		if (need_resched()) {
			spin_unlock_irq(&tree->lock);
			cond_resched();
			spin_lock_irq(&tree->lock);
		}
	}
out:
	spin_unlock_irq(&tree->lock);
	return 0;
}
EXPORT_SYMBOL(wait_extent_bit);

static void set_state_bits(struct extent_io_tree *tree,
			   struct extent_state *state,
			   int bits)
{
	if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
		u64 range = state->end - state->start + 1;
		tree->dirty_bytes += range;
	}
	set_state_cb(tree, state, bits);
	state->state |= bits;
}

/*
 * set some bits on a range in the tree.  This may require allocations
 * or sleeping, so the gfp mask is used to indicate what is allowed.
 *
 * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
 * range already has the desired bits set.  The start of the existing
 * range is returned in failed_start in this case.
 *
 * [start, end] is inclusive
 * This takes the tree lock.
 */
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
		   int exclusive, u64 *failed_start, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	unsigned long flags;
	int err = 0;
	int set;
	u64 last_start;
	u64 last_end;
again:
	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);
		if (!prealloc)
			return -ENOMEM;
	}

	spin_lock_irqsave(&tree->lock, flags);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node) {
		err = insert_state(tree, prealloc, start, end, bits);
		prealloc = NULL;
		BUG_ON(err == -EEXIST);
		goto out;
	}

	state = rb_entry(node, struct extent_state, rb_node);
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		set = state->state & bits;
		if (set && exclusive) {
			*failed_start = state->start;
			err = -EEXIST;
			goto out;
		}
		set_state_bits(tree, state, bits);
		start = state->end + 1;
		merge_state(tree, state);
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again.  It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		set = state->state & bits;
		if (exclusive && set) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			set_state_bits(tree, state, bits);
			start = state->end + 1;
			merge_state(tree, state);
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;
		err = insert_state(tree, prealloc, start, this_end,
				   bits);
		prealloc = NULL;
		BUG_ON(err == -EEXIST);
		if (err)
			goto out;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and set the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		set = state->state & bits;
		if (exclusive && set) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		set_state_bits(tree, prealloc, bits);
		merge_state(tree, prealloc);
		prealloc = NULL;
		goto out;
	}

	goto search_again;

out:
	spin_unlock_irqrestore(&tree->lock, flags);
	if (prealloc)
		free_extent_state(prealloc);

	return err;

search_again:
	if (start > end)
		goto out;
	spin_unlock_irqrestore(&tree->lock, flags);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
}
EXPORT_SYMBOL(set_extent_bit);
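
/*
 * Usage sketch (illustrative): exclusive callers pass a failed_start
 * pointer so they can wait for the conflicting range and retry, which
 * is what lock_extent() below does with EXTENT_LOCKED.
 *
 *	u64 failed_start;
 *	err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
 *			     &failed_start, GFP_NOFS);
 *	if (err == -EEXIST)
 *		... wait on [failed_start, end] and try again ...
 */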

/* wrappers around set/clear extent bit */
int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_dirty);

int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
		       gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask);
}
EXPORT_SYMBOL(set_extent_ordered);

int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
		    int bits, gfp_t mask)
{
	return set_extent_bit(tree, start, end, bits, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_bits);

int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
		      int bits, gfp_t mask)
{
	return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_bits);

int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
			gfp_t mask)
{
	return set_extent_bit(tree, start, end,
			      EXTENT_DELALLOC | EXTENT_DIRTY,
			      0, NULL, mask);
}
EXPORT_SYMBOL(set_extent_delalloc);

int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
		       gfp_t mask)
{
	return clear_extent_bit(tree, start, end,
				EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_dirty);

int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
			 gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask);
}
EXPORT_SYMBOL(clear_extent_ordered);

int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
		   gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_new);

int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_new);

int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
			gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_uptodate);

int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
			  gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_uptodate);

int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
			 gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
			      0, NULL, mask);
}
EXPORT_SYMBOL(set_extent_writeback);

int clear_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
			   gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
}
EXPORT_SYMBOL(clear_extent_writeback);

int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
{
	return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
}
EXPORT_SYMBOL(wait_on_extent_writeback);

int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
{
	int err;
	u64 failed_start;
	while (1) {
		err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
				     &failed_start, mask);
		if (err == -EEXIST && (mask & __GFP_WAIT)) {
			wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
			start = failed_start;
		} else {
			break;
		}
		WARN_ON(start > end);
	}
	return err;
}
EXPORT_SYMBOL(lock_extent);

int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
		  gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
}
EXPORT_SYMBOL(unlock_extent);
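
/*
 * Locking sketch (illustrative, range hypothetical): readers and
 * writers bracket work on a byte range with lock_extent() and
 * unlock_extent(); both ends of the range are inclusive.
 *
 *	lock_extent(tree, start, start + len - 1, GFP_NOFS);
 *	... read or modify the pages backing [start, start + len - 1] ...
 *	unlock_extent(tree, start, start + len - 1, GFP_NOFS);
 */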

/*
 * helper function to set pages and extents in the tree dirty
 */
int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		BUG_ON(!page);
		__set_page_dirty_nobuffers(page);
		page_cache_release(page);
		index++;
	}
	set_extent_dirty(tree, start, end, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(set_range_dirty);

/*
 * helper function to set both pages and extents in the tree writeback
 */
int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		BUG_ON(!page);
		set_page_writeback(page);
		page_cache_release(page);
		index++;
	}
	set_extent_writeback(tree, start, end, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(set_range_writeback);

int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
			  u64 *start_ret, u64 *end_ret, int bits)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 1;

	spin_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node) {
		goto out;
	}

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->end >= start && (state->state & bits)) {
			*start_ret = state->start;
			*end_ret = state->end;
			ret = 0;
			break;
		}
		node = rb_next(node);
		if (!node)
			break;
	}
out:
	spin_unlock_irq(&tree->lock);
	return ret;
}
EXPORT_SYMBOL(find_first_extent_bit);
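
/*
 * Usage sketch (illustrative): scanning forward for the next dirty
 * range at or after 'cur'; a non-zero return means nothing was found.
 *
 *	u64 found_start, found_end;
 *	if (!find_first_extent_bit(tree, cur, &found_start, &found_end,
 *				   EXTENT_DIRTY))
 *		... process [found_start, found_end] ...
 */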

struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
						 u64 start, int bits)
{
	struct rb_node *node;
	struct extent_state *state;

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node) {
		goto out;
	}

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->end >= start && (state->state & bits)) {
			return state;
		}
		node = rb_next(node);
		if (!node)
			break;
	}
out:
	return NULL;
}
EXPORT_SYMBOL(find_first_extent_bit_state);

u64 find_lock_delalloc_range(struct extent_io_tree *tree,
			     u64 *start, u64 *end, u64 max_bytes)
{
	struct rb_node *node;
	struct extent_state *state;
	u64 cur_start = *start;
	u64 found = 0;
	u64 total_bytes = 0;

	spin_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
search_again:
	node = tree_search(tree, cur_start);
	if (!node) {
		if (!found)
			*end = (u64)-1;
		goto out;
	}

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (found && state->start != cur_start) {
			goto out;
		}
		if (!(state->state & EXTENT_DELALLOC)) {
			if (!found)
				*end = state->end;
			goto out;
		}
		if (!found) {
			struct extent_state *prev_state;
			struct rb_node *prev_node = node;
			while (1) {
				prev_node = rb_prev(prev_node);
				if (!prev_node)
					break;
				prev_state = rb_entry(prev_node,
						      struct extent_state,
						      rb_node);
				if (!(prev_state->state & EXTENT_DELALLOC))
					break;
				state = prev_state;
				node = prev_node;
			}
		}
		if (state->state & EXTENT_LOCKED) {
			DEFINE_WAIT(wait);
			atomic_inc(&state->refs);
			prepare_to_wait(&state->wq, &wait,
					TASK_UNINTERRUPTIBLE);
			spin_unlock_irq(&tree->lock);
			schedule();
			spin_lock_irq(&tree->lock);
			finish_wait(&state->wq, &wait);
			free_extent_state(state);
			goto search_again;
		}
		set_state_cb(tree, state, EXTENT_LOCKED);
		state->state |= EXTENT_LOCKED;
		if (!found)
			*start = state->start;
		found++;
		*end = state->end;
		cur_start = state->end + 1;
		node = rb_next(node);
		if (!node)
			break;
		total_bytes += state->end - state->start + 1;
		if (total_bytes >= max_bytes)
			break;
	}
out:
	spin_unlock_irq(&tree->lock);
	return found;
}

u64 count_range_bits(struct extent_io_tree *tree,
		     u64 *start, u64 search_end, u64 max_bytes,
		     unsigned long bits)
{
	struct rb_node *node;
	struct extent_state *state;
	u64 cur_start = *start;
	u64 total_bytes = 0;
	int found = 0;

	if (search_end <= cur_start) {
		printk("search_end %Lu start %Lu\n", search_end, cur_start);
		WARN_ON(1);
		return 0;
	}

	spin_lock_irq(&tree->lock);
	if (cur_start == 0 && bits == EXTENT_DIRTY) {
		total_bytes = tree->dirty_bytes;
		goto out;
	}
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, cur_start);
	if (!node) {
		goto out;
	}

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->start > search_end)
			break;
		if (state->end >= cur_start && (state->state & bits)) {
			total_bytes += min(search_end, state->end) + 1 -
				       max(cur_start, state->start);
			if (total_bytes >= max_bytes)
				break;
			if (!found) {
				*start = state->start;
				found = 1;
			}
		}
		node = rb_next(node);
		if (!node)
			break;
	}
out:
	spin_unlock_irq(&tree->lock);
	return total_bytes;
}
/*
 * helper function to lock both pages and extents in the tree.
 * pages must be locked first.
 */
int lock_range(struct extent_io_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;
	int err;

	while (index <= end_index) {
		page = grab_cache_page(tree->mapping, index);
		if (!page) {
			err = -ENOMEM;
			goto failed;
		}
		if (IS_ERR(page)) {
			err = PTR_ERR(page);
			goto failed;
		}
		index++;
	}
	lock_extent(tree, start, end, GFP_NOFS);
	return 0;

failed:
	/*
	 * we failed above in getting the page at 'index', so we undo here
	 * up to but not including the page at 'index'
	 */
	end_index = index;
	index = start >> PAGE_CACHE_SHIFT;
	while (index < end_index) {
		page = find_get_page(tree->mapping, index);
		unlock_page(page);
		page_cache_release(page);
		index++;
	}
	return err;
}
EXPORT_SYMBOL(lock_range);

/*
 * helper function to unlock both pages and extents in the tree.
 */
int unlock_range(struct extent_io_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		unlock_page(page);
		page_cache_release(page);
		index++;
	}
	unlock_extent(tree, start, end, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(unlock_range);

int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 0;

	spin_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node) {
		ret = -ENOENT;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	state->private = private;
out:
	spin_unlock_irq(&tree->lock);
	return ret;
}

int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 0;

	spin_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node) {
		ret = -ENOENT;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	*private = state->private;
out:
	spin_unlock_irq(&tree->lock);
	return ret;
}
1286
1287/*
1288 * searches a range in the state tree for a given mask.
Chris Mason70dec802008-01-29 09:59:12 -05001289 * If 'filled' == 1, this returns 1 only if every extent in the tree
Chris Masond1310b22008-01-24 16:13:08 -05001290 * has the bits set. Otherwise, 1 is returned if any bit in the
1291 * range is found set.
1292 */
1293int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1294 int bits, int filled)
1295{
1296 struct extent_state *state = NULL;
1297 struct rb_node *node;
1298 int bitset = 0;
1299 unsigned long flags;
1300
Chris Mason70dec802008-01-29 09:59:12 -05001301 spin_lock_irqsave(&tree->lock, flags);
Chris Mason80ea96b2008-02-01 14:51:59 -05001302 node = tree_search(tree, start);
Chris Masond1310b22008-01-24 16:13:08 -05001303 while (node && start <= end) {
1304 state = rb_entry(node, struct extent_state, rb_node);
1305
1306 if (filled && state->start > start) {
1307 bitset = 0;
1308 break;
1309 }
1310
1311 if (state->start > end)
1312 break;
1313
1314 if (state->state & bits) {
1315 bitset = 1;
1316 if (!filled)
1317 break;
1318 } else if (filled) {
1319 bitset = 0;
1320 break;
1321 }
1322 start = state->end + 1;
1323 if (start > end)
1324 break;
1325 node = rb_next(node);
1326 if (!node) {
1327 if (filled)
1328 bitset = 0;
1329 break;
1330 }
1331 }
Chris Mason70dec802008-01-29 09:59:12 -05001332 spin_unlock_irqrestore(&tree->lock, flags);
Chris Masond1310b22008-01-24 16:13:08 -05001333 return bitset;
1334}
1335EXPORT_SYMBOL(test_range_bit);
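
/*
 * Usage sketch (illustrative): with filled == 1 the whole range must
 * have the bits set, with filled == 0 any overlap is enough.
 *
 *	if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
 *		... every byte in [start, end] is uptodate ...
 *	if (test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
 *		... at least part of [start, end] is still locked ...
 */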
1336
1337/*
1338 * helper function to set a given page up to date if all the
1339 * extents in the tree for that page are up to date
1340 */
1341static int check_page_uptodate(struct extent_io_tree *tree,
1342 struct page *page)
1343{
1344 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1345 u64 end = start + PAGE_CACHE_SIZE - 1;
1346 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
1347 SetPageUptodate(page);
1348 return 0;
1349}
1350
1351/*
1352 * helper function to unlock a page if all the extents in the tree
1353 * for that page are unlocked
1354 */
1355static int check_page_locked(struct extent_io_tree *tree,
1356 struct page *page)
1357{
1358 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1359 u64 end = start + PAGE_CACHE_SIZE - 1;
1360 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
1361 unlock_page(page);
1362 return 0;
1363}
1364
1365/*
1366 * helper function to end page writeback if all the extents
1367 * in the tree for that page are done with writeback
1368 */
1369static int check_page_writeback(struct extent_io_tree *tree,
1370 struct page *page)
1371{
1372 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1373 u64 end = start + PAGE_CACHE_SIZE - 1;
1374 if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
1375 end_page_writeback(page);
1376 return 0;
1377}
1378
1379/* lots and lots of room for performance fixes in the end_bio funcs */
1380
1381/*
1382 * after a writepage IO is done, we need to:
1383 * clear the uptodate bits on error
1384 * clear the writeback bits in the extent tree for this IO
1385 * end_page_writeback if the page has no more pending IO
1386 *
1387 * Scheduling is not allowed, so the extent state tree is expected
1388 * to have one and only one object corresponding to this IO.
1389 */
1390#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
1391static void end_bio_extent_writepage(struct bio *bio, int err)
1392#else
1393static int end_bio_extent_writepage(struct bio *bio,
1394 unsigned int bytes_done, int err)
1395#endif
1396{
Chris Mason1259ab72008-05-12 13:39:03 -04001397 int uptodate = err == 0;
Chris Masond1310b22008-01-24 16:13:08 -05001398 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
David Woodhouse902b22f2008-08-20 08:51:49 -04001399 struct extent_io_tree *tree;
Chris Masond1310b22008-01-24 16:13:08 -05001400 u64 start;
1401 u64 end;
1402 int whole_page;
Chris Mason1259ab72008-05-12 13:39:03 -04001403 int ret;
Chris Masond1310b22008-01-24 16:13:08 -05001404
1405#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1406 if (bio->bi_size)
1407 return 1;
1408#endif
Chris Masond1310b22008-01-24 16:13:08 -05001409 do {
1410 struct page *page = bvec->bv_page;
David Woodhouse902b22f2008-08-20 08:51:49 -04001411 tree = &BTRFS_I(page->mapping->host)->io_tree;
1412
Chris Masond1310b22008-01-24 16:13:08 -05001413 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1414 bvec->bv_offset;
1415 end = start + bvec->bv_len - 1;
1416
1417 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1418 whole_page = 1;
1419 else
1420 whole_page = 0;
1421
1422 if (--bvec >= bio->bi_io_vec)
1423 prefetchw(&bvec->bv_page->flags);
Chris Mason1259ab72008-05-12 13:39:03 -04001424 if (tree->ops && tree->ops->writepage_end_io_hook) {
1425 ret = tree->ops->writepage_end_io_hook(page, start,
David Woodhouse902b22f2008-08-20 08:51:49 -04001426 end, NULL, uptodate);
Chris Mason1259ab72008-05-12 13:39:03 -04001427 if (ret)
1428 uptodate = 0;
1429 }
1430
1431 if (!uptodate && tree->ops &&
1432 tree->ops->writepage_io_failed_hook) {
1433 ret = tree->ops->writepage_io_failed_hook(bio, page,
David Woodhouse902b22f2008-08-20 08:51:49 -04001434 start, end, NULL);
Chris Mason1259ab72008-05-12 13:39:03 -04001435 if (ret == 0) {
Chris Mason1259ab72008-05-12 13:39:03 -04001436 uptodate = (err == 0);
1437 continue;
1438 }
1439 }
1440
Chris Masond1310b22008-01-24 16:13:08 -05001441 if (!uptodate) {
1442 clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
1443 ClearPageUptodate(page);
1444 SetPageError(page);
1445 }
Chris Mason70dec802008-01-29 09:59:12 -05001446
David Woodhouse902b22f2008-08-20 08:51:49 -04001447 clear_extent_writeback(tree, start, end, GFP_ATOMIC);
Chris Masond1310b22008-01-24 16:13:08 -05001448
1449 if (whole_page)
1450 end_page_writeback(page);
1451 else
1452 check_page_writeback(tree, page);
Chris Masond1310b22008-01-24 16:13:08 -05001453 } while (bvec >= bio->bi_io_vec);
Chris Masond1310b22008-01-24 16:13:08 -05001454 bio_put(bio);
1455#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1456 return 0;
1457#endif
1458}
1459
1460/*
1461 * after a readpage IO is done, we need to:
1462 * clear the uptodate bits on error
1463 * set the uptodate bits if things worked
1464 * set the page up to date if all extents in the tree are uptodate
1465 * clear the lock bit in the extent tree
1466 * unlock the page if there are no other extents locked for it
1467 *
1468 * Scheduling is not allowed, so the extent state tree is expected
1469 * to have one and only one object corresponding to this IO.
1470 */
1471#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
1472static void end_bio_extent_readpage(struct bio *bio, int err)
1473#else
1474static int end_bio_extent_readpage(struct bio *bio,
1475 unsigned int bytes_done, int err)
1476#endif
1477{
1478 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1479 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
David Woodhouse902b22f2008-08-20 08:51:49 -04001480 struct extent_io_tree *tree;
Chris Masond1310b22008-01-24 16:13:08 -05001481 u64 start;
1482 u64 end;
1483 int whole_page;
1484 int ret;
1485
1486#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1487 if (bio->bi_size)
1488 return 1;
1489#endif
1490
1491 do {
1492 struct page *page = bvec->bv_page;
David Woodhouse902b22f2008-08-20 08:51:49 -04001493 tree = &BTRFS_I(page->mapping->host)->io_tree;
1494
Chris Masond1310b22008-01-24 16:13:08 -05001495 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1496 bvec->bv_offset;
1497 end = start + bvec->bv_len - 1;
1498
1499 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1500 whole_page = 1;
1501 else
1502 whole_page = 0;
1503
1504 if (--bvec >= bio->bi_io_vec)
1505 prefetchw(&bvec->bv_page->flags);
1506
1507 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
Chris Mason70dec802008-01-29 09:59:12 -05001508 ret = tree->ops->readpage_end_io_hook(page, start, end,
David Woodhouse902b22f2008-08-20 08:51:49 -04001509 NULL);
Chris Masond1310b22008-01-24 16:13:08 -05001510 if (ret)
1511 uptodate = 0;
1512 }
Chris Mason7e383262008-04-09 16:28:12 -04001513 if (!uptodate && tree->ops &&
1514 tree->ops->readpage_io_failed_hook) {
1515 ret = tree->ops->readpage_io_failed_hook(bio, page,
David Woodhouse902b22f2008-08-20 08:51:49 -04001516 start, end, NULL);
Chris Mason7e383262008-04-09 16:28:12 -04001517 if (ret == 0) {
Chris Mason3b951512008-04-17 11:29:12 -04001518 uptodate =
1519 test_bit(BIO_UPTODATE, &bio->bi_flags);
Chris Mason7e383262008-04-09 16:28:12 -04001520 continue;
1521 }
1522 }
Chris Mason70dec802008-01-29 09:59:12 -05001523
David Woodhouse902b22f2008-08-20 08:51:49 -04001524 if (uptodate)
1525 set_extent_uptodate(tree, start, end,
1526 GFP_ATOMIC);
1527 unlock_extent(tree, start, end, GFP_ATOMIC);
Chris Masond1310b22008-01-24 16:13:08 -05001528
Chris Mason70dec802008-01-29 09:59:12 -05001529 if (whole_page) {
1530 if (uptodate) {
1531 SetPageUptodate(page);
1532 } else {
1533 ClearPageUptodate(page);
1534 SetPageError(page);
1535 }
Chris Masond1310b22008-01-24 16:13:08 -05001536 unlock_page(page);
Chris Mason70dec802008-01-29 09:59:12 -05001537 } else {
1538 if (uptodate) {
1539 check_page_uptodate(tree, page);
1540 } else {
1541 ClearPageUptodate(page);
1542 SetPageError(page);
1543 }
Chris Masond1310b22008-01-24 16:13:08 -05001544 check_page_locked(tree, page);
Chris Mason70dec802008-01-29 09:59:12 -05001545 }
Chris Masond1310b22008-01-24 16:13:08 -05001546 } while (bvec >= bio->bi_io_vec);
1547
1548 bio_put(bio);
1549#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1550 return 0;
1551#endif
1552}
1553
1554/*
1555 * IO done from prepare_write is pretty simple, we just unlock
1556 * the structs in the extent tree when done, and set the uptodate bits
1557 * as appropriate.
1558 */
1559#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
1560static void end_bio_extent_preparewrite(struct bio *bio, int err)
1561#else
1562static int end_bio_extent_preparewrite(struct bio *bio,
1563 unsigned int bytes_done, int err)
1564#endif
1565{
1566 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1567 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
David Woodhouse902b22f2008-08-20 08:51:49 -04001568 struct extent_io_tree *tree;
Chris Masond1310b22008-01-24 16:13:08 -05001569 u64 start;
1570 u64 end;
1571
1572#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1573 if (bio->bi_size)
1574 return 1;
1575#endif
1576
1577 do {
1578 struct page *page = bvec->bv_page;
David Woodhouse902b22f2008-08-20 08:51:49 -04001579 tree = &BTRFS_I(page->mapping->host)->io_tree;
1580
Chris Masond1310b22008-01-24 16:13:08 -05001581 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1582 bvec->bv_offset;
1583 end = start + bvec->bv_len - 1;
1584
1585 if (--bvec >= bio->bi_io_vec)
1586 prefetchw(&bvec->bv_page->flags);
1587
1588 if (uptodate) {
1589 set_extent_uptodate(tree, start, end, GFP_ATOMIC);
1590 } else {
1591 ClearPageUptodate(page);
1592 SetPageError(page);
1593 }
1594
1595 unlock_extent(tree, start, end, GFP_ATOMIC);
1596
1597 } while (bvec >= bio->bi_io_vec);
1598
1599 bio_put(bio);
1600#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1601 return 0;
1602#endif
1603}
1604
1605static struct bio *
1606extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
1607 gfp_t gfp_flags)
1608{
1609 struct bio *bio;
1610
1611 bio = bio_alloc(gfp_flags, nr_vecs);
1612
1613 if (bio == NULL && (current->flags & PF_MEMALLOC)) {
1614 while (!bio && (nr_vecs /= 2))
1615 bio = bio_alloc(gfp_flags, nr_vecs);
1616 }
1617
1618 if (bio) {
Chris Masone1c4b742008-04-22 13:26:46 -04001619 bio->bi_size = 0;
Chris Masond1310b22008-01-24 16:13:08 -05001620 bio->bi_bdev = bdev;
1621 bio->bi_sector = first_sector;
1622 }
1623 return bio;
1624}
1625
Chris Masonf1885912008-04-09 16:28:12 -04001626static int submit_one_bio(int rw, struct bio *bio, int mirror_num)
Chris Masond1310b22008-01-24 16:13:08 -05001627{
Chris Masond1310b22008-01-24 16:13:08 -05001628 int ret = 0;
Chris Mason70dec802008-01-29 09:59:12 -05001629 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1630 struct page *page = bvec->bv_page;
1631 struct extent_io_tree *tree = bio->bi_private;
1632 struct rb_node *node;
1633 struct extent_state *state;
1634 u64 start;
1635 u64 end;
1636
1637 start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
1638 end = start + bvec->bv_len - 1;
1639
1640 spin_lock_irq(&tree->lock);
Chris Mason80ea96b2008-02-01 14:51:59 -05001641 node = __etree_search(tree, start, NULL, NULL);
Chris Mason70dec802008-01-29 09:59:12 -05001642 BUG_ON(!node);
1643 state = rb_entry(node, struct extent_state, rb_node);
1644 while(state->end < end) {
1645 node = rb_next(node);
1646 state = rb_entry(node, struct extent_state, rb_node);
1647 }
1648 BUG_ON(state->end != end);
1649 spin_unlock_irq(&tree->lock);
1650
David Woodhouse902b22f2008-08-20 08:51:49 -04001651 bio->bi_private = NULL;
Chris Masond1310b22008-01-24 16:13:08 -05001652
1653 bio_get(bio);
1654
Chris Mason065631f2008-02-20 12:07:25 -05001655 if (tree->ops && tree->ops->submit_bio_hook)
Chris Masonf1885912008-04-09 16:28:12 -04001656 tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
1657 mirror_num);
Chris Mason0b86a832008-03-24 15:01:56 -04001658 else
1659 submit_bio(rw, bio);
Chris Masond1310b22008-01-24 16:13:08 -05001660 if (bio_flagged(bio, BIO_EOPNOTSUPP))
1661 ret = -EOPNOTSUPP;
1662 bio_put(bio);
1663 return ret;
1664}
1665
1666static int submit_extent_page(int rw, struct extent_io_tree *tree,
1667 struct page *page, sector_t sector,
1668 size_t size, unsigned long offset,
1669 struct block_device *bdev,
1670 struct bio **bio_ret,
1671 unsigned long max_pages,
Chris Masonf1885912008-04-09 16:28:12 -04001672 bio_end_io_t end_io_func,
1673 int mirror_num)
Chris Masond1310b22008-01-24 16:13:08 -05001674{
1675 int ret = 0;
1676 struct bio *bio;
1677 int nr;
1678
1679 if (bio_ret && *bio_ret) {
1680 bio = *bio_ret;
1681 if (bio->bi_sector + (bio->bi_size >> 9) != sector ||
Chris Mason239b14b2008-03-24 15:02:07 -04001682 (tree->ops && tree->ops->merge_bio_hook &&
1683 tree->ops->merge_bio_hook(page, offset, size, bio)) ||
Chris Masond1310b22008-01-24 16:13:08 -05001684 bio_add_page(bio, page, size, offset) < size) {
Chris Masonf1885912008-04-09 16:28:12 -04001685 ret = submit_one_bio(rw, bio, mirror_num);
Chris Masond1310b22008-01-24 16:13:08 -05001686 bio = NULL;
1687 } else {
1688 return 0;
1689 }
1690 }
Chris Mason961d0232008-02-06 11:01:42 -05001691 nr = bio_get_nr_vecs(bdev);
Chris Masond1310b22008-01-24 16:13:08 -05001692 bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
1693 if (!bio) {
1694 printk("failed to allocate bio nr %d\n", nr);
1695 }
Chris Mason70dec802008-01-29 09:59:12 -05001696
1697
Chris Masond1310b22008-01-24 16:13:08 -05001698 bio_add_page(bio, page, size, offset);
1699 bio->bi_end_io = end_io_func;
1700 bio->bi_private = tree;
Chris Mason70dec802008-01-29 09:59:12 -05001701
Chris Masond1310b22008-01-24 16:13:08 -05001702 if (bio_ret) {
1703 *bio_ret = bio;
1704 } else {
Chris Masonf1885912008-04-09 16:28:12 -04001705 ret = submit_one_bio(rw, bio, mirror_num);
Chris Masond1310b22008-01-24 16:13:08 -05001706 }
1707
1708 return ret;
1709}
1710
1711void set_page_extent_mapped(struct page *page)
1712{
1713 if (!PagePrivate(page)) {
1714 SetPagePrivate(page);
Chris Masond1310b22008-01-24 16:13:08 -05001715 page_cache_get(page);
Chris Mason6af118ce2008-07-22 11:18:07 -04001716 set_page_private(page, EXTENT_PAGE_PRIVATE);
Chris Masond1310b22008-01-24 16:13:08 -05001717 }
1718}
1719
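/*
 * the first page of an extent buffer stores the buffer length in
 * page->private, shifted to leave room for the private flag bits.
 */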
1720void set_page_extent_head(struct page *page, unsigned long len)
1721{
1722 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
1723}
1724
1725/*
1726 * basic readpage implementation.  Locked extent state structs are inserted
1727 * into the tree and removed when the IO is done (by the end_io
1728 * handlers).
1729 */
1730static int __extent_read_full_page(struct extent_io_tree *tree,
1731 struct page *page,
1732 get_extent_t *get_extent,
Chris Masonf1885912008-04-09 16:28:12 -04001733 struct bio **bio, int mirror_num)
Chris Masond1310b22008-01-24 16:13:08 -05001734{
1735 struct inode *inode = page->mapping->host;
1736 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1737 u64 page_end = start + PAGE_CACHE_SIZE - 1;
1738 u64 end;
1739 u64 cur = start;
1740 u64 extent_offset;
1741 u64 last_byte = i_size_read(inode);
1742 u64 block_start;
1743 u64 cur_end;
1744 sector_t sector;
1745 struct extent_map *em;
1746 struct block_device *bdev;
1747 int ret;
1748 int nr = 0;
1749 size_t page_offset = 0;
1750 size_t iosize;
1751 size_t blocksize = inode->i_sb->s_blocksize;
1752
1753 set_page_extent_mapped(page);
1754
1755 end = page_end;
1756 lock_extent(tree, start, end, GFP_NOFS);
1757
1758 while (cur <= end) {
1759 if (cur >= last_byte) {
1760 char *userpage;
1761 iosize = PAGE_CACHE_SIZE - page_offset;
1762 userpage = kmap_atomic(page, KM_USER0);
1763 memset(userpage + page_offset, 0, iosize);
1764 flush_dcache_page(page);
1765 kunmap_atomic(userpage, KM_USER0);
1766 set_extent_uptodate(tree, cur, cur + iosize - 1,
1767 GFP_NOFS);
1768 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1769 break;
1770 }
1771 em = get_extent(inode, page, page_offset, cur,
1772 end - cur + 1, 0);
1773 if (IS_ERR(em) || !em) {
1774 SetPageError(page);
1775 unlock_extent(tree, cur, end, GFP_NOFS);
1776 break;
1777 }
Chris Masond1310b22008-01-24 16:13:08 -05001778 extent_offset = cur - em->start;
Chris Masone6dcd2d2008-07-17 12:53:50 -04001779 if (extent_map_end(em) <= cur) {
1780		printk("bad mapping em [%Lu %Lu] cur %Lu\n", em->start, extent_map_end(em), cur);
1781 }
Chris Masond1310b22008-01-24 16:13:08 -05001782 BUG_ON(extent_map_end(em) <= cur);
Chris Masone6dcd2d2008-07-17 12:53:50 -04001783 if (end < cur) {
1784		printk("2bad mapping end %Lu cur %Lu\n", end, cur);
1785 }
Chris Masond1310b22008-01-24 16:13:08 -05001786 BUG_ON(end < cur);
1787
1788 iosize = min(extent_map_end(em) - cur, end - cur + 1);
1789 cur_end = min(extent_map_end(em) - 1, end);
1790 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
1791 sector = (em->block_start + extent_offset) >> 9;
1792 bdev = em->bdev;
1793 block_start = em->block_start;
1794 free_extent_map(em);
1795 em = NULL;
1796
1797 /* we've found a hole, just zero and go on */
1798 if (block_start == EXTENT_MAP_HOLE) {
1799 char *userpage;
1800 userpage = kmap_atomic(page, KM_USER0);
1801 memset(userpage + page_offset, 0, iosize);
1802 flush_dcache_page(page);
1803 kunmap_atomic(userpage, KM_USER0);
1804
1805 set_extent_uptodate(tree, cur, cur + iosize - 1,
1806 GFP_NOFS);
1807 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1808 cur = cur + iosize;
1809 page_offset += iosize;
1810 continue;
1811 }
1812 /* the get_extent function already copied into the page */
1813 if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
Chris Masona1b32a52008-09-05 16:09:51 -04001814 check_page_uptodate(tree, page);
Chris Masond1310b22008-01-24 16:13:08 -05001815 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1816 cur = cur + iosize;
1817 page_offset += iosize;
1818 continue;
1819 }
Chris Mason70dec802008-01-29 09:59:12 -05001820		/* we have an inline extent but it didn't get marked
1821		 * uptodate.  Error out.
1822		 */
1823 if (block_start == EXTENT_MAP_INLINE) {
1824 SetPageError(page);
1825 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1826 cur = cur + iosize;
1827 page_offset += iosize;
1828 continue;
1829 }
Chris Masond1310b22008-01-24 16:13:08 -05001830
1831 ret = 0;
1832 if (tree->ops && tree->ops->readpage_io_hook) {
1833 ret = tree->ops->readpage_io_hook(page, cur,
1834 cur + iosize - 1);
1835 }
1836 if (!ret) {
Chris Mason89642222008-07-24 09:41:53 -04001837 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
1838 pnr -= page->index;
Chris Masond1310b22008-01-24 16:13:08 -05001839 ret = submit_extent_page(READ, tree, page,
1840 sector, iosize, page_offset,
Chris Mason89642222008-07-24 09:41:53 -04001841 bdev, bio, pnr,
Chris Masonf1885912008-04-09 16:28:12 -04001842 end_bio_extent_readpage, mirror_num);
Chris Mason89642222008-07-24 09:41:53 -04001843 nr++;
Chris Masond1310b22008-01-24 16:13:08 -05001844 }
1845 if (ret)
1846 SetPageError(page);
1847 cur = cur + iosize;
1848 page_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05001849 }
1850 if (!nr) {
1851 if (!PageError(page))
1852 SetPageUptodate(page);
1853 unlock_page(page);
1854 }
1855 return 0;
1856}
1857
1858int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
1859 get_extent_t *get_extent)
1860{
1861 struct bio *bio = NULL;
1862 int ret;
1863
Chris Masonf1885912008-04-09 16:28:12 -04001864 ret = __extent_read_full_page(tree, page, get_extent, &bio, 0);
Chris Masond1310b22008-01-24 16:13:08 -05001865 if (bio)
Chris Masonf1885912008-04-09 16:28:12 -04001866 submit_one_bio(READ, bio, 0);
Chris Masond1310b22008-01-24 16:13:08 -05001867 return ret;
1868}
1869EXPORT_SYMBOL(extent_read_full_page);
1870
1871/*
1872 * the writepage semantics are similar to regular writepage. extent
1873 * records are inserted to lock ranges in the tree, and as dirty areas
1874 * are found, they are marked writeback. Then the lock bits are removed
1875 * and the end_io handler clears the writeback ranges
1876 */
1877static int __extent_writepage(struct page *page, struct writeback_control *wbc,
1878 void *data)
1879{
1880 struct inode *inode = page->mapping->host;
1881 struct extent_page_data *epd = data;
1882 struct extent_io_tree *tree = epd->tree;
1883 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1884 u64 delalloc_start;
1885 u64 page_end = start + PAGE_CACHE_SIZE - 1;
1886 u64 end;
1887 u64 cur = start;
1888 u64 extent_offset;
1889 u64 last_byte = i_size_read(inode);
1890 u64 block_start;
1891 u64 iosize;
Chris Masone6dcd2d2008-07-17 12:53:50 -04001892 u64 unlock_start;
Chris Masond1310b22008-01-24 16:13:08 -05001893 sector_t sector;
1894 struct extent_map *em;
1895 struct block_device *bdev;
1896 int ret;
1897 int nr = 0;
Chris Mason7f3c74f2008-07-18 12:01:11 -04001898 size_t pg_offset = 0;
Chris Masond1310b22008-01-24 16:13:08 -05001899 size_t blocksize;
1900 loff_t i_size = i_size_read(inode);
1901 unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
1902 u64 nr_delalloc;
1903 u64 delalloc_end;
1904
1905 WARN_ON(!PageLocked(page));
Chris Mason7f3c74f2008-07-18 12:01:11 -04001906 pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
Chris Mason211c17f2008-05-15 09:13:45 -04001907 if (page->index > end_index ||
Chris Mason7f3c74f2008-07-18 12:01:11 -04001908 (page->index == end_index && !pg_offset)) {
Chris Mason211c17f2008-05-15 09:13:45 -04001909 page->mapping->a_ops->invalidatepage(page, 0);
Chris Masond1310b22008-01-24 16:13:08 -05001910 unlock_page(page);
1911 return 0;
1912 }
1913
1914 if (page->index == end_index) {
1915 char *userpage;
1916
Chris Masond1310b22008-01-24 16:13:08 -05001917 userpage = kmap_atomic(page, KM_USER0);
Chris Mason7f3c74f2008-07-18 12:01:11 -04001918 memset(userpage + pg_offset, 0,
1919 PAGE_CACHE_SIZE - pg_offset);
Chris Masond1310b22008-01-24 16:13:08 -05001920 kunmap_atomic(userpage, KM_USER0);
Chris Mason211c17f2008-05-15 09:13:45 -04001921 flush_dcache_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05001922 }
Chris Mason7f3c74f2008-07-18 12:01:11 -04001923 pg_offset = 0;
Chris Masond1310b22008-01-24 16:13:08 -05001924
1925 set_page_extent_mapped(page);
1926
1927 delalloc_start = start;
1928 delalloc_end = 0;
1929 while(delalloc_end < page_end) {
1930 nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start,
1931 &delalloc_end,
1932 128 * 1024 * 1024);
1933 if (nr_delalloc == 0) {
1934 delalloc_start = delalloc_end + 1;
1935 continue;
1936 }
1937 tree->ops->fill_delalloc(inode, delalloc_start,
1938 delalloc_end);
1939 clear_extent_bit(tree, delalloc_start,
1940 delalloc_end,
1941 EXTENT_LOCKED | EXTENT_DELALLOC,
1942 1, 0, GFP_NOFS);
1943 delalloc_start = delalloc_end + 1;
1944 }
1945 lock_extent(tree, start, page_end, GFP_NOFS);
Chris Masone6dcd2d2008-07-17 12:53:50 -04001946 unlock_start = start;
Chris Masond1310b22008-01-24 16:13:08 -05001947
Chris Mason247e7432008-07-17 12:53:51 -04001948 if (tree->ops && tree->ops->writepage_start_hook) {
1949 ret = tree->ops->writepage_start_hook(page, start, page_end);
1950 if (ret == -EAGAIN) {
1951 unlock_extent(tree, start, page_end, GFP_NOFS);
1952 redirty_page_for_writepage(wbc, page);
1953 unlock_page(page);
1954 return 0;
1955 }
1956 }
1957
Chris Masond1310b22008-01-24 16:13:08 -05001958 end = page_end;
1959 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
1960 printk("found delalloc bits after lock_extent\n");
1961 }
1962
1963 if (last_byte <= start) {
1964 clear_extent_dirty(tree, start, page_end, GFP_NOFS);
Chris Masone6dcd2d2008-07-17 12:53:50 -04001965 unlock_extent(tree, start, page_end, GFP_NOFS);
1966 if (tree->ops && tree->ops->writepage_end_io_hook)
1967 tree->ops->writepage_end_io_hook(page, start,
1968 page_end, NULL, 1);
1969 unlock_start = page_end + 1;
Chris Masond1310b22008-01-24 16:13:08 -05001970 goto done;
1971 }
1972
1973 set_extent_uptodate(tree, start, page_end, GFP_NOFS);
1974 blocksize = inode->i_sb->s_blocksize;
1975
1976 while (cur <= end) {
1977 if (cur >= last_byte) {
1978 clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
Chris Masone6dcd2d2008-07-17 12:53:50 -04001979 unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
1980 if (tree->ops && tree->ops->writepage_end_io_hook)
1981 tree->ops->writepage_end_io_hook(page, cur,
1982 page_end, NULL, 1);
1983 unlock_start = page_end + 1;
Chris Masond1310b22008-01-24 16:13:08 -05001984 break;
1985 }
Chris Mason7f3c74f2008-07-18 12:01:11 -04001986 em = epd->get_extent(inode, page, pg_offset, cur,
Chris Masond1310b22008-01-24 16:13:08 -05001987 end - cur + 1, 1);
1988 if (IS_ERR(em) || !em) {
1989 SetPageError(page);
1990 break;
1991 }
1992
1993 extent_offset = cur - em->start;
1994 BUG_ON(extent_map_end(em) <= cur);
1995 BUG_ON(end < cur);
1996 iosize = min(extent_map_end(em) - cur, end - cur + 1);
1997 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
1998 sector = (em->block_start + extent_offset) >> 9;
1999 bdev = em->bdev;
2000 block_start = em->block_start;
2001 free_extent_map(em);
2002 em = NULL;
2003
2004 if (block_start == EXTENT_MAP_HOLE ||
2005 block_start == EXTENT_MAP_INLINE) {
2006 clear_extent_dirty(tree, cur,
2007 cur + iosize - 1, GFP_NOFS);
Chris Masone6dcd2d2008-07-17 12:53:50 -04002008
2009 unlock_extent(tree, unlock_start, cur + iosize -1,
2010 GFP_NOFS);
Chris Mason7f3c74f2008-07-18 12:01:11 -04002011
Chris Masone6dcd2d2008-07-17 12:53:50 -04002012 if (tree->ops && tree->ops->writepage_end_io_hook)
2013 tree->ops->writepage_end_io_hook(page, cur,
2014 cur + iosize - 1,
2015 NULL, 1);
Chris Masond1310b22008-01-24 16:13:08 -05002016 cur = cur + iosize;
Chris Mason7f3c74f2008-07-18 12:01:11 -04002017 pg_offset += iosize;
Chris Masone6dcd2d2008-07-17 12:53:50 -04002018 unlock_start = cur;
Chris Masond1310b22008-01-24 16:13:08 -05002019 continue;
2020 }
2021
2022 /* leave this out until we have a page_mkwrite call */
2023 if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
2024 EXTENT_DIRTY, 0)) {
2025 cur = cur + iosize;
Chris Mason7f3c74f2008-07-18 12:01:11 -04002026 pg_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05002027 continue;
2028 }
2029 clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
2030 if (tree->ops && tree->ops->writepage_io_hook) {
2031 ret = tree->ops->writepage_io_hook(page, cur,
2032 cur + iosize - 1);
2033 } else {
2034 ret = 0;
2035 }
Chris Mason1259ab72008-05-12 13:39:03 -04002036 if (ret) {
Chris Masond1310b22008-01-24 16:13:08 -05002037 SetPageError(page);
Chris Mason1259ab72008-05-12 13:39:03 -04002038 } else {
Chris Masond1310b22008-01-24 16:13:08 -05002039 unsigned long max_nr = end_index + 1;
Chris Mason7f3c74f2008-07-18 12:01:11 -04002040
Chris Masond1310b22008-01-24 16:13:08 -05002041 set_range_writeback(tree, cur, cur + iosize - 1);
2042 if (!PageWriteback(page)) {
2043 printk("warning page %lu not writeback, "
2044 "cur %llu end %llu\n", page->index,
2045 (unsigned long long)cur,
2046 (unsigned long long)end);
2047 }
2048
2049 ret = submit_extent_page(WRITE, tree, page, sector,
Chris Mason7f3c74f2008-07-18 12:01:11 -04002050 iosize, pg_offset, bdev,
Chris Masond1310b22008-01-24 16:13:08 -05002051 &epd->bio, max_nr,
Chris Masonf1885912008-04-09 16:28:12 -04002052 end_bio_extent_writepage, 0);
Chris Masond1310b22008-01-24 16:13:08 -05002053 if (ret)
2054 SetPageError(page);
2055 }
2056 cur = cur + iosize;
Chris Mason7f3c74f2008-07-18 12:01:11 -04002057 pg_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05002058 nr++;
2059 }
2060done:
2061 if (nr == 0) {
2062 /* make sure the mapping tag for page dirty gets cleared */
2063 set_page_writeback(page);
2064 end_page_writeback(page);
2065 }
Chris Masone6dcd2d2008-07-17 12:53:50 -04002066 if (unlock_start <= page_end)
2067 unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
Chris Masond1310b22008-01-24 16:13:08 -05002068 unlock_page(page);
2069 return 0;
2070}
2071
Chris Mason5e478dc2008-04-25 09:10:45 -04002072#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
Chris Masond1310b22008-01-24 16:13:08 -05002073/* Taken directly from 2.6.23 for 2.6.18 back port */
2074typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
2075 void *data);
2076
2077/**
2078 * write_cache_pages - walk the list of dirty pages of the given address space
2079 * and write all of them.
2080 * @mapping: address space structure to write
2081 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
2082 * @writepage: function called for each page
2083 * @data: data passed to writepage function
2084 *
2085 * If a page is already under I/O, write_cache_pages() skips it, even
2086 * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
2087 * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
2088 * and msync() need to guarantee that all the data which was dirty at the time
2089 * the call was made get new I/O started against them. If wbc->sync_mode is
2090 * WB_SYNC_ALL then we were called for data integrity and we must wait for
2091 * existing IO to complete.
2092 */
2093static int write_cache_pages(struct address_space *mapping,
2094 struct writeback_control *wbc, writepage_t writepage,
2095 void *data)
2096{
2097 struct backing_dev_info *bdi = mapping->backing_dev_info;
2098 int ret = 0;
2099 int done = 0;
2100 struct pagevec pvec;
2101 int nr_pages;
2102 pgoff_t index;
2103 pgoff_t end; /* Inclusive */
2104 int scanned = 0;
2105 int range_whole = 0;
2106
2107 if (wbc->nonblocking && bdi_write_congested(bdi)) {
2108 wbc->encountered_congestion = 1;
2109 return 0;
2110 }
2111
2112 pagevec_init(&pvec, 0);
2113 if (wbc->range_cyclic) {
2114 index = mapping->writeback_index; /* Start from prev offset */
2115 end = -1;
2116 } else {
2117 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2118 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2119 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2120 range_whole = 1;
2121 scanned = 1;
2122 }
2123retry:
2124 while (!done && (index <= end) &&
2125 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
2126 PAGECACHE_TAG_DIRTY,
2127 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
2128 unsigned i;
2129
2130 scanned = 1;
2131 for (i = 0; i < nr_pages; i++) {
2132 struct page *page = pvec.pages[i];
2133
2134 /*
2135 * At this point we hold neither mapping->tree_lock nor
2136 * lock on the page itself: the page may be truncated or
2137 * invalidated (changing page->mapping to NULL), or even
2138 * swizzled back from swapper_space to tmpfs file
2139 * mapping
2140 */
2141 lock_page(page);
2142
2143 if (unlikely(page->mapping != mapping)) {
2144 unlock_page(page);
2145 continue;
2146 }
2147
2148 if (!wbc->range_cyclic && page->index > end) {
2149 done = 1;
2150 unlock_page(page);
2151 continue;
2152 }
2153
2154 if (wbc->sync_mode != WB_SYNC_NONE)
2155 wait_on_page_writeback(page);
2156
2157 if (PageWriteback(page) ||
2158 !clear_page_dirty_for_io(page)) {
2159 unlock_page(page);
2160 continue;
2161 }
2162
2163 ret = (*writepage)(page, wbc, data);
2164
2165 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
2166 unlock_page(page);
2167 ret = 0;
2168 }
2169 if (ret || (--(wbc->nr_to_write) <= 0))
2170 done = 1;
2171 if (wbc->nonblocking && bdi_write_congested(bdi)) {
2172 wbc->encountered_congestion = 1;
2173 done = 1;
2174 }
2175 }
2176 pagevec_release(&pvec);
2177 cond_resched();
2178 }
2179 if (!scanned && !done) {
2180 /*
2181 * We hit the last page and there is more work to be done: wrap
2182 * back to the start of the file
2183 */
2184 scanned = 1;
2185 index = 0;
2186 goto retry;
2187 }
2188 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2189 mapping->writeback_index = index;
2190 return ret;
2191}
2192#endif
2193
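/*
 * writepage entry point.  The page passed in is written first, then
 * write_cache_pages is used with a private writeback_control to push
 * out up to 64 dirty pages that follow it in the file, so larger bios
 * can be built up in epd.bio before it is submitted.
 */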
2194int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2195 get_extent_t *get_extent,
2196 struct writeback_control *wbc)
2197{
2198 int ret;
2199 struct address_space *mapping = page->mapping;
2200 struct extent_page_data epd = {
2201 .bio = NULL,
2202 .tree = tree,
2203 .get_extent = get_extent,
2204 };
2205 struct writeback_control wbc_writepages = {
2206 .bdi = wbc->bdi,
2207 .sync_mode = WB_SYNC_NONE,
2208 .older_than_this = NULL,
2209 .nr_to_write = 64,
2210 .range_start = page_offset(page) + PAGE_CACHE_SIZE,
2211 .range_end = (loff_t)-1,
2212 };
2213
2214
2215 ret = __extent_writepage(page, wbc, &epd);
2216
2217 write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd);
2218 if (epd.bio) {
Chris Masonf1885912008-04-09 16:28:12 -04002219 submit_one_bio(WRITE, epd.bio, 0);
Chris Masond1310b22008-01-24 16:13:08 -05002220 }
2221 return ret;
2222}
2223EXPORT_SYMBOL(extent_write_full_page);
2224
2225
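/*
 * writepages entry point: walk the dirty pages with write_cache_pages
 * and __extent_writepage, then submit any bio that is still being
 * built.
 */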
2226int extent_writepages(struct extent_io_tree *tree,
2227 struct address_space *mapping,
2228 get_extent_t *get_extent,
2229 struct writeback_control *wbc)
2230{
2231 int ret = 0;
2232 struct extent_page_data epd = {
2233 .bio = NULL,
2234 .tree = tree,
2235 .get_extent = get_extent,
2236 };
2237
2238 ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd);
2239 if (epd.bio) {
Chris Masonf1885912008-04-09 16:28:12 -04002240 submit_one_bio(WRITE, epd.bio, 0);
Chris Masond1310b22008-01-24 16:13:08 -05002241 }
2242 return ret;
2243}
2244EXPORT_SYMBOL(extent_writepages);
2245
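/*
 * readpages entry point: add each page to the page cache and LRU and
 * start reads through __extent_read_full_page so a single bio can be
 * shared across neighbouring pages.
 */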
2246int extent_readpages(struct extent_io_tree *tree,
2247 struct address_space *mapping,
2248 struct list_head *pages, unsigned nr_pages,
2249 get_extent_t get_extent)
2250{
2251 struct bio *bio = NULL;
2252 unsigned page_idx;
2253 struct pagevec pvec;
2254
2255 pagevec_init(&pvec, 0);
2256 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
2257 struct page *page = list_entry(pages->prev, struct page, lru);
2258
2259 prefetchw(&page->flags);
2260 list_del(&page->lru);
2261 /*
2262 * what we want to do here is call add_to_page_cache_lru,
2263 * but that isn't exported, so we reproduce it here
2264 */
2265 if (!add_to_page_cache(page, mapping,
2266 page->index, GFP_KERNEL)) {
2267
2268 /* open coding of lru_cache_add, also not exported */
2269 page_cache_get(page);
2270 if (!pagevec_add(&pvec, page))
2271 __pagevec_lru_add(&pvec);
Chris Masonf1885912008-04-09 16:28:12 -04002272 __extent_read_full_page(tree, page, get_extent,
2273 &bio, 0);
Chris Masond1310b22008-01-24 16:13:08 -05002274 }
2275 page_cache_release(page);
2276 }
2277 if (pagevec_count(&pvec))
2278 __pagevec_lru_add(&pvec);
2279 BUG_ON(!list_empty(pages));
2280 if (bio)
Chris Masonf1885912008-04-09 16:28:12 -04002281 submit_one_bio(READ, bio, 0);
Chris Masond1310b22008-01-24 16:13:08 -05002282 return 0;
2283}
2284EXPORT_SYMBOL(extent_readpages);
2285
2286/*
2287 * basic invalidatepage code, this waits on any locked or writeback
2288 * ranges corresponding to the page, and then deletes any extent state
2289 * records from the tree
2290 */
2291int extent_invalidatepage(struct extent_io_tree *tree,
2292 struct page *page, unsigned long offset)
2293{
2294 u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
2295 u64 end = start + PAGE_CACHE_SIZE - 1;
2296 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
2297
2298	start += (offset + blocksize - 1) & ~(blocksize - 1);
2299 if (start > end)
2300 return 0;
2301
2302 lock_extent(tree, start, end, GFP_NOFS);
2303 wait_on_extent_writeback(tree, start, end);
2304 clear_extent_bit(tree, start, end,
2305 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
2306 1, 1, GFP_NOFS);
2307 return 0;
2308}
2309EXPORT_SYMBOL(extent_invalidatepage);
2310
2311/*
2312 * simple commit_write call, the page is marked dirty and the
2313 * inode size is updated when the write extends past it
2314 */
2315int extent_commit_write(struct extent_io_tree *tree,
2316 struct inode *inode, struct page *page,
2317 unsigned from, unsigned to)
2318{
2319 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2320
2321 set_page_extent_mapped(page);
2322 set_page_dirty(page);
2323
2324 if (pos > inode->i_size) {
2325 i_size_write(inode, pos);
2326 mark_inode_dirty(inode);
2327 }
2328 return 0;
2329}
2330EXPORT_SYMBOL(extent_commit_write);
2331
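/*
 * prepare_write helper.  Newly allocated blocks have the parts outside
 * [from, to) zeroed, existing blocks that are only partially covered
 * by the write are read in, and the function waits for those reads to
 * finish before returning.
 */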
2332int extent_prepare_write(struct extent_io_tree *tree,
2333 struct inode *inode, struct page *page,
2334 unsigned from, unsigned to, get_extent_t *get_extent)
2335{
2336 u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2337 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
2338 u64 block_start;
2339 u64 orig_block_start;
2340 u64 block_end;
2341 u64 cur_end;
2342 struct extent_map *em;
2343 unsigned blocksize = 1 << inode->i_blkbits;
2344 size_t page_offset = 0;
2345 size_t block_off_start;
2346 size_t block_off_end;
2347 int err = 0;
2348 int iocount = 0;
2349 int ret = 0;
2350 int isnew;
2351
2352 set_page_extent_mapped(page);
2353
2354 block_start = (page_start + from) & ~((u64)blocksize - 1);
2355 block_end = (page_start + to - 1) | (blocksize - 1);
2356 orig_block_start = block_start;
2357
2358 lock_extent(tree, page_start, page_end, GFP_NOFS);
2359 while(block_start <= block_end) {
2360 em = get_extent(inode, page, page_offset, block_start,
2361 block_end - block_start + 1, 1);
2362 if (IS_ERR(em) || !em) {
2363 goto err;
2364 }
2365 cur_end = min(block_end, extent_map_end(em) - 1);
2366 block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
2367 block_off_end = block_off_start + blocksize;
2368 isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
2369
2370 if (!PageUptodate(page) && isnew &&
2371 (block_off_end > to || block_off_start < from)) {
2372 void *kaddr;
2373
2374 kaddr = kmap_atomic(page, KM_USER0);
2375 if (block_off_end > to)
2376 memset(kaddr + to, 0, block_off_end - to);
2377 if (block_off_start < from)
2378 memset(kaddr + block_off_start, 0,
2379 from - block_off_start);
2380 flush_dcache_page(page);
2381 kunmap_atomic(kaddr, KM_USER0);
2382 }
2383 if ((em->block_start != EXTENT_MAP_HOLE &&
2384 em->block_start != EXTENT_MAP_INLINE) &&
2385 !isnew && !PageUptodate(page) &&
2386 (block_off_end > to || block_off_start < from) &&
2387 !test_range_bit(tree, block_start, cur_end,
2388 EXTENT_UPTODATE, 1)) {
2389 u64 sector;
2390 u64 extent_offset = block_start - em->start;
2391 size_t iosize;
2392 sector = (em->block_start + extent_offset) >> 9;
2393 iosize = (cur_end - block_start + blocksize) &
2394 ~((u64)blocksize - 1);
2395 /*
2396 * we've already got the extent locked, but we
2397 * need to split the state such that our end_bio
2398 * handler can clear the lock.
2399 */
2400 set_extent_bit(tree, block_start,
2401 block_start + iosize - 1,
2402 EXTENT_LOCKED, 0, NULL, GFP_NOFS);
2403 ret = submit_extent_page(READ, tree, page,
2404 sector, iosize, page_offset, em->bdev,
2405 NULL, 1,
Chris Masonf1885912008-04-09 16:28:12 -04002406 end_bio_extent_preparewrite, 0);
Chris Masond1310b22008-01-24 16:13:08 -05002407 iocount++;
2408 block_start = block_start + iosize;
2409 } else {
2410 set_extent_uptodate(tree, block_start, cur_end,
2411 GFP_NOFS);
2412 unlock_extent(tree, block_start, cur_end, GFP_NOFS);
2413 block_start = cur_end + 1;
2414 }
2415 page_offset = block_start & (PAGE_CACHE_SIZE - 1);
2416 free_extent_map(em);
2417 }
2418 if (iocount) {
2419 wait_extent_bit(tree, orig_block_start,
2420 block_end, EXTENT_LOCKED);
2421 }
2422 check_page_uptodate(tree, page);
2423err:
2424 /* FIXME, zero out newly allocated blocks on error */
2425 return err;
2426}
2427EXPORT_SYMBOL(extent_prepare_write);
2428
2429/*
Chris Mason7b13b7b2008-04-18 10:29:50 -04002430 * a helper for releasepage, this tests for areas of the page that
2431 * are locked or under IO and drops the related state bits if it is safe
2432 * to drop the page.
2433 */
2434int try_release_extent_state(struct extent_map_tree *map,
2435 struct extent_io_tree *tree, struct page *page,
2436 gfp_t mask)
2437{
2438 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
2439 u64 end = start + PAGE_CACHE_SIZE - 1;
2440 int ret = 1;
2441
Chris Mason211f90e2008-07-18 11:56:15 -04002442 if (test_range_bit(tree, start, end,
2443 EXTENT_IOBITS | EXTENT_ORDERED, 0))
Chris Mason7b13b7b2008-04-18 10:29:50 -04002444 ret = 0;
2445 else {
2446 if ((mask & GFP_NOFS) == GFP_NOFS)
2447 mask = GFP_NOFS;
2448 clear_extent_bit(tree, start, end, EXTENT_UPTODATE,
2449 1, 1, mask);
2450 }
2451 return ret;
2452}
2453EXPORT_SYMBOL(try_release_extent_state);
2454
2455/*
Chris Masond1310b22008-01-24 16:13:08 -05002456 * a helper for releasepage. As long as there are no locked extents
2457 * in the range corresponding to the page, both state records and extent
2458 * map records are removed
2459 */
2460int try_release_extent_mapping(struct extent_map_tree *map,
Chris Mason70dec802008-01-29 09:59:12 -05002461 struct extent_io_tree *tree, struct page *page,
2462 gfp_t mask)
Chris Masond1310b22008-01-24 16:13:08 -05002463{
2464 struct extent_map *em;
2465 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
2466 u64 end = start + PAGE_CACHE_SIZE - 1;
Chris Mason7b13b7b2008-04-18 10:29:50 -04002467
Chris Mason70dec802008-01-29 09:59:12 -05002468 if ((mask & __GFP_WAIT) &&
2469 page->mapping->host->i_size > 16 * 1024 * 1024) {
Yan39b56372008-02-15 10:40:50 -05002470 u64 len;
Chris Mason70dec802008-01-29 09:59:12 -05002471 while (start <= end) {
Yan39b56372008-02-15 10:40:50 -05002472 len = end - start + 1;
Chris Mason70dec802008-01-29 09:59:12 -05002473 spin_lock(&map->lock);
Yan39b56372008-02-15 10:40:50 -05002474 em = lookup_extent_mapping(map, start, len);
Chris Mason70dec802008-01-29 09:59:12 -05002475 if (!em || IS_ERR(em)) {
2476 spin_unlock(&map->lock);
2477 break;
2478 }
Chris Mason7f3c74f2008-07-18 12:01:11 -04002479 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
2480 em->start != start) {
Chris Mason70dec802008-01-29 09:59:12 -05002481 spin_unlock(&map->lock);
2482 free_extent_map(em);
2483 break;
2484 }
2485 if (!test_range_bit(tree, em->start,
2486 extent_map_end(em) - 1,
2487 EXTENT_LOCKED, 0)) {
2488 remove_extent_mapping(map, em);
2489 /* once for the rb tree */
2490 free_extent_map(em);
2491 }
2492 start = extent_map_end(em);
Chris Masond1310b22008-01-24 16:13:08 -05002493 spin_unlock(&map->lock);
Chris Mason70dec802008-01-29 09:59:12 -05002494
2495 /* once for us */
Chris Masond1310b22008-01-24 16:13:08 -05002496 free_extent_map(em);
2497 }
Chris Masond1310b22008-01-24 16:13:08 -05002498 }
Chris Mason7b13b7b2008-04-18 10:29:50 -04002499 return try_release_extent_state(map, tree, page, mask);
Chris Masond1310b22008-01-24 16:13:08 -05002500}
2501EXPORT_SYMBOL(try_release_extent_mapping);
2502
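/*
 * bmap entry point: map a logical file block to its on-disk block
 * number with get_extent.  Holes and inline extents return 0.
 */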
2503sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
2504 get_extent_t *get_extent)
2505{
2506 struct inode *inode = mapping->host;
2507 u64 start = iblock << inode->i_blkbits;
2508 sector_t sector = 0;
2509 struct extent_map *em;
2510
2511 em = get_extent(inode, NULL, 0, start, (1 << inode->i_blkbits), 0);
2512 if (!em || IS_ERR(em))
2513 return 0;
2514
2515 if (em->block_start == EXTENT_MAP_INLINE ||
2516 em->block_start == EXTENT_MAP_HOLE)
2517 goto out;
2518
2519 sector = (em->block_start + start - em->start) >> inode->i_blkbits;
Chris Masond1310b22008-01-24 16:13:08 -05002520out:
2521 free_extent_map(em);
2522 return sector;
2523}
2524
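/*
 * return the page that covers page index i of the extent buffer.  The
 * first page is cached in the buffer, the rest are looked up in the
 * mapping's radix tree.
 */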
Chris Masond1310b22008-01-24 16:13:08 -05002525static inline struct page *extent_buffer_page(struct extent_buffer *eb,
2526 unsigned long i)
2527{
2528 struct page *p;
2529 struct address_space *mapping;
2530
2531 if (i == 0)
2532 return eb->first_page;
2533 i += eb->start >> PAGE_CACHE_SHIFT;
2534 mapping = eb->first_page->mapping;
Chris Mason33958dc2008-07-30 10:29:12 -04002535 if (!mapping)
2536 return NULL;
Sven Wegener0ee0fda2008-07-30 16:54:26 -04002537
2538 /*
2539 * extent_buffer_page is only called after pinning the page
2540 * by increasing the reference count. So we know the page must
2541 * be in the radix tree.
2542 */
2543#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26)
2544 rcu_read_lock();
2545#else
Chris Masond1310b22008-01-24 16:13:08 -05002546 read_lock_irq(&mapping->tree_lock);
Sven Wegener0ee0fda2008-07-30 16:54:26 -04002547#endif
Chris Masond1310b22008-01-24 16:13:08 -05002548 p = radix_tree_lookup(&mapping->page_tree, i);
Sven Wegener0ee0fda2008-07-30 16:54:26 -04002549
2550#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26)
2551 rcu_read_unlock();
2552#else
Chris Masond1310b22008-01-24 16:13:08 -05002553 read_unlock_irq(&mapping->tree_lock);
Sven Wegener0ee0fda2008-07-30 16:54:26 -04002554#endif
Chris Masond1310b22008-01-24 16:13:08 -05002555 return p;
2556}
2557
Chris Mason6af118ce2008-07-22 11:18:07 -04002558static inline unsigned long num_extent_pages(u64 start, u64 len)
Chris Masonce9adaa2008-04-09 16:28:12 -04002559{
Chris Mason6af118ce2008-07-22 11:18:07 -04002560 return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
2561 (start >> PAGE_CACHE_SHIFT);
Chris Mason728131d2008-04-09 16:28:12 -04002562}
2563
Chris Masond1310b22008-01-24 16:13:08 -05002564static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
2565 u64 start,
2566 unsigned long len,
2567 gfp_t mask)
2568{
2569 struct extent_buffer *eb = NULL;
Chris Mason2d2ae542008-03-26 16:24:23 -04002570 unsigned long flags;
Chris Masond1310b22008-01-24 16:13:08 -05002571
Chris Masond1310b22008-01-24 16:13:08 -05002572 eb = kmem_cache_zalloc(extent_buffer_cache, mask);
Chris Masond1310b22008-01-24 16:13:08 -05002573 eb->start = start;
2574 eb->len = len;
Chris Masona61e6f22008-07-22 11:18:08 -04002575 mutex_init(&eb->mutex);
Chris Mason2d2ae542008-03-26 16:24:23 -04002576 spin_lock_irqsave(&leak_lock, flags);
2577 list_add(&eb->leak_list, &buffers);
2578 spin_unlock_irqrestore(&leak_lock, flags);
Chris Masond1310b22008-01-24 16:13:08 -05002579 atomic_set(&eb->refs, 1);
2580
2581 return eb;
2582}
2583
2584static void __free_extent_buffer(struct extent_buffer *eb)
2585{
Chris Mason2d2ae542008-03-26 16:24:23 -04002586 unsigned long flags;
2587 spin_lock_irqsave(&leak_lock, flags);
2588 list_del(&eb->leak_list);
2589 spin_unlock_irqrestore(&leak_lock, flags);
Chris Masond1310b22008-01-24 16:13:08 -05002590 kmem_cache_free(extent_buffer_cache, eb);
2591}
2592
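/*
 * return the extent buffer covering [start, start + len), allocating a
 * new one and pinning its pages in the page cache if needed.  If
 * another thread inserts a buffer for the same range first, that
 * buffer is returned instead and the new one is freed.
 */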
2593struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
2594 u64 start, unsigned long len,
2595 struct page *page0,
2596 gfp_t mask)
2597{
2598 unsigned long num_pages = num_extent_pages(start, len);
2599 unsigned long i;
2600 unsigned long index = start >> PAGE_CACHE_SHIFT;
2601 struct extent_buffer *eb;
Chris Mason6af118ce2008-07-22 11:18:07 -04002602 struct extent_buffer *exists = NULL;
Chris Masond1310b22008-01-24 16:13:08 -05002603 struct page *p;
2604 struct address_space *mapping = tree->mapping;
2605 int uptodate = 1;
2606
Chris Mason6af118ce2008-07-22 11:18:07 -04002607 spin_lock(&tree->buffer_lock);
2608 eb = buffer_search(tree, start);
2609 if (eb) {
2610 atomic_inc(&eb->refs);
2611 spin_unlock(&tree->buffer_lock);
2612 return eb;
2613 }
2614 spin_unlock(&tree->buffer_lock);
2615
Chris Masond1310b22008-01-24 16:13:08 -05002616 eb = __alloc_extent_buffer(tree, start, len, mask);
Peter2b114d12008-04-01 11:21:40 -04002617 if (!eb)
Chris Masond1310b22008-01-24 16:13:08 -05002618 return NULL;
2619
Chris Masond1310b22008-01-24 16:13:08 -05002620 if (page0) {
2621 eb->first_page = page0;
2622 i = 1;
2623 index++;
2624 page_cache_get(page0);
2625 mark_page_accessed(page0);
2626 set_page_extent_mapped(page0);
Chris Masond1310b22008-01-24 16:13:08 -05002627 set_page_extent_head(page0, len);
Chris Masonf1885912008-04-09 16:28:12 -04002628 uptodate = PageUptodate(page0);
Chris Masond1310b22008-01-24 16:13:08 -05002629 } else {
2630 i = 0;
2631 }
2632 for (; i < num_pages; i++, index++) {
2633 p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
2634 if (!p) {
2635 WARN_ON(1);
Chris Mason6af118ce2008-07-22 11:18:07 -04002636 goto free_eb;
Chris Masond1310b22008-01-24 16:13:08 -05002637 }
2638 set_page_extent_mapped(p);
2639 mark_page_accessed(p);
2640 if (i == 0) {
2641 eb->first_page = p;
2642 set_page_extent_head(p, len);
2643 } else {
2644 set_page_private(p, EXTENT_PAGE_PRIVATE);
2645 }
2646 if (!PageUptodate(p))
2647 uptodate = 0;
2648 unlock_page(p);
2649 }
2650 if (uptodate)
2651 eb->flags |= EXTENT_UPTODATE;
2652 eb->flags |= EXTENT_BUFFER_FILLED;
2653
Chris Mason6af118ce2008-07-22 11:18:07 -04002654 spin_lock(&tree->buffer_lock);
2655 exists = buffer_tree_insert(tree, start, &eb->rb_node);
2656 if (exists) {
2657 /* add one reference for the caller */
2658 atomic_inc(&exists->refs);
2659 spin_unlock(&tree->buffer_lock);
2660 goto free_eb;
2661 }
2662 spin_unlock(&tree->buffer_lock);
2663
2664 /* add one reference for the tree */
2665 atomic_inc(&eb->refs);
Chris Masond1310b22008-01-24 16:13:08 -05002666 return eb;
2667
Chris Mason6af118ce2008-07-22 11:18:07 -04002668free_eb:
Chris Masond1310b22008-01-24 16:13:08 -05002669 if (!atomic_dec_and_test(&eb->refs))
Chris Mason6af118ce2008-07-22 11:18:07 -04002670 return exists;
2671 for (index = 1; index < i; index++)
Chris Masond1310b22008-01-24 16:13:08 -05002672 page_cache_release(extent_buffer_page(eb, index));
Chris Mason6af118ce2008-07-22 11:18:07 -04002673 page_cache_release(extent_buffer_page(eb, 0));
Chris Masond1310b22008-01-24 16:13:08 -05002674 __free_extent_buffer(eb);
Chris Mason6af118ce2008-07-22 11:18:07 -04002675 return exists;
Chris Masond1310b22008-01-24 16:13:08 -05002676}
2677EXPORT_SYMBOL(alloc_extent_buffer);
2678
2679struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
2680 u64 start, unsigned long len,
2681 gfp_t mask)
2682{
Chris Masond1310b22008-01-24 16:13:08 -05002683 struct extent_buffer *eb;
Chris Masond1310b22008-01-24 16:13:08 -05002684
Chris Mason6af118ce2008-07-22 11:18:07 -04002685 spin_lock(&tree->buffer_lock);
2686 eb = buffer_search(tree, start);
2687 if (eb)
2688 atomic_inc(&eb->refs);
2689 spin_unlock(&tree->buffer_lock);
Chris Masond1310b22008-01-24 16:13:08 -05002690
Chris Masond1310b22008-01-24 16:13:08 -05002691 return eb;
Chris Masond1310b22008-01-24 16:13:08 -05002692}
2693EXPORT_SYMBOL(find_extent_buffer);
2694
2695void free_extent_buffer(struct extent_buffer *eb)
2696{
Chris Masond1310b22008-01-24 16:13:08 -05002697 if (!eb)
2698 return;
2699
2700 if (!atomic_dec_and_test(&eb->refs))
2701 return;
2702
Chris Mason6af118ce2008-07-22 11:18:07 -04002703 WARN_ON(1);
Chris Masond1310b22008-01-24 16:13:08 -05002704}
2705EXPORT_SYMBOL(free_extent_buffer);
2706
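/*
 * clear the dirty state for an extent buffer in both the io tree and
 * the page cache, being careful not to clean a page that still has
 * other dirty ranges on it.
 */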
2707int clear_extent_buffer_dirty(struct extent_io_tree *tree,
2708 struct extent_buffer *eb)
2709{
2710 int set;
2711 unsigned long i;
2712 unsigned long num_pages;
2713 struct page *page;
2714
2715 u64 start = eb->start;
2716 u64 end = start + eb->len - 1;
2717
2718 set = clear_extent_dirty(tree, start, end, GFP_NOFS);
2719 num_pages = num_extent_pages(eb->start, eb->len);
2720
2721 for (i = 0; i < num_pages; i++) {
2722 page = extent_buffer_page(eb, i);
Chris Masona61e6f22008-07-22 11:18:08 -04002723 lock_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05002724 if (i == 0)
2725 set_page_extent_head(page, eb->len);
2726 else
2727 set_page_private(page, EXTENT_PAGE_PRIVATE);
2728
2729 /*
2730 * if we're on the last page or the first page and the
2731 * block isn't aligned on a page boundary, do extra checks
2732		 * to make sure we don't clean a page that is partially dirty
2733 */
2734 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
2735 ((i == num_pages - 1) &&
2736 ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
2737 start = (u64)page->index << PAGE_CACHE_SHIFT;
2738 end = start + PAGE_CACHE_SIZE - 1;
2739 if (test_range_bit(tree, start, end,
2740 EXTENT_DIRTY, 0)) {
Chris Masona61e6f22008-07-22 11:18:08 -04002741 unlock_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05002742 continue;
2743 }
2744 }
2745 clear_page_dirty_for_io(page);
Sven Wegener0ee0fda2008-07-30 16:54:26 -04002746#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26)
2747 spin_lock_irq(&page->mapping->tree_lock);
2748#else
Chris Mason70dec802008-01-29 09:59:12 -05002749 read_lock_irq(&page->mapping->tree_lock);
Sven Wegener0ee0fda2008-07-30 16:54:26 -04002750#endif
Chris Masond1310b22008-01-24 16:13:08 -05002751 if (!PageDirty(page)) {
2752 radix_tree_tag_clear(&page->mapping->page_tree,
2753 page_index(page),
2754 PAGECACHE_TAG_DIRTY);
2755 }
Sven Wegener0ee0fda2008-07-30 16:54:26 -04002756#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26)
2757 spin_unlock_irq(&page->mapping->tree_lock);
2758#else
Chris Mason70dec802008-01-29 09:59:12 -05002759 read_unlock_irq(&page->mapping->tree_lock);
Sven Wegener0ee0fda2008-07-30 16:54:26 -04002760#endif
Chris Masona61e6f22008-07-22 11:18:08 -04002761 unlock_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05002762 }
2763 return 0;
2764}
2765EXPORT_SYMBOL(clear_extent_buffer_dirty);
2766
2767int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
2768 struct extent_buffer *eb)
2769{
2770 return wait_on_extent_writeback(tree, eb->start,
2771 eb->start + eb->len - 1);
2772}
2773EXPORT_SYMBOL(wait_on_extent_buffer_writeback);
2774
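/*
 * mark every page of an extent buffer dirty and set EXTENT_DIRTY on
 * the matching ranges in the io tree.  page->private is fixed up first
 * because releasepage may have dropped it on pages that weren't dirty.
 */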
2775int set_extent_buffer_dirty(struct extent_io_tree *tree,
2776 struct extent_buffer *eb)
2777{
2778 unsigned long i;
2779 unsigned long num_pages;
2780
2781 num_pages = num_extent_pages(eb->start, eb->len);
2782 for (i = 0; i < num_pages; i++) {
2783 struct page *page = extent_buffer_page(eb, i);
2784 /* writepage may need to do something special for the
2785		 * first page, so we have to make sure page->private is
2786 * properly set. releasepage may drop page->private
2787 * on us if the page isn't already dirty.
2788 */
Chris Masona1b32a52008-09-05 16:09:51 -04002789 lock_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05002790 if (i == 0) {
Chris Masond1310b22008-01-24 16:13:08 -05002791 set_page_extent_head(page, eb->len);
2792 } else if (PagePrivate(page) &&
2793 page->private != EXTENT_PAGE_PRIVATE) {
Chris Masond1310b22008-01-24 16:13:08 -05002794 set_page_extent_mapped(page);
Chris Masond1310b22008-01-24 16:13:08 -05002795 }
2796 __set_page_dirty_nobuffers(extent_buffer_page(eb, i));
Chris Masona1b32a52008-09-05 16:09:51 -04002797 set_extent_dirty(tree, page_offset(page),
2798 page_offset(page) + PAGE_CACHE_SIZE -1,
2799 GFP_NOFS);
2800 unlock_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05002801 }
Chris Masona1b32a52008-09-05 16:09:51 -04002802 return 0;
Chris Masond1310b22008-01-24 16:13:08 -05002803}
2804EXPORT_SYMBOL(set_extent_buffer_dirty);
2805
Chris Mason1259ab72008-05-12 13:39:03 -04002806int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
2807 struct extent_buffer *eb)
2808{
2809 unsigned long i;
2810 struct page *page;
2811 unsigned long num_pages;
2812
2813 num_pages = num_extent_pages(eb->start, eb->len);
2814 eb->flags &= ~EXTENT_UPTODATE;
2815
2816 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
2817 GFP_NOFS);
2818 for (i = 0; i < num_pages; i++) {
2819 page = extent_buffer_page(eb, i);
Chris Mason33958dc2008-07-30 10:29:12 -04002820 if (page)
2821 ClearPageUptodate(page);
Chris Mason1259ab72008-05-12 13:39:03 -04002822 }
2823 return 0;
2824}
2825
Chris Masond1310b22008-01-24 16:13:08 -05002826int set_extent_buffer_uptodate(struct extent_io_tree *tree,
2827 struct extent_buffer *eb)
2828{
2829 unsigned long i;
2830 struct page *page;
2831 unsigned long num_pages;
2832
2833 num_pages = num_extent_pages(eb->start, eb->len);
2834
2835 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
2836 GFP_NOFS);
2837 for (i = 0; i < num_pages; i++) {
2838 page = extent_buffer_page(eb, i);
2839 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
2840 ((i == num_pages - 1) &&
2841 ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
2842 check_page_uptodate(tree, page);
2843 continue;
2844 }
2845 SetPageUptodate(page);
2846 }
2847 return 0;
2848}
2849EXPORT_SYMBOL(set_extent_buffer_uptodate);
2850
Chris Masonce9adaa2008-04-09 16:28:12 -04002851int extent_range_uptodate(struct extent_io_tree *tree,
2852 u64 start, u64 end)
2853{
2854 struct page *page;
2855 int ret;
2856 int pg_uptodate = 1;
2857 int uptodate;
2858 unsigned long index;
2859
2860 ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1);
2861 if (ret)
2862 return 1;
2863 while(start <= end) {
2864 index = start >> PAGE_CACHE_SHIFT;
2865 page = find_get_page(tree->mapping, index);
2866 uptodate = PageUptodate(page);
2867 page_cache_release(page);
2868 if (!uptodate) {
2869 pg_uptodate = 0;
2870 break;
2871 }
2872 start += PAGE_CACHE_SIZE;
2873 }
2874 return pg_uptodate;
2875}
2876
Chris Masond1310b22008-01-24 16:13:08 -05002877int extent_buffer_uptodate(struct extent_io_tree *tree,
Chris Masonce9adaa2008-04-09 16:28:12 -04002878 struct extent_buffer *eb)
Chris Masond1310b22008-01-24 16:13:08 -05002879{
Chris Mason728131d2008-04-09 16:28:12 -04002880 int ret = 0;
Chris Masonce9adaa2008-04-09 16:28:12 -04002881 unsigned long num_pages;
2882 unsigned long i;
Chris Mason728131d2008-04-09 16:28:12 -04002883 struct page *page;
2884 int pg_uptodate = 1;
2885
Chris Masond1310b22008-01-24 16:13:08 -05002886 if (eb->flags & EXTENT_UPTODATE)
Chris Mason42352982008-04-28 16:40:52 -04002887 return 1;
Chris Mason728131d2008-04-09 16:28:12 -04002888
Chris Mason42352982008-04-28 16:40:52 -04002889 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
Chris Masond1310b22008-01-24 16:13:08 -05002890 EXTENT_UPTODATE, 1);
Chris Mason42352982008-04-28 16:40:52 -04002891 if (ret)
2892 return ret;
Chris Mason728131d2008-04-09 16:28:12 -04002893
2894 num_pages = num_extent_pages(eb->start, eb->len);
2895 for (i = 0; i < num_pages; i++) {
2896 page = extent_buffer_page(eb, i);
2897 if (!PageUptodate(page)) {
2898 pg_uptodate = 0;
2899 break;
2900 }
2901 }
Chris Mason42352982008-04-28 16:40:52 -04002902 return pg_uptodate;
Chris Masond1310b22008-01-24 16:13:08 -05002903}
2904EXPORT_SYMBOL(extent_buffer_uptodate);
2905
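/*
 * read the backing pages of an extent buffer from disk.  Pages are
 * locked (only trylocked when wait is zero) and reads are issued
 * through __extent_read_full_page so they share a bio; when wait is
 * set we block until every page is uptodate.
 */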
2906int read_extent_buffer_pages(struct extent_io_tree *tree,
2907 struct extent_buffer *eb,
Chris Masona86c12c2008-02-07 10:50:54 -05002908 u64 start, int wait,
Chris Masonf1885912008-04-09 16:28:12 -04002909 get_extent_t *get_extent, int mirror_num)
Chris Masond1310b22008-01-24 16:13:08 -05002910{
2911 unsigned long i;
2912 unsigned long start_i;
2913 struct page *page;
2914 int err;
2915 int ret = 0;
Chris Masonce9adaa2008-04-09 16:28:12 -04002916 int locked_pages = 0;
2917 int all_uptodate = 1;
2918 int inc_all_pages = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002919 unsigned long num_pages;
Chris Masona86c12c2008-02-07 10:50:54 -05002920 struct bio *bio = NULL;
2921
Chris Masond1310b22008-01-24 16:13:08 -05002922 if (eb->flags & EXTENT_UPTODATE)
2923 return 0;
2924
Chris Masonce9adaa2008-04-09 16:28:12 -04002925 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
Chris Masond1310b22008-01-24 16:13:08 -05002926 EXTENT_UPTODATE, 1)) {
2927 return 0;
2928 }
2929
2930 if (start) {
2931 WARN_ON(start < eb->start);
2932 start_i = (start >> PAGE_CACHE_SHIFT) -
2933 (eb->start >> PAGE_CACHE_SHIFT);
2934 } else {
2935 start_i = 0;
2936 }
2937
2938 num_pages = num_extent_pages(eb->start, eb->len);
2939 for (i = start_i; i < num_pages; i++) {
2940 page = extent_buffer_page(eb, i);
Chris Masond1310b22008-01-24 16:13:08 -05002941 if (!wait) {
David Woodhouse2db04962008-08-07 11:19:43 -04002942 if (!trylock_page(page))
Chris Masonce9adaa2008-04-09 16:28:12 -04002943 goto unlock_exit;
Chris Masond1310b22008-01-24 16:13:08 -05002944 } else {
2945 lock_page(page);
2946 }
Chris Masonce9adaa2008-04-09 16:28:12 -04002947 locked_pages++;
Chris Masond1310b22008-01-24 16:13:08 -05002948 if (!PageUptodate(page)) {
Chris Masonce9adaa2008-04-09 16:28:12 -04002949 all_uptodate = 0;
2950 }
2951 }
2952 if (all_uptodate) {
2953 if (start_i == 0)
2954 eb->flags |= EXTENT_UPTODATE;
Chris Masona1b32a52008-09-05 16:09:51 -04002955 if (ret) {
2956 printk("all up to date but ret is %d\n", ret);
2957 }
Chris Masonce9adaa2008-04-09 16:28:12 -04002958 goto unlock_exit;
2959 }
2960
2961 for (i = start_i; i < num_pages; i++) {
2962 page = extent_buffer_page(eb, i);
2963 if (inc_all_pages)
2964 page_cache_get(page);
2965 if (!PageUptodate(page)) {
2966 if (start_i == 0)
2967 inc_all_pages = 1;
Chris Masonf1885912008-04-09 16:28:12 -04002968 ClearPageError(page);
Chris Masona86c12c2008-02-07 10:50:54 -05002969 err = __extent_read_full_page(tree, page,
Chris Masonf1885912008-04-09 16:28:12 -04002970 get_extent, &bio,
2971 mirror_num);
Chris Masond1310b22008-01-24 16:13:08 -05002972 if (err) {
2973 ret = err;
Chris Masona1b32a52008-09-05 16:09:51 -04002974 printk("err %d from __extent_read_full_page\n", ret);
Chris Masond1310b22008-01-24 16:13:08 -05002975 }
2976 } else {
2977 unlock_page(page);
2978 }
2979 }
2980
Chris Masona86c12c2008-02-07 10:50:54 -05002981 if (bio)
Chris Masonf1885912008-04-09 16:28:12 -04002982 submit_one_bio(READ, bio, mirror_num);
Chris Masona86c12c2008-02-07 10:50:54 -05002983
Chris Masond1310b22008-01-24 16:13:08 -05002984 if (ret || !wait) {
Chris Masona1b32a52008-09-05 16:09:51 -04002985 if (ret)
2986 printk("ret %d wait %d returning\n", ret, wait);
Chris Masond1310b22008-01-24 16:13:08 -05002987 return ret;
2988 }
Chris Masond1310b22008-01-24 16:13:08 -05002989 for (i = start_i; i < num_pages; i++) {
2990 page = extent_buffer_page(eb, i);
2991 wait_on_page_locked(page);
2992 if (!PageUptodate(page)) {
Chris Masona1b32a52008-09-05 16:09:51 -04002993 printk("page not uptodate after wait_on_page_locked\n");
Chris Masond1310b22008-01-24 16:13:08 -05002994 ret = -EIO;
2995 }
2996 }
2997 if (!ret)
2998 eb->flags |= EXTENT_UPTODATE;
2999 return ret;
Chris Masonce9adaa2008-04-09 16:28:12 -04003000
3001unlock_exit:
3002 i = start_i;
3003 while(locked_pages > 0) {
3004 page = extent_buffer_page(eb, i);
3005 i++;
3006 unlock_page(page);
3007 locked_pages--;
3008 }
3009 return ret;
Chris Masond1310b22008-01-24 16:13:08 -05003010}
3011EXPORT_SYMBOL(read_extent_buffer_pages);
3012
3013void read_extent_buffer(struct extent_buffer *eb, void *dstv,
3014 unsigned long start,
3015 unsigned long len)
3016{
3017 size_t cur;
3018 size_t offset;
3019 struct page *page;
3020 char *kaddr;
3021 char *dst = (char *)dstv;
3022 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3023 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
Chris Masond1310b22008-01-24 16:13:08 -05003024
3025 WARN_ON(start > eb->len);
3026 WARN_ON(start + len > eb->start + eb->len);
3027
3028 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3029
3030 while(len > 0) {
3031 page = extent_buffer_page(eb, i);
Chris Masond1310b22008-01-24 16:13:08 -05003032
3033 cur = min(len, (PAGE_CACHE_SIZE - offset));
3034 kaddr = kmap_atomic(page, KM_USER1);
3035 memcpy(dst, kaddr + offset, cur);
3036 kunmap_atomic(kaddr, KM_USER1);
3037
3038 dst += cur;
3039 len -= cur;
3040 offset = 0;
3041 i++;
3042 }
3043}
3044EXPORT_SYMBOL(read_extent_buffer);
3045
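/*
 * kmap the single page that holds [start, start + min_len) within the
 * extent buffer and return a pointer into it.  Ranges that straddle a
 * page boundary fail with -EINVAL.
 */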
3046int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
3047 unsigned long min_len, char **token, char **map,
3048 unsigned long *map_start,
3049 unsigned long *map_len, int km)
3050{
3051 size_t offset = start & (PAGE_CACHE_SIZE - 1);
3052 char *kaddr;
3053 struct page *p;
3054 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3055 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3056 unsigned long end_i = (start_offset + start + min_len - 1) >>
3057 PAGE_CACHE_SHIFT;
3058
3059 if (i != end_i)
3060 return -EINVAL;
3061
3062 if (i == 0) {
3063 offset = start_offset;
3064 *map_start = 0;
3065 } else {
3066 offset = 0;
3067 *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset;
3068 }
3069 if (start + min_len > eb->len) {
3070		printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len);
3071 WARN_ON(1);
3072 }
3073
3074 p = extent_buffer_page(eb, i);
Chris Masond1310b22008-01-24 16:13:08 -05003075 kaddr = kmap_atomic(p, km);
3076 *token = kaddr;
3077 *map = kaddr + offset;
3078 *map_len = PAGE_CACHE_SIZE - offset;
3079 return 0;
3080}
3081EXPORT_SYMBOL(map_private_extent_buffer);
3082
3083int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
3084 unsigned long min_len,
3085 char **token, char **map,
3086 unsigned long *map_start,
3087 unsigned long *map_len, int km)
3088{
3089 int err;
3090 int save = 0;
3091 if (eb->map_token) {
3092 unmap_extent_buffer(eb, eb->map_token, km);
3093 eb->map_token = NULL;
3094 save = 1;
3095 }
3096 err = map_private_extent_buffer(eb, start, min_len, token, map,
3097 map_start, map_len, km);
3098 if (!err && save) {
3099 eb->map_token = *token;
3100 eb->kaddr = *map;
3101 eb->map_start = *map_start;
3102 eb->map_len = *map_len;
3103 }
3104 return err;
3105}
3106EXPORT_SYMBOL(map_extent_buffer);
3107
3108void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
3109{
3110 kunmap_atomic(token, km);
3111}
3112EXPORT_SYMBOL(unmap_extent_buffer);
3113
3114int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
3115 unsigned long start,
3116 unsigned long len)
3117{
3118 size_t cur;
3119 size_t offset;
3120 struct page *page;
3121 char *kaddr;
3122 char *ptr = (char *)ptrv;
3123 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3124 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3125 int ret = 0;
3126
3127 WARN_ON(start > eb->len);
3128 WARN_ON(start + len > eb->start + eb->len);
3129
3130 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3131
3132 while(len > 0) {
3133 page = extent_buffer_page(eb, i);
Chris Masond1310b22008-01-24 16:13:08 -05003134
3135 cur = min(len, (PAGE_CACHE_SIZE - offset));
3136
3137 kaddr = kmap_atomic(page, KM_USER0);
3138 ret = memcmp(ptr, kaddr + offset, cur);
3139 kunmap_atomic(kaddr, KM_USER0);
3140 if (ret)
3141 break;
3142
3143 ptr += cur;
3144 len -= cur;
3145 offset = 0;
3146 i++;
3147 }
3148 return ret;
3149}
3150EXPORT_SYMBOL(memcmp_extent_buffer);
3151
3152void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
3153 unsigned long start, unsigned long len)
3154{
3155 size_t cur;
3156 size_t offset;
3157 struct page *page;
3158 char *kaddr;
3159 char *src = (char *)srcv;
3160 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3161 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3162
3163 WARN_ON(start > eb->len);
3164 WARN_ON(start + len > eb->start + eb->len);
3165
3166 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3167
3168 while(len > 0) {
3169 page = extent_buffer_page(eb, i);
3170 WARN_ON(!PageUptodate(page));
3171
3172 cur = min(len, PAGE_CACHE_SIZE - offset);
3173 kaddr = kmap_atomic(page, KM_USER1);
3174 memcpy(kaddr + offset, src, cur);
3175 kunmap_atomic(kaddr, KM_USER1);
3176
3177 src += cur;
3178 len -= cur;
3179 offset = 0;
3180 i++;
3181 }
3182}
3183EXPORT_SYMBOL(write_extent_buffer);
3184
3185void memset_extent_buffer(struct extent_buffer *eb, char c,
3186 unsigned long start, unsigned long len)
3187{
3188 size_t cur;
3189 size_t offset;
3190 struct page *page;
3191 char *kaddr;
3192 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3193 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3194
3195 WARN_ON(start > eb->len);
3196 WARN_ON(start + len > eb->start + eb->len);
3197
3198 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3199
3200 while(len > 0) {
3201 page = extent_buffer_page(eb, i);
3202 WARN_ON(!PageUptodate(page));
3203
3204 cur = min(len, PAGE_CACHE_SIZE - offset);
3205 kaddr = kmap_atomic(page, KM_USER0);
3206 memset(kaddr + offset, c, cur);
3207 kunmap_atomic(kaddr, KM_USER0);
3208
3209 len -= cur;
3210 offset = 0;
3211 i++;
3212 }
3213}
3214EXPORT_SYMBOL(memset_extent_buffer);
3215
3216void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
3217 unsigned long dst_offset, unsigned long src_offset,
3218 unsigned long len)
3219{
3220 u64 dst_len = dst->len;
3221 size_t cur;
3222 size_t offset;
3223 struct page *page;
3224 char *kaddr;
3225 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
3226 unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
3227
3228 WARN_ON(src->len != dst_len);
3229
3230 offset = (start_offset + dst_offset) &
3231 ((unsigned long)PAGE_CACHE_SIZE - 1);
3232
3233 while(len > 0) {
3234 page = extent_buffer_page(dst, i);
3235 WARN_ON(!PageUptodate(page));
3236
3237 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
3238
3239 kaddr = kmap_atomic(page, KM_USER0);
3240 read_extent_buffer(src, kaddr + offset, src_offset, cur);
3241 kunmap_atomic(kaddr, KM_USER0);
3242
3243 src_offset += cur;
3244 len -= cur;
3245 offset = 0;
3246 i++;
3247 }
3248}
3249EXPORT_SYMBOL(copy_extent_buffer);
3250
3251static void move_pages(struct page *dst_page, struct page *src_page,
3252 unsigned long dst_off, unsigned long src_off,
3253 unsigned long len)
3254{
3255 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
3256 if (dst_page == src_page) {
3257 memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
3258 } else {
3259 char *src_kaddr = kmap_atomic(src_page, KM_USER1);
3260 char *p = dst_kaddr + dst_off + len;
3261 char *s = src_kaddr + src_off + len;
3262
3263 while (len--)
3264 *--p = *--s;
3265
3266 kunmap_atomic(src_kaddr, KM_USER1);
3267 }
3268 kunmap_atomic(dst_kaddr, KM_USER0);
3269}
3270
3271static void copy_pages(struct page *dst_page, struct page *src_page,
3272 unsigned long dst_off, unsigned long src_off,
3273 unsigned long len)
3274{
3275 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
3276 char *src_kaddr;
3277
3278 if (dst_page != src_page)
3279 src_kaddr = kmap_atomic(src_page, KM_USER1);
3280 else
3281 src_kaddr = dst_kaddr;
3282
3283 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
3284 kunmap_atomic(dst_kaddr, KM_USER0);
3285 if (dst_page != src_page)
3286 kunmap_atomic(src_kaddr, KM_USER1);
3287}
3288
3289void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
3290 unsigned long src_offset, unsigned long len)
3291{
3292 size_t cur;
3293 size_t dst_off_in_page;
3294 size_t src_off_in_page;
3295 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
3296 unsigned long dst_i;
3297 unsigned long src_i;
3298
3299 if (src_offset + len > dst->len) {
3300 printk("memmove bogus src_offset %lu move len %lu len %lu\n",
3301 src_offset, len, dst->len);
3302 BUG_ON(1);
3303 }
3304 if (dst_offset + len > dst->len) {
3305 printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
3306 dst_offset, len, dst->len);
3307 BUG_ON(1);
3308 }
3309
3310 while(len > 0) {
3311 dst_off_in_page = (start_offset + dst_offset) &
3312 ((unsigned long)PAGE_CACHE_SIZE - 1);
3313 src_off_in_page = (start_offset + src_offset) &
3314 ((unsigned long)PAGE_CACHE_SIZE - 1);
3315
3316 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
3317 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
3318
3319 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
3320 src_off_in_page));
3321 cur = min_t(unsigned long, cur,
3322 (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page));
3323
3324 copy_pages(extent_buffer_page(dst, dst_i),
3325 extent_buffer_page(dst, src_i),
3326 dst_off_in_page, src_off_in_page, cur);
3327
3328 src_offset += cur;
3329 dst_offset += cur;
3330 len -= cur;
3331 }
3332}
3333EXPORT_SYMBOL(memcpy_extent_buffer);
3334
3335void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
3336 unsigned long src_offset, unsigned long len)
3337{
3338 size_t cur;
3339 size_t dst_off_in_page;
3340 size_t src_off_in_page;
3341 unsigned long dst_end = dst_offset + len - 1;
3342 unsigned long src_end = src_offset + len - 1;
3343 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
3344 unsigned long dst_i;
3345 unsigned long src_i;
3346
3347 if (src_offset + len > dst->len) {
3348 printk("memmove bogus src_offset %lu move len %lu len %lu\n",
3349 src_offset, len, dst->len);
3350 BUG_ON(1);
3351 }
3352 if (dst_offset + len > dst->len) {
3353 printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
3354 dst_offset, len, dst->len);
3355 BUG_ON(1);
3356 }
3357 if (dst_offset < src_offset) {
3358 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
3359 return;
3360 }
3361 while(len > 0) {
3362 dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
3363 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
3364
3365 dst_off_in_page = (start_offset + dst_end) &
3366 ((unsigned long)PAGE_CACHE_SIZE - 1);
3367 src_off_in_page = (start_offset + src_end) &
3368 ((unsigned long)PAGE_CACHE_SIZE - 1);
3369
3370 cur = min_t(unsigned long, len, src_off_in_page + 1);
3371 cur = min(cur, dst_off_in_page + 1);
3372 move_pages(extent_buffer_page(dst, dst_i),
3373 extent_buffer_page(dst, src_i),
3374 dst_off_in_page - cur + 1,
3375 src_off_in_page - cur + 1, cur);
3376
3377 dst_end -= cur;
3378 src_end -= cur;
3379 len -= cur;
3380 }
3381}
3382EXPORT_SYMBOL(memmove_extent_buffer);
Chris Mason6af118ce2008-07-22 11:18:07 -04003383
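/*
 * called via releasepage: if nothing but the tree holds a reference to
 * the extent buffer covering this page, drop the page references and
 * remove the buffer from the tree.
 */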
3384int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
3385{
3386 u64 start = page_offset(page);
3387 struct extent_buffer *eb;
3388 int ret = 1;
3389 unsigned long i;
3390 unsigned long num_pages;
3391
3392 spin_lock(&tree->buffer_lock);
3393 eb = buffer_search(tree, start);
3394 if (!eb)
3395 goto out;
3396
3397 if (atomic_read(&eb->refs) > 1) {
3398 ret = 0;
3399 goto out;
3400 }
3401 /* at this point we can safely release the extent buffer */
3402 num_pages = num_extent_pages(eb->start, eb->len);
Christoph Hellwigb2141072008-09-05 16:43:31 -04003403 for (i = 0; i < num_pages; i++)
3404 page_cache_release(extent_buffer_page(eb, i));
Chris Mason6af118ce2008-07-22 11:18:07 -04003405 rb_erase(&eb->rb_node, &tree->buffer);
3406 __free_extent_buffer(eb);
3407out:
3408 spin_unlock(&tree->buffer_lock);
3409 return ret;
3410}
3411EXPORT_SYMBOL(try_release_extent_buffer);