/*
 * fs/f2fs/node.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 * http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/mpage.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/swap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"

static struct kmem_cache *nat_entry_slab;
static struct kmem_cache *free_nid_slab;

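/*
 * Drop the dirty state of a node page: clear the radix-tree dirty tag,
 * clear the page's dirty flag, decrease the dirty-node counter, and
 * finally clear PageUptodate so the page will be re-read if needed.
 */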
static void clear_node_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
	unsigned long flags;

	if (PageDirty(page)) {
		spin_lock_irqsave(&mapping->tree_lock, flags);
		radix_tree_tag_clear(&mapping->page_tree,
				page_index(page),
				PAGECACHE_TAG_DIRTY);
		spin_unlock_irqrestore(&mapping->tree_lock, flags);

		clear_page_dirty_for_io(page);
		dec_page_count(sbi, F2FS_DIRTY_NODES);
	}
	ClearPageUptodate(page);
}

static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
	pgoff_t index = current_nat_addr(sbi, nid);
	return get_meta_page(sbi, index);
}

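/*
 * Return a locked, dirty page for the NAT block covering @nid that the
 * next checkpoint will write.  If the current NAT page is not dirty yet,
 * its contents are copied to the next-version location and the NAT
 * version bitmap is flipped.
 */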
static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct page *src_page;
	struct page *dst_page;
	pgoff_t src_off;
	pgoff_t dst_off;
	void *src_addr;
	void *dst_addr;
	struct f2fs_nm_info *nm_i = NM_I(sbi);

	src_off = current_nat_addr(sbi, nid);
	dst_off = next_nat_addr(sbi, src_off);

	/* get current nat block page with lock */
	src_page = get_meta_page(sbi, src_off);

	/* Dirty src_page means that it is already the new target NAT page. */
	if (PageDirty(src_page))
		return src_page;

	dst_page = grab_meta_page(sbi, dst_off);

	src_addr = page_address(src_page);
	dst_addr = page_address(dst_page);
	memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
	set_page_dirty(dst_page);
	f2fs_put_page(src_page, 1);

	set_to_next_nat(nm_i, nid);

	return dst_page;
}

/*
 * Readahead NAT pages
 */
static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid)
{
	struct address_space *mapping = sbi->meta_inode->i_mapping;
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct page *page;
	pgoff_t index;
	int i;

	for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) {
		if (nid >= nm_i->max_nid)
			nid = 0;
		index = current_nat_addr(sbi, nid);

		page = grab_cache_page(mapping, index);
		if (!page)
			continue;
		if (f2fs_readpage(sbi, page, index, READ)) {
			f2fs_put_page(page, 1);
			continue;
		}
		page_cache_release(page);
	}
}

static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
{
	return radix_tree_lookup(&nm_i->nat_root, n);
}

static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
		nid_t start, unsigned int nr, struct nat_entry **ep)
{
	return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr);
}

static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
{
	list_del(&e->list);
	radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
	nm_i->nat_cnt--;
	kmem_cache_free(nat_entry_slab, e);
}

int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct nat_entry *e;
	int is_cp = 1;

	read_lock(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, nid);
	if (e && !e->checkpointed)
		is_cp = 0;
	read_unlock(&nm_i->nat_tree_lock);
	return is_cp;
}

static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
{
	struct nat_entry *new;

	new = kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC);
	if (!new)
		return NULL;
	if (radix_tree_insert(&nm_i->nat_root, nid, new)) {
		kmem_cache_free(nat_entry_slab, new);
		return NULL;
	}
	memset(new, 0, sizeof(struct nat_entry));
	nat_set_nid(new, nid);
	list_add_tail(&new->list, &nm_i->nat_entries);
	nm_i->nat_cnt++;
	return new;
}

static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
						struct f2fs_nat_entry *ne)
{
	struct nat_entry *e;
retry:
	write_lock(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, nid);
	if (!e) {
		e = grab_nat_entry(nm_i, nid);
		if (!e) {
			write_unlock(&nm_i->nat_tree_lock);
			goto retry;
		}
		nat_set_blkaddr(e, le32_to_cpu(ne->block_addr));
		nat_set_ino(e, le32_to_cpu(ne->ino));
		nat_set_version(e, ne->version);
		e->checkpointed = true;
	}
	write_unlock(&nm_i->nat_tree_lock);
}

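/*
 * Update the cached NAT entry for ni->nid to point at new_blkaddr,
 * creating the cache entry if it does not exist yet, and mark it dirty
 * so it will be written back at the next checkpoint.
 */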
static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
			block_t new_blkaddr)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct nat_entry *e;
retry:
	write_lock(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, ni->nid);
	if (!e) {
		e = grab_nat_entry(nm_i, ni->nid);
		if (!e) {
			write_unlock(&nm_i->nat_tree_lock);
			goto retry;
		}
		e->ni = *ni;
		e->checkpointed = true;
		BUG_ON(ni->blk_addr == NEW_ADDR);
	} else if (new_blkaddr == NEW_ADDR) {
		/*
		 * When a nid is reallocated, the previous nat entry may still
		 * remain in the nat cache, so reinitialize it with the new
		 * information.
		 */
		e->ni = *ni;
		BUG_ON(ni->blk_addr != NULL_ADDR);
	}

	if (new_blkaddr == NEW_ADDR)
		e->checkpointed = false;

	/* sanity check */
	BUG_ON(nat_get_blkaddr(e) != ni->blk_addr);
	BUG_ON(nat_get_blkaddr(e) == NULL_ADDR &&
			new_blkaddr == NULL_ADDR);
	BUG_ON(nat_get_blkaddr(e) == NEW_ADDR &&
			new_blkaddr == NEW_ADDR);
	BUG_ON(nat_get_blkaddr(e) != NEW_ADDR &&
			nat_get_blkaddr(e) != NULL_ADDR &&
			new_blkaddr == NEW_ADDR);

	/* increment version no as node is removed */
	if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
		unsigned char version = nat_get_version(e);
		nat_set_version(e, inc_node_version(version));
	}

	/* change address */
	nat_set_blkaddr(e, new_blkaddr);
	__set_nat_cache_dirty(nm_i, e);
	write_unlock(&nm_i->nat_tree_lock);
}

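/*
 * Shrink the clean part of the NAT cache by freeing up to nr_shrink
 * entries from the head of the LRU list once the cache has grown past
 * 2 * NM_WOUT_THRESHOLD entries.
 */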
static int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);

	if (nm_i->nat_cnt < 2 * NM_WOUT_THRESHOLD)
		return 0;

	write_lock(&nm_i->nat_tree_lock);
	while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
		struct nat_entry *ne;
		ne = list_first_entry(&nm_i->nat_entries,
					struct nat_entry, list);
		__del_from_nat_cache(nm_i, ne);
		nr_shrink--;
	}
	write_unlock(&nm_i->nat_tree_lock);
	return nr_shrink;
}

/*
 * This function always returns success.
 */
void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_summary_block *sum = curseg->sum_blk;
	nid_t start_nid = START_NID(nid);
	struct f2fs_nat_block *nat_blk;
	struct page *page = NULL;
	struct f2fs_nat_entry ne;
	struct nat_entry *e;
	int i;

	ni->nid = nid;

	/* Check nat cache */
	read_lock(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, nid);
	if (e) {
		ni->ino = nat_get_ino(e);
		ni->blk_addr = nat_get_blkaddr(e);
		ni->version = nat_get_version(e);
	}
	read_unlock(&nm_i->nat_tree_lock);
	if (e)
		return;

	/* Check current segment summary */
	mutex_lock(&curseg->curseg_mutex);
	i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
	if (i >= 0) {
		ne = nat_in_journal(sum, i);
		node_info_from_raw_nat(ni, &ne);
	}
	mutex_unlock(&curseg->curseg_mutex);
	if (i >= 0)
		goto cache;

	/* Fill node_info from nat page */
	page = get_current_nat_page(sbi, start_nid);
	nat_blk = (struct f2fs_nat_block *)page_address(page);
	ne = nat_blk->entries[nid - start_nid];
	node_info_from_raw_nat(ni, &ne);
	f2fs_put_page(page, 1);
cache:
	/* cache nat entry */
	cache_nat_entry(NM_I(sbi), nid, &ne);
}

/*
 * The maximum depth is four.
 * Offset[0] will have raw inode offset.
 */
static int get_node_path(long block, int offset[4], unsigned int noffset[4])
{
	const long direct_index = ADDRS_PER_INODE;
	const long direct_blks = ADDRS_PER_BLOCK;
	const long dptrs_per_blk = NIDS_PER_BLOCK;
	const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
	const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK;
	int n = 0;
	int level = 0;

	noffset[0] = 0;

	if (block < direct_index) {
		offset[n++] = block;
		level = 0;
		goto got;
	}
	block -= direct_index;
	if (block < direct_blks) {
		offset[n++] = NODE_DIR1_BLOCK;
		noffset[n] = 1;
		offset[n++] = block;
		level = 1;
		goto got;
	}
	block -= direct_blks;
	if (block < direct_blks) {
		offset[n++] = NODE_DIR2_BLOCK;
		noffset[n] = 2;
		offset[n++] = block;
		level = 1;
		goto got;
	}
	block -= direct_blks;
	if (block < indirect_blks) {
		offset[n++] = NODE_IND1_BLOCK;
		noffset[n] = 3;
		offset[n++] = block / direct_blks;
		noffset[n] = 4 + offset[n - 1];
		offset[n++] = block % direct_blks;
		level = 2;
		goto got;
	}
	block -= indirect_blks;
	if (block < indirect_blks) {
		offset[n++] = NODE_IND2_BLOCK;
		noffset[n] = 4 + dptrs_per_blk;
		offset[n++] = block / direct_blks;
		noffset[n] = 5 + dptrs_per_blk + offset[n - 1];
		offset[n++] = block % direct_blks;
		level = 2;
		goto got;
	}
	block -= indirect_blks;
	if (block < dindirect_blks) {
		offset[n++] = NODE_DIND_BLOCK;
		noffset[n] = 5 + (dptrs_per_blk * 2);
		offset[n++] = block / indirect_blks;
		noffset[n] = 6 + (dptrs_per_blk * 2) +
			      offset[n - 1] * (dptrs_per_blk + 1);
		offset[n++] = (block / direct_blks) % dptrs_per_blk;
		noffset[n] = 7 + (dptrs_per_blk * 2) +
			      offset[n - 2] * (dptrs_per_blk + 1) +
			      offset[n - 1];
		offset[n++] = block % direct_blks;
		level = 3;
		goto got;
	} else {
		BUG();
	}
got:
	return level;
}

/*
 * Caller should call f2fs_put_dnode(dn).
 */
int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int ro)
{
	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
	struct page *npage[4];
	struct page *parent;
	int offset[4];
	unsigned int noffset[4];
	nid_t nids[4];
	int level, i;
	int err = 0;

	level = get_node_path(index, offset, noffset);

	nids[0] = dn->inode->i_ino;
	npage[0] = get_node_page(sbi, nids[0]);
	if (IS_ERR(npage[0]))
		return PTR_ERR(npage[0]);

	parent = npage[0];
	nids[1] = get_nid(parent, offset[0], true);
	dn->inode_page = npage[0];
	dn->inode_page_locked = true;

	/* get indirect or direct nodes */
	for (i = 1; i <= level; i++) {
		bool done = false;

		if (!nids[i] && !ro) {
			mutex_lock_op(sbi, NODE_NEW);

			/* alloc new node */
			if (!alloc_nid(sbi, &(nids[i]))) {
				mutex_unlock_op(sbi, NODE_NEW);
				err = -ENOSPC;
				goto release_pages;
			}

			dn->nid = nids[i];
			npage[i] = new_node_page(dn, noffset[i]);
			if (IS_ERR(npage[i])) {
				alloc_nid_failed(sbi, nids[i]);
				mutex_unlock_op(sbi, NODE_NEW);
				err = PTR_ERR(npage[i]);
				goto release_pages;
			}

			set_nid(parent, offset[i - 1], nids[i], i == 1);
			alloc_nid_done(sbi, nids[i]);
			mutex_unlock_op(sbi, NODE_NEW);
			done = true;
		} else if (ro && i == level && level > 1) {
			npage[i] = get_node_page_ra(parent, offset[i - 1]);
			if (IS_ERR(npage[i])) {
				err = PTR_ERR(npage[i]);
				goto release_pages;
			}
			done = true;
		}
		if (i == 1) {
			dn->inode_page_locked = false;
			unlock_page(parent);
		} else {
			f2fs_put_page(parent, 1);
		}

		if (!done) {
			npage[i] = get_node_page(sbi, nids[i]);
			if (IS_ERR(npage[i])) {
				err = PTR_ERR(npage[i]);
				f2fs_put_page(npage[0], 0);
				goto release_out;
			}
		}
		if (i < level) {
			parent = npage[i];
			nids[i + 1] = get_nid(parent, offset[i], false);
		}
	}
	dn->nid = nids[level];
	dn->ofs_in_node = offset[level];
	dn->node_page = npage[level];
	dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
	return 0;

release_pages:
	f2fs_put_page(parent, 1);
	if (i > 1)
		f2fs_put_page(npage[0], 0);
release_out:
	dn->inode_page = NULL;
	dn->node_page = NULL;
	return err;
}

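/*
 * Free a single node block: invalidate its on-disk block, release the
 * node address in the NAT, and drop the (now clean) node page.  Inode
 * nodes additionally clear their orphan entry and the valid inode count.
 */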
static void truncate_node(struct dnode_of_data *dn)
{
	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
	struct node_info ni;

	get_node_info(sbi, dn->nid, &ni);
	BUG_ON(ni.blk_addr == NULL_ADDR);

	if (ni.blk_addr != NULL_ADDR)
		invalidate_blocks(sbi, ni.blk_addr);

	/* Deallocate node address */
	dec_valid_node_count(sbi, dn->inode, 1);
	set_node_addr(sbi, &ni, NULL_ADDR);

	if (dn->nid == dn->inode->i_ino) {
		remove_orphan_inode(sbi, dn->nid);
		dec_valid_inode_count(sbi);
	} else {
		sync_inode_page(dn);
	}

	clear_node_page_dirty(dn->node_page);
	F2FS_SET_SB_DIRT(sbi);

	f2fs_put_page(dn->node_page, 1);
	dn->node_page = NULL;
}

static int truncate_dnode(struct dnode_of_data *dn)
{
	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
	struct page *page;

	if (dn->nid == 0)
		return 1;

	/* get direct node */
	page = get_node_page(sbi, dn->nid);
	if (IS_ERR(page) && PTR_ERR(page) == -ENOENT)
		return 1;
	else if (IS_ERR(page))
		return PTR_ERR(page);

	/* Make dnode_of_data for parameter */
	dn->node_page = page;
	dn->ofs_in_node = 0;
	truncate_data_blocks(dn);
	truncate_node(dn);
	return 1;
}

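/*
 * Recursively free the node blocks below an indirect node, starting at
 * child index @ofs.  When the whole subtree is removed, the indirect
 * node itself is truncated too.  Returns the number of node offsets that
 * were covered (used by the caller to advance nofs), or a negative errno.
 */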
static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
						int ofs, int depth)
{
	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
	struct dnode_of_data rdn = *dn;
	struct page *page;
	struct f2fs_node *rn;
	nid_t child_nid;
	unsigned int child_nofs;
	int freed = 0;
	int i, ret;

	if (dn->nid == 0)
		return NIDS_PER_BLOCK + 1;

	page = get_node_page(sbi, dn->nid);
	if (IS_ERR(page))
		return PTR_ERR(page);

	rn = (struct f2fs_node *)page_address(page);
	if (depth < 3) {
		for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) {
			child_nid = le32_to_cpu(rn->in.nid[i]);
			if (child_nid == 0)
				continue;
			rdn.nid = child_nid;
			ret = truncate_dnode(&rdn);
			if (ret < 0)
				goto out_err;
			set_nid(page, i, 0, false);
		}
	} else {
		child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1;
		for (i = ofs; i < NIDS_PER_BLOCK; i++) {
			child_nid = le32_to_cpu(rn->in.nid[i]);
			if (child_nid == 0) {
				child_nofs += NIDS_PER_BLOCK + 1;
				continue;
			}
			rdn.nid = child_nid;
			ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1);
			if (ret == (NIDS_PER_BLOCK + 1)) {
				set_nid(page, i, 0, false);
				child_nofs += ret;
			} else if (ret < 0 && ret != -ENOENT) {
				goto out_err;
			}
		}
		freed = child_nofs;
	}

	if (!ofs) {
		/* remove current indirect node */
		dn->node_page = page;
		truncate_node(dn);
		freed++;
	} else {
		f2fs_put_page(page, 1);
	}
	return freed;

out_err:
	f2fs_put_page(page, 1);
	return ret;
}

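/*
 * Free the direct nodes hanging off the indirect node that the
 * truncation point falls into (the partially-truncated node on the
 * path), and advance offset[] to the first fully-truncated subtree.
 */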
static int truncate_partial_nodes(struct dnode_of_data *dn,
			struct f2fs_inode *ri, int *offset, int depth)
{
	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
	struct page *pages[2];
	nid_t nid[3];
	nid_t child_nid;
	int err = 0;
	int i;
	int idx = depth - 2;

	nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
	if (!nid[0])
		return 0;

	/* get indirect nodes in the path */
	for (i = 0; i < depth - 1; i++) {
		/* reference count will be increased */
		pages[i] = get_node_page(sbi, nid[i]);
		if (IS_ERR(pages[i])) {
			depth = i + 1;
			err = PTR_ERR(pages[i]);
			goto fail;
		}
		nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
	}

	/* free direct nodes linked to a partial indirect node */
	for (i = offset[depth - 1]; i < NIDS_PER_BLOCK; i++) {
		child_nid = get_nid(pages[idx], i, false);
		if (!child_nid)
			continue;
		dn->nid = child_nid;
		err = truncate_dnode(dn);
		if (err < 0)
			goto fail;
		set_nid(pages[idx], i, 0, false);
	}

	if (offset[depth - 1] == 0) {
		dn->node_page = pages[idx];
		dn->nid = nid[idx];
		truncate_node(dn);
	} else {
		f2fs_put_page(pages[idx], 1);
	}
	offset[idx]++;
	offset[depth - 1] = 0;
fail:
	for (i = depth - 3; i >= 0; i--)
		f2fs_put_page(pages[i], 1);
	return err;
}

/*
 * All the block addresses of data and nodes should be nullified.
 */
int truncate_inode_blocks(struct inode *inode, pgoff_t from)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	int err = 0, cont = 1;
	int level, offset[4], noffset[4];
	unsigned int nofs;
	struct f2fs_node *rn;
	struct dnode_of_data dn;
	struct page *page;

	level = get_node_path(from, offset, noffset);

	page = get_node_page(sbi, inode->i_ino);
	if (IS_ERR(page))
		return PTR_ERR(page);

	set_new_dnode(&dn, inode, page, NULL, 0);
	unlock_page(page);

	rn = page_address(page);
	switch (level) {
	case 0:
	case 1:
		nofs = noffset[1];
		break;
	case 2:
		nofs = noffset[1];
		if (!offset[level - 1])
			goto skip_partial;
		err = truncate_partial_nodes(&dn, &rn->i, offset, level);
		if (err < 0 && err != -ENOENT)
			goto fail;
		nofs += 1 + NIDS_PER_BLOCK;
		break;
	case 3:
		nofs = 5 + 2 * NIDS_PER_BLOCK;
		if (!offset[level - 1])
			goto skip_partial;
		err = truncate_partial_nodes(&dn, &rn->i, offset, level);
		if (err < 0 && err != -ENOENT)
			goto fail;
		break;
	default:
		BUG();
	}

skip_partial:
	while (cont) {
		dn.nid = le32_to_cpu(rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]);
		switch (offset[0]) {
		case NODE_DIR1_BLOCK:
		case NODE_DIR2_BLOCK:
			err = truncate_dnode(&dn);
			break;

		case NODE_IND1_BLOCK:
		case NODE_IND2_BLOCK:
			err = truncate_nodes(&dn, nofs, offset[1], 2);
			break;

		case NODE_DIND_BLOCK:
			err = truncate_nodes(&dn, nofs, offset[1], 3);
			cont = 0;
			break;

		default:
			BUG();
		}
		if (err < 0 && err != -ENOENT)
			goto fail;
		if (offset[1] == 0 &&
				rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) {
			lock_page(page);
			wait_on_page_writeback(page);
			rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
			set_page_dirty(page);
			unlock_page(page);
		}
		offset[1] = 0;
		offset[0]++;
		nofs += err;
	}
fail:
	f2fs_put_page(page, 0);
	return err > 0 ? 0 : err;
}

int remove_inode_page(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct page *page;
	nid_t ino = inode->i_ino;
	struct dnode_of_data dn;

	mutex_lock_op(sbi, NODE_TRUNC);
	page = get_node_page(sbi, ino);
	if (IS_ERR(page)) {
		mutex_unlock_op(sbi, NODE_TRUNC);
		return PTR_ERR(page);
	}

	if (F2FS_I(inode)->i_xattr_nid) {
		nid_t nid = F2FS_I(inode)->i_xattr_nid;
		struct page *npage = get_node_page(sbi, nid);

		if (IS_ERR(npage)) {
			mutex_unlock_op(sbi, NODE_TRUNC);
			return PTR_ERR(npage);
		}

		F2FS_I(inode)->i_xattr_nid = 0;
		set_new_dnode(&dn, inode, page, npage, nid);
		dn.inode_page_locked = 1;
		truncate_node(&dn);
	}
	if (inode->i_blocks == 1) {
		/* internally call f2fs_put_page() */
		set_new_dnode(&dn, inode, page, page, ino);
		truncate_node(&dn);
	} else if (inode->i_blocks == 0) {
		struct node_info ni;
		get_node_info(sbi, inode->i_ino, &ni);

		/* called after f2fs_new_inode() has failed */
		BUG_ON(ni.blk_addr != NULL_ADDR);
		f2fs_put_page(page, 1);
	} else {
		BUG();
	}
	mutex_unlock_op(sbi, NODE_TRUNC);
	return 0;
}

int new_inode_page(struct inode *inode, struct dentry *dentry)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct page *page;
	struct dnode_of_data dn;

	/* allocate inode page for new inode */
	set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
	mutex_lock_op(sbi, NODE_NEW);
	page = new_node_page(&dn, 0);
	init_dent_inode(dentry, page);
	mutex_unlock_op(sbi, NODE_NEW);
	if (IS_ERR(page))
		return PTR_ERR(page);
	f2fs_put_page(page, 1);
	return 0;
}

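/*
 * Allocate a new in-memory node page for dn->nid, fill its footer, and
 * register the node with a NEW_ADDR address in the NAT cache.  The page
 * is returned locked and dirty; no block is allocated on disk yet.
 */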
struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs)
{
	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
	struct address_space *mapping = sbi->node_inode->i_mapping;
	struct node_info old_ni, new_ni;
	struct page *page;
	int err;

	if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))
		return ERR_PTR(-EPERM);

	page = grab_cache_page(mapping, dn->nid);
	if (!page)
		return ERR_PTR(-ENOMEM);

	get_node_info(sbi, dn->nid, &old_ni);

	SetPageUptodate(page);
	fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);

	/* Reinitialize old_ni with new node page */
	BUG_ON(old_ni.blk_addr != NULL_ADDR);
	new_ni = old_ni;
	new_ni.ino = dn->inode->i_ino;

	if (!inc_valid_node_count(sbi, dn->inode, 1)) {
		err = -ENOSPC;
		goto fail;
	}
	set_node_addr(sbi, &new_ni, NEW_ADDR);

	dn->node_page = page;
	sync_inode_page(dn);
	set_page_dirty(page);
	set_cold_node(dn->inode, page);
	if (ofs == 0)
		inc_valid_inode_count(sbi);

	return page;

fail:
	f2fs_put_page(page, 1);
	return ERR_PTR(err);
}

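/*
 * Look up the block address of a node page and submit the read.
 * Returns -ENOENT if the node has no on-disk block (already truncated),
 * otherwise the result of f2fs_readpage().
 */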
static int read_node_page(struct page *page, int type)
{
	struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
	struct node_info ni;

	get_node_info(sbi, page->index, &ni);

	if (ni.blk_addr == NULL_ADDR)
		return -ENOENT;
	return f2fs_readpage(sbi, page, ni.blk_addr, type);
}

/*
 * Readahead a node page
 */
void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct address_space *mapping = sbi->node_inode->i_mapping;
	struct page *apage;

	apage = find_get_page(mapping, nid);
	if (apage && PageUptodate(apage))
		goto release_out;
	f2fs_put_page(apage, 0);

	apage = grab_cache_page(mapping, nid);
	if (!apage)
		return;

	if (read_node_page(apage, READA))
		goto unlock_out;

	page_cache_release(apage);
	return;

unlock_out:
	unlock_page(apage);
release_out:
	page_cache_release(apage);
}

struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
{
	int err;
	struct page *page;
	struct address_space *mapping = sbi->node_inode->i_mapping;

	page = grab_cache_page(mapping, nid);
	if (!page)
		return ERR_PTR(-ENOMEM);

	err = read_node_page(page, READ_SYNC);
	if (err) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	}

	BUG_ON(nid != nid_of_node(page));
	mark_page_accessed(page);
	return page;
}

/*
 * Return a locked page for the desired node page.
 * Also, readahead MAX_RA_NODE number of node pages.
 */
struct page *get_node_page_ra(struct page *parent, int start)
{
	struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb);
	struct address_space *mapping = sbi->node_inode->i_mapping;
	int i, end;
	int err = 0;
	nid_t nid;
	struct page *page;

	/* First, try getting the desired direct node. */
	nid = get_nid(parent, start, false);
	if (!nid)
		return ERR_PTR(-ENOENT);

	page = find_get_page(mapping, nid);
	if (page && PageUptodate(page))
		goto page_hit;
	f2fs_put_page(page, 0);

repeat:
	page = grab_cache_page(mapping, nid);
	if (!page)
		return ERR_PTR(-ENOMEM);

	err = read_node_page(page, READA);
	if (err) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	}

	/* Then, try readahead for siblings of the desired node */
	end = start + MAX_RA_NODE;
	end = min(end, NIDS_PER_BLOCK);
	for (i = start + 1; i < end; i++) {
		nid = get_nid(parent, i, false);
		if (!nid)
			continue;
		ra_node_page(sbi, nid);
	}

page_hit:
	lock_page(page);
	if (PageError(page)) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}

	/* Has the page been truncated? */
	if (page->mapping != mapping) {
		f2fs_put_page(page, 1);
		goto repeat;
	}
	return page;
}

void sync_inode_page(struct dnode_of_data *dn)
{
	if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) {
		update_inode(dn->inode, dn->node_page);
	} else if (dn->inode_page) {
		if (!dn->inode_page_locked)
			lock_page(dn->inode_page);
		update_inode(dn->inode, dn->inode_page);
		if (!dn->inode_page_locked)
			unlock_page(dn->inode_page);
	} else {
		f2fs_write_inode(dn->inode, NULL);
	}
}

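/*
 * Write back dirty node pages.  When @ino is zero, everything is flushed
 * in three passes (indirect nodes, dentry dnodes, file dnodes); when
 * @ino is given (the fsync path), only that inode's dirty dnode pages
 * are written and they carry fsync/dentry marks for roll-forward
 * recovery.  Returns the number of marked dnode pages written.
 */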
int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
			struct writeback_control *wbc)
{
	struct address_space *mapping = sbi->node_inode->i_mapping;
	pgoff_t index, end;
	struct pagevec pvec;
	int step = ino ? 2 : 0;
	int nwritten = 0, wrote = 0;

	pagevec_init(&pvec, 0);

next_step:
	index = 0;
	end = LONG_MAX;

	while (index <= end) {
		int i, nr_pages;
		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
				PAGECACHE_TAG_DIRTY,
				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			/*
			 * flushing sequence with step:
			 * 0. indirect nodes
			 * 1. dentry dnodes
			 * 2. file dnodes
			 */
			if (step == 0 && IS_DNODE(page))
				continue;
			if (step == 1 && (!IS_DNODE(page) ||
						is_cold_node(page)))
				continue;
			if (step == 2 && (!IS_DNODE(page) ||
						!is_cold_node(page)))
				continue;

			/*
			 * In fsync mode, we should not skip writing
			 * node pages.
			 */
			if (ino && ino_of_node(page) == ino)
				lock_page(page);
			else if (!trylock_page(page))
				continue;

			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}
			if (ino && ino_of_node(page) != ino)
				goto continue_unlock;

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			/* called by fsync() */
			if (ino && IS_DNODE(page)) {
				int mark = !is_checkpointed_node(sbi, ino);
				set_fsync_mark(page, 1);
				if (IS_INODE(page))
					set_dentry_mark(page, mark);
				nwritten++;
			} else {
				set_fsync_mark(page, 0);
				set_dentry_mark(page, 0);
			}
			mapping->a_ops->writepage(page, wbc);
			wrote++;

			if (--wbc->nr_to_write == 0)
				break;
		}
		pagevec_release(&pvec);
		cond_resched();

		if (wbc->nr_to_write == 0) {
			step = 2;
			break;
		}
	}

	if (step < 2) {
		step++;
		goto next_step;
	}

	if (wrote)
		f2fs_submit_bio(sbi, NODE, wbc->sync_mode == WB_SYNC_ALL);

	return nwritten;
}

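/*
 * Write a single node page: look up its current block address, write it
 * to a newly allocated node block, and update the NAT cache with the
 * new address.  Reclaim-driven writeback is refused and redirtied.
 */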
static int f2fs_write_node_page(struct page *page,
				struct writeback_control *wbc)
{
	struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb);
	nid_t nid;
	unsigned int nofs;
	block_t new_addr;
	struct node_info ni;

	if (wbc->for_reclaim) {
		dec_page_count(sbi, F2FS_DIRTY_NODES);
		wbc->pages_skipped++;
		set_page_dirty(page);
		return AOP_WRITEPAGE_ACTIVATE;
	}

	wait_on_page_writeback(page);

	mutex_lock_op(sbi, NODE_WRITE);

	/* get old block addr of this node page */
	nid = nid_of_node(page);
	nofs = ofs_of_node(page);
	BUG_ON(page->index != nid);

	get_node_info(sbi, nid, &ni);

	/* This page is already truncated */
	if (ni.blk_addr == NULL_ADDR) {
		/* drop the dirty-node count and release the lock and page */
		dec_page_count(sbi, F2FS_DIRTY_NODES);
		mutex_unlock_op(sbi, NODE_WRITE);
		unlock_page(page);
		return 0;
	}

	set_page_writeback(page);

	/* insert node offset */
	write_node_page(sbi, page, nid, ni.blk_addr, &new_addr);
	set_node_addr(sbi, &ni, new_addr);
	dec_page_count(sbi, F2FS_DIRTY_NODES);

	mutex_unlock_op(sbi, NODE_WRITE);
	unlock_page(page);
	return 0;
}

static int f2fs_write_node_pages(struct address_space *mapping,
			struct writeback_control *wbc)
{
	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
	struct block_device *bdev = sbi->sb->s_bdev;
	long nr_to_write = wbc->nr_to_write;

	if (wbc->for_kupdate)
		return 0;

	if (get_pages(sbi, F2FS_DIRTY_NODES) == 0)
		return 0;

	if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) {
		write_checkpoint(sbi, false, false);
		return 0;
	}

	/* if mounting has failed, skip writing node pages */
	wbc->nr_to_write = bio_get_nr_vecs(bdev);
	sync_node_pages(sbi, 0, wbc);
	wbc->nr_to_write = nr_to_write -
		(bio_get_nr_vecs(bdev) - wbc->nr_to_write);
	return 0;
}

static int f2fs_set_node_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);

	SetPageUptodate(page);
	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
		inc_page_count(sbi, F2FS_DIRTY_NODES);
		SetPagePrivate(page);
		return 1;
	}
	return 0;
}

static void f2fs_invalidate_node_page(struct page *page, unsigned long offset)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	if (PageDirty(page))
		dec_page_count(sbi, F2FS_DIRTY_NODES);
	ClearPagePrivate(page);
}

static int f2fs_release_node_page(struct page *page, gfp_t wait)
{
	ClearPagePrivate(page);
	return 0;
}

/*
 * Structure of the f2fs node operations
 */
const struct address_space_operations f2fs_node_aops = {
	.writepage = f2fs_write_node_page,
	.writepages = f2fs_write_node_pages,
	.set_page_dirty = f2fs_set_node_page_dirty,
	.invalidatepage = f2fs_invalidate_node_page,
	.releasepage = f2fs_release_node_page,
};

static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head)
{
	struct list_head *this;
	struct free_nid *i = NULL;
	list_for_each(this, head) {
		i = list_entry(this, struct free_nid, list);
		if (i->nid == n)
			break;
		i = NULL;
	}
	return i;
}

static void __del_from_free_nid_list(struct free_nid *i)
{
	list_del(&i->list);
	kmem_cache_free(free_nid_slab, i);
}

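/*
 * Add @nid to the free nid list as NID_NEW if it is not already there.
 * Returns 1 if a new entry was added, 0 otherwise.
 */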
static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
{
	struct free_nid *i;

	if (nm_i->fcnt > 2 * MAX_FREE_NIDS)
		return 0;
retry:
	i = kmem_cache_alloc(free_nid_slab, GFP_NOFS);
	if (!i) {
		cond_resched();
		goto retry;
	}
	i->nid = nid;
	i->state = NID_NEW;

	spin_lock(&nm_i->free_nid_list_lock);
	if (__lookup_free_nid_list(nid, &nm_i->free_nid_list)) {
		spin_unlock(&nm_i->free_nid_list_lock);
		kmem_cache_free(free_nid_slab, i);
		return 0;
	}
	list_add_tail(&i->list, &nm_i->free_nid_list);
	nm_i->fcnt++;
	spin_unlock(&nm_i->free_nid_list_lock);
	return 1;
}

static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
{
	struct free_nid *i;
	spin_lock(&nm_i->free_nid_list_lock);
	i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
	if (i && i->state == NID_NEW) {
		__del_from_free_nid_list(i);
		nm_i->fcnt--;
	}
	spin_unlock(&nm_i->free_nid_list_lock);
}

static int scan_nat_page(struct f2fs_nm_info *nm_i,
			struct page *nat_page, nid_t start_nid)
{
	struct f2fs_nat_block *nat_blk = page_address(nat_page);
	block_t blk_addr;
	int fcnt = 0;
	int i;

	/* 0 nid should not be used */
	if (start_nid == 0)
		++start_nid;

	i = start_nid % NAT_ENTRY_PER_BLOCK;

	for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
		blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
		BUG_ON(blk_addr == NEW_ADDR);
		if (blk_addr == NULL_ADDR)
			fcnt += add_free_nid(nm_i, start_nid);
	}
	return fcnt;
}

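/*
 * Rebuild the free nid list: scan NAT pages starting at next_scan_nid
 * until enough free nids are found, fold in the NAT journal entries of
 * the current summary, and finally drop nids that the NAT cache shows
 * as already allocated.
 */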
static void build_free_nids(struct f2fs_sb_info *sbi)
{
	struct free_nid *fnid, *next_fnid;
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_summary_block *sum = curseg->sum_blk;
	nid_t nid = 0;
	bool is_cycled = false;
	int fcnt = 0;
	int i;

	nid = nm_i->next_scan_nid;
	nm_i->init_scan_nid = nid;

	ra_nat_pages(sbi, nid);

	while (1) {
		struct page *page = get_current_nat_page(sbi, nid);

		fcnt += scan_nat_page(nm_i, page, nid);
		f2fs_put_page(page, 1);

		nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));

		if (nid >= nm_i->max_nid) {
			nid = 0;
			is_cycled = true;
		}
		if (fcnt > MAX_FREE_NIDS)
			break;
		if (is_cycled && nm_i->init_scan_nid <= nid)
			break;
	}

	nm_i->next_scan_nid = nid;

	/* find free nids from current sum_pages */
	mutex_lock(&curseg->curseg_mutex);
	for (i = 0; i < nats_in_cursum(sum); i++) {
		block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr);
		nid = le32_to_cpu(nid_in_journal(sum, i));
		if (addr == NULL_ADDR)
			add_free_nid(nm_i, nid);
		else
			remove_free_nid(nm_i, nid);
	}
	mutex_unlock(&curseg->curseg_mutex);

	/* remove nids that are already allocated from the free nid list */
	list_for_each_entry_safe(fnid, next_fnid, &nm_i->free_nid_list, list) {
		struct nat_entry *ne;

		read_lock(&nm_i->nat_tree_lock);
		ne = __lookup_nat_cache(nm_i, fnid->nid);
		if (ne && nat_get_blkaddr(ne) != NULL_ADDR)
			remove_free_nid(nm_i, fnid->nid);
		read_unlock(&nm_i->nat_tree_lock);
	}
}

/*
 * If this function returns success, the caller can obtain a new nid
 * from the second parameter of this function.
 * The returned nid could be used as an ino as well as a nid when an
 * inode is created.
 */
bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i = NULL;
	struct list_head *this;
retry:
	mutex_lock(&nm_i->build_lock);
	if (!nm_i->fcnt) {
		/* scan NAT in order to build free nid list */
		build_free_nids(sbi);
		if (!nm_i->fcnt) {
			mutex_unlock(&nm_i->build_lock);
			return false;
		}
	}
	mutex_unlock(&nm_i->build_lock);

	/*
	 * We check fcnt again since the previous check is racy, as
	 * we didn't hold free_nid_list_lock.  Another thread could
	 * have consumed all of the free nids.
	 */
	spin_lock(&nm_i->free_nid_list_lock);
	if (!nm_i->fcnt) {
		spin_unlock(&nm_i->free_nid_list_lock);
		goto retry;
	}

	BUG_ON(list_empty(&nm_i->free_nid_list));
	list_for_each(this, &nm_i->free_nid_list) {
		i = list_entry(this, struct free_nid, list);
		if (i->state == NID_NEW)
			break;
	}

	BUG_ON(i->state != NID_NEW);
	*nid = i->nid;
	i->state = NID_ALLOC;
	nm_i->fcnt--;
	spin_unlock(&nm_i->free_nid_list_lock);
	return true;
}

/*
 * alloc_nid() should be called prior to this function.
 */
void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i;

	spin_lock(&nm_i->free_nid_list_lock);
	i = __lookup_free_nid_list(nid, &nm_i->free_nid_list);
	if (i) {
		BUG_ON(i->state != NID_ALLOC);
		__del_from_free_nid_list(i);
	}
	spin_unlock(&nm_i->free_nid_list_lock);
}

/*
 * alloc_nid() should be called prior to this function.
 */
void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
{
	alloc_nid_done(sbi, nid);
	add_free_nid(NM_I(sbi), nid);
}

void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
		struct f2fs_summary *sum, struct node_info *ni,
		block_t new_blkaddr)
{
	rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr);
	set_node_addr(sbi, ni, new_blkaddr);
	clear_node_page_dirty(page);
}

int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
{
	struct address_space *mapping = sbi->node_inode->i_mapping;
	struct f2fs_node *src, *dst;
	nid_t ino = ino_of_node(page);
	struct node_info old_ni, new_ni;
	struct page *ipage;

	ipage = grab_cache_page(mapping, ino);
	if (!ipage)
		return -ENOMEM;

	/* Should not use this inode from free nid list */
	remove_free_nid(NM_I(sbi), ino);

	get_node_info(sbi, ino, &old_ni);
	SetPageUptodate(ipage);
	fill_node_footer(ipage, ino, ino, 0, true);

	src = (struct f2fs_node *)page_address(page);
	dst = (struct f2fs_node *)page_address(ipage);

	memcpy(dst, src, (unsigned long)&src->i.i_ext - (unsigned long)&src->i);
	dst->i.i_size = 0;
	dst->i.i_blocks = cpu_to_le64(1);
	dst->i.i_links = cpu_to_le32(1);
	dst->i.i_xattr_nid = 0;

	new_ni = old_ni;
	new_ni.ino = ino;

	set_node_addr(sbi, &new_ni, NEW_ADDR);
	inc_valid_inode_count(sbi);

	f2fs_put_page(ipage, 1);
	return 0;
}

int restore_node_summary(struct f2fs_sb_info *sbi,
			unsigned int segno, struct f2fs_summary_block *sum)
{
	struct f2fs_node *rn;
	struct f2fs_summary *sum_entry;
	struct page *page;
	block_t addr;
	int i, last_offset;

	/* alloc temporary page for reading node blocks */
	page = alloc_page(GFP_NOFS | __GFP_ZERO);
	if (!page)	/* alloc_page() returns NULL on failure, not ERR_PTR */
		return -ENOMEM;
	lock_page(page);

	/* scan the node segment */
	last_offset = sbi->blocks_per_seg;
	addr = START_BLOCK(sbi, segno);
	sum_entry = &sum->entries[0];

	for (i = 0; i < last_offset; i++, sum_entry++) {
		if (f2fs_readpage(sbi, page, addr, READ_SYNC))
			goto out;

		rn = (struct f2fs_node *)page_address(page);
		sum_entry->nid = rn->footer.nid;
		sum_entry->version = 0;
		sum_entry->ofs_in_node = 0;
		addr++;

		/*
		 * In order to read the next node page,
		 * we must clear the PageUptodate flag.
		 */
		ClearPageUptodate(page);
	}
out:
	unlock_page(page);
	__free_pages(page, 0);
	return 0;
}

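/*
 * If the NAT journal in the hot-data current segment is full, push all
 * of its entries into the NAT cache (marking them dirty) so that the
 * journal can be reused.  Returns true when the journal was flushed.
 */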
static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_summary_block *sum = curseg->sum_blk;
	int i;

	mutex_lock(&curseg->curseg_mutex);

	if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) {
		mutex_unlock(&curseg->curseg_mutex);
		return false;
	}

	for (i = 0; i < nats_in_cursum(sum); i++) {
		struct nat_entry *ne;
		struct f2fs_nat_entry raw_ne;
		nid_t nid = le32_to_cpu(nid_in_journal(sum, i));

		raw_ne = nat_in_journal(sum, i);
retry:
		write_lock(&nm_i->nat_tree_lock);
		ne = __lookup_nat_cache(nm_i, nid);
		if (ne) {
			__set_nat_cache_dirty(nm_i, ne);
			write_unlock(&nm_i->nat_tree_lock);
			continue;
		}
		ne = grab_nat_entry(nm_i, nid);
		if (!ne) {
			write_unlock(&nm_i->nat_tree_lock);
			goto retry;
		}
		nat_set_blkaddr(ne, le32_to_cpu(raw_ne.block_addr));
		nat_set_ino(ne, le32_to_cpu(raw_ne.ino));
		nat_set_version(ne, raw_ne.version);
		__set_nat_cache_dirty(nm_i, ne);
		write_unlock(&nm_i->nat_tree_lock);
	}
	update_nats_in_cursum(sum, -i);
	mutex_unlock(&curseg->curseg_mutex);
	return true;
}

/*
 * This function is called during the checkpointing process.
 */
void flush_nat_entries(struct f2fs_sb_info *sbi)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_summary_block *sum = curseg->sum_blk;
	struct list_head *cur, *n;
	struct page *page = NULL;
	struct f2fs_nat_block *nat_blk = NULL;
	nid_t start_nid = 0, end_nid = 0;
	bool flushed;

	flushed = flush_nats_in_journal(sbi);

	if (!flushed)
		mutex_lock(&curseg->curseg_mutex);

	/* 1) flush dirty nat caches */
	list_for_each_safe(cur, n, &nm_i->dirty_nat_entries) {
		struct nat_entry *ne;
		nid_t nid;
		struct f2fs_nat_entry raw_ne;
		int offset = -1;
		block_t old_blkaddr, new_blkaddr;

		ne = list_entry(cur, struct nat_entry, list);
		nid = nat_get_nid(ne);

		if (nat_get_blkaddr(ne) == NEW_ADDR)
			continue;
		if (flushed)
			goto to_nat_page;

		/* if there is room for nat entries in curseg->sumpage */
		offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1);
		if (offset >= 0) {
			raw_ne = nat_in_journal(sum, offset);
			old_blkaddr = le32_to_cpu(raw_ne.block_addr);
			goto flush_now;
		}
to_nat_page:
		if (!page || (start_nid > nid || nid > end_nid)) {
			if (page) {
				f2fs_put_page(page, 1);
				page = NULL;
			}
			start_nid = START_NID(nid);
			end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1;

			/*
			 * get the nat block page: dirty, with an increased
			 * reference count, mapped and locked
			 */
			page = get_next_nat_page(sbi, start_nid);
			nat_blk = page_address(page);
		}

		BUG_ON(!nat_blk);
		raw_ne = nat_blk->entries[nid - start_nid];
		old_blkaddr = le32_to_cpu(raw_ne.block_addr);
flush_now:
		new_blkaddr = nat_get_blkaddr(ne);

		raw_ne.ino = cpu_to_le32(nat_get_ino(ne));
		raw_ne.block_addr = cpu_to_le32(new_blkaddr);
		raw_ne.version = nat_get_version(ne);

		if (offset < 0) {
			nat_blk->entries[nid - start_nid] = raw_ne;
		} else {
			nat_in_journal(sum, offset) = raw_ne;
			nid_in_journal(sum, offset) = cpu_to_le32(nid);
		}

		if (nat_get_blkaddr(ne) == NULL_ADDR) {
			write_lock(&nm_i->nat_tree_lock);
			__del_from_nat_cache(nm_i, ne);
			write_unlock(&nm_i->nat_tree_lock);

			/* We can reuse this freed nid at this point */
			add_free_nid(NM_I(sbi), nid);
		} else {
			write_lock(&nm_i->nat_tree_lock);
			__clear_nat_cache_dirty(nm_i, ne);
			ne->checkpointed = true;
			write_unlock(&nm_i->nat_tree_lock);
		}
	}
	if (!flushed)
		mutex_unlock(&curseg->curseg_mutex);
	f2fs_put_page(page, 1);

	/* 2) shrink nat caches if necessary */
	try_to_free_nats(sbi, nm_i->nat_cnt - NM_WOUT_THRESHOLD);
}

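/*
 * Initialize the node manager: derive max_nid from the NAT area size,
 * set up the free nid list, the NAT radix tree and LRU lists, and copy
 * the NAT version bitmap from the checkpoint.
 */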
static int init_node_manager(struct f2fs_sb_info *sbi)
{
	struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned char *version_bitmap;
	unsigned int nat_segs, nat_blocks;

	nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);

	/* segment_count_nat includes pair segment so divide by 2 */
	nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
	nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
	nm_i->fcnt = 0;
	nm_i->nat_cnt = 0;

	INIT_LIST_HEAD(&nm_i->free_nid_list);
	INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
	INIT_LIST_HEAD(&nm_i->nat_entries);
	INIT_LIST_HEAD(&nm_i->dirty_nat_entries);

	mutex_init(&nm_i->build_lock);
	spin_lock_init(&nm_i->free_nid_list_lock);
	rwlock_init(&nm_i->nat_tree_lock);

	nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
	nm_i->init_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
	nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);

	nm_i->nat_bitmap = kzalloc(nm_i->bitmap_size, GFP_KERNEL);
	if (!nm_i->nat_bitmap)
		return -ENOMEM;
	version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
	if (!version_bitmap)
		return -EFAULT;

	/* copy version bitmap */
	memcpy(nm_i->nat_bitmap, version_bitmap, nm_i->bitmap_size);
	return 0;
}

int build_node_manager(struct f2fs_sb_info *sbi)
{
	int err;

	sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL);
	if (!sbi->nm_info)
		return -ENOMEM;

	err = init_node_manager(sbi);
	if (err)
		return err;

	build_free_nids(sbi);
	return 0;
}

void destroy_node_manager(struct f2fs_sb_info *sbi)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i, *next_i;
	struct nat_entry *natvec[NATVEC_SIZE];
	nid_t nid = 0;
	unsigned int found;

	if (!nm_i)
		return;

	/* destroy free nid list */
	spin_lock(&nm_i->free_nid_list_lock);
	list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
		BUG_ON(i->state == NID_ALLOC);
		__del_from_free_nid_list(i);
		nm_i->fcnt--;
	}
	BUG_ON(nm_i->fcnt);
	spin_unlock(&nm_i->free_nid_list_lock);

	/* destroy nat cache */
	write_lock(&nm_i->nat_tree_lock);
	while ((found = __gang_lookup_nat_cache(nm_i,
			nid, NATVEC_SIZE, natvec))) {
		unsigned idx;
		for (idx = 0; idx < found; idx++) {
			struct nat_entry *e = natvec[idx];
			nid = nat_get_nid(e) + 1;
			__del_from_nat_cache(nm_i, e);
		}
	}
	BUG_ON(nm_i->nat_cnt);
	write_unlock(&nm_i->nat_tree_lock);

	kfree(nm_i->nat_bitmap);
	sbi->nm_info = NULL;
	kfree(nm_i);
}

int create_node_manager_caches(void)
{
	nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
			sizeof(struct nat_entry), NULL);
	if (!nat_entry_slab)
		return -ENOMEM;

	free_nid_slab = f2fs_kmem_cache_create("free_nid",
			sizeof(struct free_nid), NULL);
	if (!free_nid_slab) {
		kmem_cache_destroy(nat_entry_slab);
		return -ENOMEM;
	}
	return 0;
}

void destroy_node_manager_caches(void)
{
	kmem_cache_destroy(free_nid_slab);
	kmem_cache_destroy(nat_entry_slab);
}