 1/*
2 * fs/f2fs/node.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/fs.h>
12#include <linux/f2fs_fs.h>
13#include <linux/mpage.h>
14#include <linux/backing-dev.h>
15#include <linux/blkdev.h>
16#include <linux/pagevec.h>
17#include <linux/swap.h>
18
19#include "f2fs.h"
20#include "node.h"
21#include "segment.h"
22#include "trace.h"
23#include <trace/events/f2fs.h>
24
 25#define on_build_free_nids(nmi) mutex_is_locked(&(nmi)->build_lock)
26
 27#ifndef PTR_ERR_OR_ZERO
28static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr)
29{
30 if (IS_ERR(ptr))
31 return PTR_ERR(ptr);
32 else
33 return 0;
34}
35#endif
36
 37static struct kmem_cache *nat_entry_slab;
38static struct kmem_cache *free_nid_slab;
39static struct kmem_cache *nat_entry_set_slab;
40
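/*
 * Check whether the in-memory structures of the given type still fit in
 * their share of available low memory, based on nm_i->ram_thresh.
 */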
41bool available_free_memory(struct f2fs_sb_info *sbi, int type)
42{
43 struct f2fs_nm_info *nm_i = NM_I(sbi);
44 struct sysinfo val;
45 unsigned long avail_ram;
46 unsigned long mem_size = 0;
47 bool res = false;
48
49 si_meminfo(&val);
50
51 /* only uses low memory */
52 avail_ram = val.totalram - val.totalhigh;
53
54 /*
 55 * give 25%, 25%, 50%, 50%, 50% of memory to each component, respectively
56 */
57 if (type == FREE_NIDS) {
58 mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >>
 59 PAGE_SHIFT;
 60 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
61 } else if (type == NAT_ENTRIES) {
62 mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
 63 PAGE_SHIFT;
 64 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
 65 if (excess_cached_nats(sbi))
66 res = false;
67 if (nm_i->nat_cnt > DEF_NAT_CACHE_THRESHOLD)
68 res = false;
 69 } else if (type == DIRTY_DENTS) {
70 if (sbi->sb->s_bdi->dirty_exceeded)
71 return false;
72 mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
73 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
74 } else if (type == INO_ENTRIES) {
75 int i;
76
77 for (i = 0; i <= UPDATE_INO; i++)
78 mem_size += (sbi->im[i].ino_num *
 79 sizeof(struct ino_entry)) >> PAGE_SHIFT;
 80 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
81 } else if (type == EXTENT_CACHE) {
 82 mem_size = (atomic_read(&sbi->total_ext_tree) *
83 sizeof(struct extent_tree) +
 84 atomic_read(&sbi->total_ext_node) *
 85 sizeof(struct extent_node)) >> PAGE_SHIFT;
 86 res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
87 } else {
 88 if (!sbi->sb->s_bdi->dirty_exceeded)
89 return true;
 90 }
91 return res;
92}
93
94static void clear_node_page_dirty(struct page *page)
95{
96 struct address_space *mapping = page->mapping;
 97 unsigned long flags;
98
99 if (PageDirty(page)) {
100 spin_lock_irqsave(&mapping->tree_lock, flags);
101 radix_tree_tag_clear(&mapping->page_tree,
102 page_index(page),
103 PAGECACHE_TAG_DIRTY);
104 spin_unlock_irqrestore(&mapping->tree_lock, flags);
105
106 clear_page_dirty_for_io(page);
107 dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES);
108 }
109 ClearPageUptodate(page);
110}
111
112static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
113{
114 pgoff_t index = current_nat_addr(sbi, nid);
115 return get_meta_page(sbi, index);
116}
117
118static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
119{
120 struct page *src_page;
121 struct page *dst_page;
122 pgoff_t src_off;
123 pgoff_t dst_off;
124 void *src_addr;
125 void *dst_addr;
126 struct f2fs_nm_info *nm_i = NM_I(sbi);
127
128 src_off = current_nat_addr(sbi, nid);
129 dst_off = next_nat_addr(sbi, src_off);
130
131 /* get current nat block page with lock */
132 src_page = get_meta_page(sbi, src_off);
133 dst_page = grab_meta_page(sbi, dst_off);
134 f2fs_bug_on(sbi, PageDirty(src_page));
135
136 src_addr = page_address(src_page);
137 dst_addr = page_address(dst_page);
 138 memcpy(dst_addr, src_addr, PAGE_SIZE);
 139 set_page_dirty(dst_page);
140 f2fs_put_page(src_page, 1);
141
142 set_to_next_nat(nm_i, nid);
143
144 return dst_page;
145}
146
147static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
148{
149 return radix_tree_lookup(&nm_i->nat_root, n);
150}
151
152static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
153 nid_t start, unsigned int nr, struct nat_entry **ep)
154{
155 return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr);
156}
157
158static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
159{
160 list_del(&e->list);
161 radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
162 nm_i->nat_cnt--;
163 kmem_cache_free(nat_entry_slab, e);
164}
165
166static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
167 struct nat_entry *ne)
168{
169 nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
170 struct nat_entry_set *head;
171
172 if (get_nat_flag(ne, IS_DIRTY))
173 return;
174
175 head = radix_tree_lookup(&nm_i->nat_set_root, set);
176 if (!head) {
177 head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_NOFS);
178
179 INIT_LIST_HEAD(&head->entry_list);
180 INIT_LIST_HEAD(&head->set_list);
181 head->set = set;
182 head->entry_cnt = 0;
183 f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head);
184 }
185 list_move_tail(&ne->list, &head->entry_list);
186 nm_i->dirty_nat_cnt++;
187 head->entry_cnt++;
188 set_nat_flag(ne, IS_DIRTY, true);
189}
190
191static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
192 struct nat_entry *ne)
193{
194 nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
195 struct nat_entry_set *head;
196
197 head = radix_tree_lookup(&nm_i->nat_set_root, set);
198 if (head) {
199 list_move_tail(&ne->list, &nm_i->nat_entries);
200 set_nat_flag(ne, IS_DIRTY, false);
201 head->entry_cnt--;
202 nm_i->dirty_nat_cnt--;
203 }
204}
205
206static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
207 nid_t start, unsigned int nr, struct nat_entry_set **ep)
208{
209 return radix_tree_gang_lookup(&nm_i->nat_set_root, (void **)ep,
210 start, nr);
211}
212
213int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
214{
215 struct f2fs_nm_info *nm_i = NM_I(sbi);
216 struct nat_entry *e;
217 bool need = false;
218
219 down_read(&nm_i->nat_tree_lock);
220 e = __lookup_nat_cache(nm_i, nid);
221 if (e) {
222 if (!get_nat_flag(e, IS_CHECKPOINTED) &&
223 !get_nat_flag(e, HAS_FSYNCED_INODE))
224 need = true;
225 }
226 up_read(&nm_i->nat_tree_lock);
227 return need;
228}
229
230bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
231{
232 struct f2fs_nm_info *nm_i = NM_I(sbi);
233 struct nat_entry *e;
234 bool is_cp = true;
235
236 down_read(&nm_i->nat_tree_lock);
237 e = __lookup_nat_cache(nm_i, nid);
238 if (e && !get_nat_flag(e, IS_CHECKPOINTED))
239 is_cp = false;
240 up_read(&nm_i->nat_tree_lock);
241 return is_cp;
242}
243
244bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
245{
246 struct f2fs_nm_info *nm_i = NM_I(sbi);
247 struct nat_entry *e;
248 bool need_update = true;
249
250 down_read(&nm_i->nat_tree_lock);
251 e = __lookup_nat_cache(nm_i, ino);
252 if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
253 (get_nat_flag(e, IS_CHECKPOINTED) ||
254 get_nat_flag(e, HAS_FSYNCED_INODE)))
255 need_update = false;
256 up_read(&nm_i->nat_tree_lock);
257 return need_update;
258}
259
260static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
261{
262 struct nat_entry *new;
263
264 new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
265 f2fs_radix_tree_insert(&nm_i->nat_root, nid, new);
266 memset(new, 0, sizeof(struct nat_entry));
267 nat_set_nid(new, nid);
268 nat_reset_flag(new);
269 list_add_tail(&new->list, &nm_i->nat_entries);
270 nm_i->nat_cnt++;
271 return new;
272}
273
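/*
 * Cache a raw NAT entry read from the journal or a NAT block; if it is
 * already cached, verify that the cached copy matches the on-disk one.
 */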
 274static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
 275 struct f2fs_nat_entry *ne)
276{
 277 struct f2fs_nm_info *nm_i = NM_I(sbi);
 278 struct nat_entry *e;
279
 280 e = __lookup_nat_cache(nm_i, nid);
281 if (!e) {
282 e = grab_nat_entry(nm_i, nid);
283 node_info_from_raw_nat(&e->ni, ne);
 284 } else {
285 f2fs_bug_on(sbi, nat_get_ino(e) != ne->ino ||
286 nat_get_blkaddr(e) != ne->block_addr ||
287 nat_get_version(e) != ne->version);
 288 }
 289}
290
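/*
 * Update the cached NAT entry of @ni with a new block address, mark it
 * dirty, and record the fsync state of the owning inode.
 */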
291static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
292 block_t new_blkaddr, bool fsync_done)
293{
294 struct f2fs_nm_info *nm_i = NM_I(sbi);
295 struct nat_entry *e;
296
297 down_write(&nm_i->nat_tree_lock);
298 e = __lookup_nat_cache(nm_i, ni->nid);
299 if (!e) {
300 e = grab_nat_entry(nm_i, ni->nid);
301 copy_node_info(&e->ni, ni);
302 f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
303 } else if (new_blkaddr == NEW_ADDR) {
304 /*
305 * when nid is reallocated,
 306 * previous nat entry may remain in nat cache.
307 * So, reinitialize it with new information.
308 */
309 copy_node_info(&e->ni, ni);
310 f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR);
311 }
312
313 /* sanity check */
314 f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr);
315 f2fs_bug_on(sbi, nat_get_blkaddr(e) == NULL_ADDR &&
316 new_blkaddr == NULL_ADDR);
317 f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
318 new_blkaddr == NEW_ADDR);
319 f2fs_bug_on(sbi, nat_get_blkaddr(e) != NEW_ADDR &&
320 nat_get_blkaddr(e) != NULL_ADDR &&
321 new_blkaddr == NEW_ADDR);
322
323 /* increment version no as node is removed */
324 if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
325 unsigned char version = nat_get_version(e);
326 nat_set_version(e, inc_node_version(version));
327
328 /* in order to reuse the nid */
329 if (nm_i->next_scan_nid > ni->nid)
330 nm_i->next_scan_nid = ni->nid;
331 }
332
333 /* change address */
334 nat_set_blkaddr(e, new_blkaddr);
335 if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
336 set_nat_flag(e, IS_CHECKPOINTED, false);
337 __set_nat_cache_dirty(nm_i, e);
338
339 /* update fsync_mark if its inode nat entry is still alive */
340 if (ni->nid != ni->ino)
341 e = __lookup_nat_cache(nm_i, ni->ino);
342 if (e) {
343 if (fsync_done && ni->nid == ni->ino)
344 set_nat_flag(e, HAS_FSYNCED_INODE, true);
345 set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
346 }
347 up_write(&nm_i->nat_tree_lock);
348}
349
350int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
351{
352 struct f2fs_nm_info *nm_i = NM_I(sbi);
353 int nr = nr_shrink;
354
355 if (!down_write_trylock(&nm_i->nat_tree_lock))
356 return 0;
357
358 while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
359 struct nat_entry *ne;
360 ne = list_first_entry(&nm_i->nat_entries,
361 struct nat_entry, list);
362 __del_from_nat_cache(nm_i, ne);
363 nr_shrink--;
364 }
365 up_write(&nm_i->nat_tree_lock);
366 return nr - nr_shrink;
367}
368
369/*
370 * This function always returns success
371 */
372void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
373{
374 struct f2fs_nm_info *nm_i = NM_I(sbi);
375 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
 376 struct f2fs_journal *journal = curseg->journal;
 377 nid_t start_nid = START_NID(nid);
378 struct f2fs_nat_block *nat_blk;
379 struct page *page = NULL;
380 struct f2fs_nat_entry ne;
381 struct nat_entry *e;
382 int i;
383
384 ni->nid = nid;
385
386 /* Check nat cache */
387 down_read(&nm_i->nat_tree_lock);
388 e = __lookup_nat_cache(nm_i, nid);
389 if (e) {
390 ni->ino = nat_get_ino(e);
391 ni->blk_addr = nat_get_blkaddr(e);
392 ni->version = nat_get_version(e);
 393 up_read(&nm_i->nat_tree_lock);
 394 return;
 395 }
 396
397 memset(&ne, 0, sizeof(struct f2fs_nat_entry));
398
399 /* Check current segment summary */
 400 down_read(&curseg->journal_rwsem);
 401 i = lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
 402 if (i >= 0) {
 403 ne = nat_in_journal(journal, i);
 404 node_info_from_raw_nat(ni, &ne);
405 }
 406 up_read(&curseg->journal_rwsem);
 407 if (i >= 0)
408 goto cache;
409
410 /* Fill node_info from nat page */
411 page = get_current_nat_page(sbi, start_nid);
412 nat_blk = (struct f2fs_nat_block *)page_address(page);
413 ne = nat_blk->entries[nid - start_nid];
414 node_info_from_raw_nat(ni, &ne);
415 f2fs_put_page(page, 1);
416cache:
 417 up_read(&nm_i->nat_tree_lock);
 418 /* cache nat entry */
 419 down_write(&nm_i->nat_tree_lock);
 420 cache_nat_entry(sbi, nid, &ne);
 421 up_write(&nm_i->nat_tree_lock);
 422}
423
 424/*
425 * readahead MAX_RA_NODE number of node pages.
426 */
427static void ra_node_pages(struct page *parent, int start, int n)
428{
429 struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
430 struct blk_plug plug;
431 int i, end;
432 nid_t nid;
433
434 blk_start_plug(&plug);
435
436 /* Then, try readahead for siblings of the desired node */
437 end = start + n;
438 end = min(end, NIDS_PER_BLOCK);
439 for (i = start; i < end; i++) {
440 nid = get_nid(parent, i, false);
441 ra_node_page(sbi, nid);
442 }
443
444 blk_finish_plug(&plug);
445}
446
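/*
 * Return the next file offset to probe after @pgofs, skipping the whole
 * range covered by the node block that was found missing at dn->cur_level.
 */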
 447pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs)
448{
449 const long direct_index = ADDRS_PER_INODE(dn->inode);
450 const long direct_blks = ADDRS_PER_BLOCK;
451 const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
452 unsigned int skipped_unit = ADDRS_PER_BLOCK;
453 int cur_level = dn->cur_level;
454 int max_level = dn->max_level;
455 pgoff_t base = 0;
456
457 if (!dn->max_level)
458 return pgofs + 1;
459
460 while (max_level-- > cur_level)
461 skipped_unit *= NIDS_PER_BLOCK;
462
463 switch (dn->max_level) {
464 case 3:
465 base += 2 * indirect_blks;
466 case 2:
467 base += 2 * direct_blks;
468 case 1:
469 base += direct_index;
470 break;
471 default:
472 f2fs_bug_on(F2FS_I_SB(dn->inode), 1);
473 }
474
475 return ((pgofs - base) / skipped_unit + 1) * skipped_unit + base;
476}
477
 478/*
479 * The maximum depth is four.
480 * Offset[0] will have raw inode offset.
481 */
 482static int get_node_path(struct inode *inode, long block,
 483 int offset[4], unsigned int noffset[4])
484{
 485 const long direct_index = ADDRS_PER_INODE(inode);
 486 const long direct_blks = ADDRS_PER_BLOCK;
487 const long dptrs_per_blk = NIDS_PER_BLOCK;
488 const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
489 const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK;
490 int n = 0;
491 int level = 0;
492
493 noffset[0] = 0;
494
495 if (block < direct_index) {
496 offset[n] = block;
497 goto got;
498 }
499 block -= direct_index;
500 if (block < direct_blks) {
501 offset[n++] = NODE_DIR1_BLOCK;
502 noffset[n] = 1;
503 offset[n] = block;
504 level = 1;
505 goto got;
506 }
507 block -= direct_blks;
508 if (block < direct_blks) {
509 offset[n++] = NODE_DIR2_BLOCK;
510 noffset[n] = 2;
511 offset[n] = block;
512 level = 1;
513 goto got;
514 }
515 block -= direct_blks;
516 if (block < indirect_blks) {
517 offset[n++] = NODE_IND1_BLOCK;
518 noffset[n] = 3;
519 offset[n++] = block / direct_blks;
520 noffset[n] = 4 + offset[n - 1];
521 offset[n] = block % direct_blks;
522 level = 2;
523 goto got;
524 }
525 block -= indirect_blks;
526 if (block < indirect_blks) {
527 offset[n++] = NODE_IND2_BLOCK;
528 noffset[n] = 4 + dptrs_per_blk;
529 offset[n++] = block / direct_blks;
530 noffset[n] = 5 + dptrs_per_blk + offset[n - 1];
531 offset[n] = block % direct_blks;
532 level = 2;
533 goto got;
534 }
535 block -= indirect_blks;
536 if (block < dindirect_blks) {
537 offset[n++] = NODE_DIND_BLOCK;
538 noffset[n] = 5 + (dptrs_per_blk * 2);
539 offset[n++] = block / indirect_blks;
540 noffset[n] = 6 + (dptrs_per_blk * 2) +
541 offset[n - 1] * (dptrs_per_blk + 1);
542 offset[n++] = (block / direct_blks) % dptrs_per_blk;
543 noffset[n] = 7 + (dptrs_per_blk * 2) +
544 offset[n - 2] * (dptrs_per_blk + 1) +
545 offset[n - 1];
546 offset[n] = block % direct_blks;
547 level = 3;
548 goto got;
549 } else {
550 BUG();
551 }
552got:
553 return level;
554}
555
556/*
557 * Caller should call f2fs_put_dnode(dn).
558 * Also, it should grab and release a rwsem by calling f2fs_lock_op() and
559 * f2fs_unlock_op() only if ro is not set RDONLY_NODE.
560 * In the case of RDONLY_NODE, we don't need to care about mutex.
561 */
562int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
563{
564 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
565 struct page *npage[4];
566 struct page *parent = NULL;
567 int offset[4];
568 unsigned int noffset[4];
569 nid_t nids[4];
 570 int level, i = 0;
 571 int err = 0;
572
 573 level = get_node_path(dn->inode, index, offset, noffset);
 574
575 nids[0] = dn->inode->i_ino;
576 npage[0] = dn->inode_page;
577
578 if (!npage[0]) {
579 npage[0] = get_node_page(sbi, nids[0]);
580 if (IS_ERR(npage[0]))
581 return PTR_ERR(npage[0]);
582 }
583
584 /* if inline_data is set, should not report any block indices */
585 if (f2fs_has_inline_data(dn->inode) && index) {
586 err = -ENOENT;
587 f2fs_put_page(npage[0], 1);
588 goto release_out;
589 }
590
591 parent = npage[0];
592 if (level != 0)
593 nids[1] = get_nid(parent, offset[0], true);
594 dn->inode_page = npage[0];
595 dn->inode_page_locked = true;
596
597 /* get indirect or direct nodes */
598 for (i = 1; i <= level; i++) {
599 bool done = false;
600
601 if (!nids[i] && mode == ALLOC_NODE) {
602 /* alloc new node */
603 if (!alloc_nid(sbi, &(nids[i]))) {
604 err = -ENOSPC;
605 goto release_pages;
606 }
607
608 dn->nid = nids[i];
609 npage[i] = new_node_page(dn, noffset[i], NULL);
610 if (IS_ERR(npage[i])) {
611 alloc_nid_failed(sbi, nids[i]);
612 err = PTR_ERR(npage[i]);
613 goto release_pages;
614 }
615
616 set_nid(parent, offset[i - 1], nids[i], i == 1);
617 alloc_nid_done(sbi, nids[i]);
618 done = true;
619 } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) {
620 npage[i] = get_node_page_ra(parent, offset[i - 1]);
621 if (IS_ERR(npage[i])) {
622 err = PTR_ERR(npage[i]);
623 goto release_pages;
624 }
625 done = true;
626 }
627 if (i == 1) {
628 dn->inode_page_locked = false;
629 unlock_page(parent);
630 } else {
631 f2fs_put_page(parent, 1);
632 }
633
634 if (!done) {
635 npage[i] = get_node_page(sbi, nids[i]);
636 if (IS_ERR(npage[i])) {
637 err = PTR_ERR(npage[i]);
638 f2fs_put_page(npage[0], 0);
639 goto release_out;
640 }
641 }
642 if (i < level) {
643 parent = npage[i];
644 nids[i + 1] = get_nid(parent, offset[i], false);
645 }
646 }
647 dn->nid = nids[level];
648 dn->ofs_in_node = offset[level];
649 dn->node_page = npage[level];
650 dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
651 return 0;
652
653release_pages:
654 f2fs_put_page(parent, 1);
655 if (i > 1)
656 f2fs_put_page(npage[0], 0);
657release_out:
658 dn->inode_page = NULL;
659 dn->node_page = NULL;
 660 if (err == -ENOENT) {
661 dn->cur_level = i;
662 dn->max_level = level;
 663 dn->ofs_in_node = offset[level];
 664 }
 665 return err;
666}
667
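/*
 * Release the block address and nid referenced by @dn and drop the node
 * page from the node address space.
 */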
668static void truncate_node(struct dnode_of_data *dn)
669{
670 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
671 struct node_info ni;
672
673 get_node_info(sbi, dn->nid, &ni);
674 if (dn->inode->i_blocks == 0) {
675 f2fs_bug_on(sbi, ni.blk_addr != NULL_ADDR);
676 goto invalidate;
677 }
678 f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
679
680 /* Deallocate node address */
681 invalidate_blocks(sbi, ni.blk_addr);
682 dec_valid_node_count(sbi, dn->inode);
683 set_node_addr(sbi, &ni, NULL_ADDR, false);
684
685 if (dn->nid == dn->inode->i_ino) {
686 remove_orphan_inode(sbi, dn->nid);
687 dec_valid_inode_count(sbi);
 688 f2fs_inode_synced(dn->inode);
 689 }
690invalidate:
691 clear_node_page_dirty(dn->node_page);
692 set_sbi_flag(sbi, SBI_IS_DIRTY);
693
694 f2fs_put_page(dn->node_page, 1);
695
696 invalidate_mapping_pages(NODE_MAPPING(sbi),
697 dn->node_page->index, dn->node_page->index);
698
699 dn->node_page = NULL;
700 trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
701}
702
703static int truncate_dnode(struct dnode_of_data *dn)
704{
705 struct page *page;
706
707 if (dn->nid == 0)
708 return 1;
709
710 /* get direct node */
711 page = get_node_page(F2FS_I_SB(dn->inode), dn->nid);
712 if (IS_ERR(page) && PTR_ERR(page) == -ENOENT)
713 return 1;
714 else if (IS_ERR(page))
715 return PTR_ERR(page);
716
717 /* Make dnode_of_data for parameter */
718 dn->node_page = page;
719 dn->ofs_in_node = 0;
720 truncate_data_blocks(dn);
721 truncate_node(dn);
722 return 1;
723}
724
725static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
726 int ofs, int depth)
727{
728 struct dnode_of_data rdn = *dn;
729 struct page *page;
730 struct f2fs_node *rn;
731 nid_t child_nid;
732 unsigned int child_nofs;
733 int freed = 0;
734 int i, ret;
735
736 if (dn->nid == 0)
737 return NIDS_PER_BLOCK + 1;
738
739 trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr);
740
741 page = get_node_page(F2FS_I_SB(dn->inode), dn->nid);
742 if (IS_ERR(page)) {
743 trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page));
744 return PTR_ERR(page);
745 }
746
 747 ra_node_pages(page, ofs, NIDS_PER_BLOCK);
748
 749 rn = F2FS_NODE(page);
750 if (depth < 3) {
751 for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) {
752 child_nid = le32_to_cpu(rn->in.nid[i]);
753 if (child_nid == 0)
754 continue;
755 rdn.nid = child_nid;
756 ret = truncate_dnode(&rdn);
757 if (ret < 0)
758 goto out_err;
 759 if (set_nid(page, i, 0, false))
760 dn->node_changed = true;
 761 }
762 } else {
763 child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1;
764 for (i = ofs; i < NIDS_PER_BLOCK; i++) {
765 child_nid = le32_to_cpu(rn->in.nid[i]);
766 if (child_nid == 0) {
767 child_nofs += NIDS_PER_BLOCK + 1;
768 continue;
769 }
770 rdn.nid = child_nid;
771 ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1);
772 if (ret == (NIDS_PER_BLOCK + 1)) {
 773 if (set_nid(page, i, 0, false))
774 dn->node_changed = true;
 775 child_nofs += ret;
776 } else if (ret < 0 && ret != -ENOENT) {
777 goto out_err;
778 }
779 }
780 freed = child_nofs;
781 }
782
783 if (!ofs) {
784 /* remove current indirect node */
785 dn->node_page = page;
786 truncate_node(dn);
787 freed++;
788 } else {
789 f2fs_put_page(page, 1);
790 }
791 trace_f2fs_truncate_nodes_exit(dn->inode, freed);
792 return freed;
793
794out_err:
795 f2fs_put_page(page, 1);
796 trace_f2fs_truncate_nodes_exit(dn->inode, ret);
797 return ret;
798}
799
800static int truncate_partial_nodes(struct dnode_of_data *dn,
801 struct f2fs_inode *ri, int *offset, int depth)
802{
803 struct page *pages[2];
804 nid_t nid[3];
805 nid_t child_nid;
806 int err = 0;
807 int i;
808 int idx = depth - 2;
809
810 nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
811 if (!nid[0])
812 return 0;
813
814 /* get indirect nodes in the path */
815 for (i = 0; i < idx + 1; i++) {
816 /* reference count'll be increased */
817 pages[i] = get_node_page(F2FS_I_SB(dn->inode), nid[i]);
818 if (IS_ERR(pages[i])) {
819 err = PTR_ERR(pages[i]);
820 idx = i - 1;
821 goto fail;
822 }
823 nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
824 }
825
 826 ra_node_pages(pages[idx], offset[idx + 1], NIDS_PER_BLOCK);
827
 828 /* free direct nodes linked to a partial indirect node */
829 for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) {
830 child_nid = get_nid(pages[idx], i, false);
831 if (!child_nid)
832 continue;
833 dn->nid = child_nid;
834 err = truncate_dnode(dn);
835 if (err < 0)
836 goto fail;
 837 if (set_nid(pages[idx], i, 0, false))
838 dn->node_changed = true;
 839 }
840
841 if (offset[idx + 1] == 0) {
842 dn->node_page = pages[idx];
843 dn->nid = nid[idx];
844 truncate_node(dn);
845 } else {
846 f2fs_put_page(pages[idx], 1);
847 }
848 offset[idx]++;
849 offset[idx + 1] = 0;
850 idx--;
851fail:
852 for (i = idx; i >= 0; i--)
853 f2fs_put_page(pages[i], 1);
854
855 trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err);
856
857 return err;
858}
859
860/*
861 * All the block addresses of data and nodes should be nullified.
862 */
863int truncate_inode_blocks(struct inode *inode, pgoff_t from)
864{
865 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
866 int err = 0, cont = 1;
867 int level, offset[4], noffset[4];
868 unsigned int nofs = 0;
869 struct f2fs_inode *ri;
870 struct dnode_of_data dn;
871 struct page *page;
872
873 trace_f2fs_truncate_inode_blocks_enter(inode, from);
874
 875 level = get_node_path(inode, from, offset, noffset);
 876
 877 page = get_node_page(sbi, inode->i_ino);
878 if (IS_ERR(page)) {
879 trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page));
880 return PTR_ERR(page);
881 }
882
883 set_new_dnode(&dn, inode, page, NULL, 0);
884 unlock_page(page);
885
886 ri = F2FS_INODE(page);
887 switch (level) {
888 case 0:
889 case 1:
890 nofs = noffset[1];
891 break;
892 case 2:
893 nofs = noffset[1];
894 if (!offset[level - 1])
895 goto skip_partial;
896 err = truncate_partial_nodes(&dn, ri, offset, level);
897 if (err < 0 && err != -ENOENT)
898 goto fail;
899 nofs += 1 + NIDS_PER_BLOCK;
900 break;
901 case 3:
902 nofs = 5 + 2 * NIDS_PER_BLOCK;
903 if (!offset[level - 1])
904 goto skip_partial;
905 err = truncate_partial_nodes(&dn, ri, offset, level);
906 if (err < 0 && err != -ENOENT)
907 goto fail;
908 break;
909 default:
910 BUG();
911 }
912
913skip_partial:
914 while (cont) {
915 dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
916 switch (offset[0]) {
917 case NODE_DIR1_BLOCK:
918 case NODE_DIR2_BLOCK:
919 err = truncate_dnode(&dn);
920 break;
921
922 case NODE_IND1_BLOCK:
923 case NODE_IND2_BLOCK:
924 err = truncate_nodes(&dn, nofs, offset[1], 2);
925 break;
926
927 case NODE_DIND_BLOCK:
928 err = truncate_nodes(&dn, nofs, offset[1], 3);
929 cont = 0;
930 break;
931
932 default:
933 BUG();
934 }
935 if (err < 0 && err != -ENOENT)
936 goto fail;
937 if (offset[1] == 0 &&
938 ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
939 lock_page(page);
 940 BUG_ON(page->mapping != NODE_MAPPING(sbi));
 941 f2fs_wait_on_page_writeback(page, NODE, true);
 942 ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
943 set_page_dirty(page);
944 unlock_page(page);
945 }
946 offset[1] = 0;
947 offset[0]++;
948 nofs += err;
949 }
950fail:
951 f2fs_put_page(page, 0);
952 trace_f2fs_truncate_inode_blocks_exit(inode, err);
953 return err > 0 ? 0 : err;
954}
955
956int truncate_xattr_node(struct inode *inode, struct page *page)
957{
958 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
959 nid_t nid = F2FS_I(inode)->i_xattr_nid;
960 struct dnode_of_data dn;
961 struct page *npage;
962
963 if (!nid)
964 return 0;
965
966 npage = get_node_page(sbi, nid);
967 if (IS_ERR(npage))
968 return PTR_ERR(npage);
969
 970 f2fs_i_xnid_write(inode, 0);
 971
972 /* need to do checkpoint during fsync */
973 F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
974
975 set_new_dnode(&dn, inode, page, npage, nid);
976
977 if (page)
978 dn.inode_page_locked = true;
979 truncate_node(&dn);
980 return 0;
981}
982
983/*
984 * Caller should grab and release a rwsem by calling f2fs_lock_op() and
985 * f2fs_unlock_op().
986 */
987int remove_inode_page(struct inode *inode)
988{
989 struct dnode_of_data dn;
990 int err;
991
992 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
993 err = get_dnode_of_data(&dn, 0, LOOKUP_NODE);
994 if (err)
995 return err;
996
997 err = truncate_xattr_node(inode, dn.inode_page);
998 if (err) {
999 f2fs_put_dnode(&dn);
1000 return err;
1001 }
1002
1003 /* remove potential inline_data blocks */
1004 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
1005 S_ISLNK(inode->i_mode))
1006 truncate_data_blocks_range(&dn, 1);
1007
1008 /* 0 is possible, after f2fs_new_inode() has failed */
1009 f2fs_bug_on(F2FS_I_SB(inode),
1010 inode->i_blocks != 0 && inode->i_blocks != 1);
1011
1012 /* will put inode & node pages */
1013 truncate_node(&dn);
1014 return 0;
1015}
1016
1017struct page *new_inode_page(struct inode *inode)
1018{
1019 struct dnode_of_data dn;
1020
1021 /* allocate inode page for new inode */
1022 set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
1023
1024 /* caller should f2fs_put_page(page, 1); */
1025 return new_node_page(&dn, 0, NULL);
1026}
1027
1028struct page *new_node_page(struct dnode_of_data *dn,
1029 unsigned int ofs, struct page *ipage)
1030{
1031 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1032 struct node_info old_ni, new_ni;
1033 struct page *page;
1034 int err;
1035
 1036 if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
 1037 return ERR_PTR(-EPERM);
1038
 1039 page = f2fs_grab_cache_page(NODE_MAPPING(sbi), dn->nid, false);
 1040 if (!page)
1041 return ERR_PTR(-ENOMEM);
1042
1043 if (unlikely(!inc_valid_node_count(sbi, dn->inode))) {
1044 err = -ENOSPC;
1045 goto fail;
1046 }
1047
1048 get_node_info(sbi, dn->nid, &old_ni);
1049
1050 /* Reinitialize old_ni with new node page */
1051 f2fs_bug_on(sbi, old_ni.blk_addr != NULL_ADDR);
1052 new_ni = old_ni;
1053 new_ni.ino = dn->inode->i_ino;
1054 set_node_addr(sbi, &new_ni, NEW_ADDR, false);
1055
 1056 f2fs_wait_on_page_writeback(page, NODE, true);
 1057 fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
1058 set_cold_node(dn->inode, page);
 1059 if (!PageUptodate(page))
 1060 SetPageUptodate(page);
 1061 if (set_page_dirty(page))
1062 dn->node_changed = true;
 1063
1064 if (f2fs_has_xattr_block(ofs))
 1065 f2fs_i_xnid_write(dn->inode, dn->nid);
 1066
 1067 if (ofs == 0)
1068 inc_valid_inode_count(sbi);
 1069 return page;
1070
1071fail:
1072 clear_node_page_dirty(page);
1073 f2fs_put_page(page, 1);
1074 return ERR_PTR(err);
1075}
1076
1077/*
1078 * Caller should do after getting the following values.
1079 * 0: f2fs_put_page(page, 0)
1080 * LOCKED_PAGE or error: f2fs_put_page(page, 1)
1081 */
1082static int read_node_page(struct page *page, int rw)
1083{
1084 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1085 struct node_info ni;
1086 struct f2fs_io_info fio = {
1087 .sbi = sbi,
1088 .type = NODE,
1089 .rw = rw,
1090 .page = page,
1091 .encrypted_page = NULL,
1092 };
1093
 1094 if (PageUptodate(page))
1095 return LOCKED_PAGE;
1096
 1097 get_node_info(sbi, page->index, &ni);
1098
1099 if (unlikely(ni.blk_addr == NULL_ADDR)) {
1100 ClearPageUptodate(page);
1101 return -ENOENT;
1102 }
1103
 1104 fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr;
 1105 return f2fs_submit_page_bio(&fio);
1106}
1107
1108/*
1109 * Readahead a node page
1110 */
1111void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
1112{
1113 struct page *apage;
1114 int err;
1115
 1116 if (!nid)
1117 return;
1118 f2fs_bug_on(sbi, check_nid_range(sbi, nid));
1119
 1120 rcu_read_lock();
1121 apage = radix_tree_lookup(&NODE_MAPPING(sbi)->page_tree, nid);
1122 rcu_read_unlock();
1123 if (apage)
 1124 return;
 1125
 1126 apage = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false);
 1127 if (!apage)
1128 return;
1129
1130 err = read_node_page(apage, READA);
1131 f2fs_put_page(apage, err ? 1 : 0);
1132}
1133
 1134static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
 1135 struct page *parent, int start)
 1136{
1137 struct page *page;
1138 int err;
 1139
1140 if (!nid)
1141 return ERR_PTR(-ENOENT);
1142 f2fs_bug_on(sbi, check_nid_range(sbi, nid));
 1143repeat:
 1144 page = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false);
 1145 if (!page)
1146 return ERR_PTR(-ENOMEM);
1147
1148 err = read_node_page(page, READ_SYNC);
1149 if (err < 0) {
1150 f2fs_put_page(page, 1);
1151 return ERR_PTR(err);
 1152 } else if (err == LOCKED_PAGE) {
1153 goto page_hit;
 1154 }
1155
 1156 if (parent)
 1157 ra_node_pages(parent, start + 1, MAX_RA_NODE);
 1158
 1159 lock_page(page);
1160
 1161 if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1162 f2fs_put_page(page, 1);
1163 goto repeat;
1164 }
 1165
1166 if (unlikely(!PageUptodate(page)))
1167 goto out_err;
 1168page_hit:
 1169 mark_page_accessed(page);
 1170
 1171 if (unlikely(nid != nid_of_node(page))) {
1172 f2fs_bug_on(sbi, 1);
1173 ClearPageUptodate(page);
1174out_err:
1175 f2fs_put_page(page, 1);
1176 return ERR_PTR(-EIO);
1177 }
 1178 return page;
1179}
1180
 1181struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
1182{
1183 return __get_node_page(sbi, nid, NULL, 0);
1184}
1185
 1186struct page *get_node_page_ra(struct page *parent, int start)
1187{
1188 struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
 1189 nid_t nid = get_nid(parent, start, false);
 1190
 1191 return __get_node_page(sbi, nid, parent, start);
 1192}
1193
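/*
 * If the inode is still cached and its first page carries dirty inline
 * data, write that inline data back before its node page is flushed.
 */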
 1194static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
1195{
1196 struct inode *inode;
1197 struct page *page;
 1198 int ret;
 1199
1200 /* should flush inline_data before evict_inode */
1201 inode = ilookup(sbi->sb, ino);
1202 if (!inode)
1203 return;
1204
 1205 page = find_get_page(inode->i_mapping, 0);
 1206 if (!page)
1207 goto iput_out;
1208
 1209 if (!trylock_page(page))
1210 goto release_out;
1211
 1212 if (!PageUptodate(page))
1213 goto page_out;
1214
1215 if (!PageDirty(page))
1216 goto page_out;
1217
1218 if (!clear_page_dirty_for_io(page))
1219 goto page_out;
1220
 1221 ret = f2fs_write_inline_data(inode, page);
1222 inode_dec_dirty_pages(inode);
1223 if (ret)
 1224 set_page_dirty(page);
1225page_out:
 1226 unlock_page(page);
1227release_out:
1228 f2fs_put_page(page, 0);
 1229iput_out:
1230 iput(inode);
1231}
1232
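/*
 * Write out a node page picked by garbage collection: synchronously for
 * foreground GC, otherwise just redirty it for background writeback.
 */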
 1233void move_node_page(struct page *node_page, int gc_type)
1234{
1235 if (gc_type == FG_GC) {
1236 struct f2fs_sb_info *sbi = F2FS_P_SB(node_page);
1237 struct writeback_control wbc = {
1238 .sync_mode = WB_SYNC_ALL,
1239 .nr_to_write = 1,
1240 .for_reclaim = 0,
1241 };
1242
1243 set_page_dirty(node_page);
1244 f2fs_wait_on_page_writeback(node_page, NODE, true);
1245
1246 f2fs_bug_on(sbi, PageWriteback(node_page));
1247 if (!clear_page_dirty_for_io(node_page))
1248 goto out_page;
1249
1250 if (NODE_MAPPING(sbi)->a_ops->writepage(node_page, &wbc))
1251 unlock_page(node_page);
1252 goto release_page;
1253 } else {
1254 /* set page dirty and write it */
1255 if (!PageWriteback(node_page))
1256 set_page_dirty(node_page);
1257 }
1258out_page:
1259 unlock_page(node_page);
1260release_page:
1261 f2fs_put_page(node_page, 0);
1262}
1263
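/*
 * Find the last dirty direct node page that belongs to @ino; it marks the
 * end of the dnode chain to be written by an atomic fsync.
 */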
 1264static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
 1265{
1266 pgoff_t index, end;
1267 struct pagevec pvec;
 1268 struct page *last_page = NULL;
 1269
1270 pagevec_init(&pvec, 0);
1271 index = 0;
1272 end = ULONG_MAX;
1273
1274 while (index <= end) {
1275 int i, nr_pages;
1276 nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
1277 PAGECACHE_TAG_DIRTY,
1278 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
1279 if (nr_pages == 0)
1280 break;
1281
1282 for (i = 0; i < nr_pages; i++) {
1283 struct page *page = pvec.pages[i];
1284
1285 if (unlikely(f2fs_cp_error(sbi))) {
 1286 f2fs_put_page(last_page, 0);
 1287 pagevec_release(&pvec);
 1288 return ERR_PTR(-EIO);
 1289 }
1290
1291 if (!IS_DNODE(page) || !is_cold_node(page))
1292 continue;
1293 if (ino_of_node(page) != ino)
1294 continue;
1295
1296 lock_page(page);
1297
1298 if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1299continue_unlock:
1300 unlock_page(page);
1301 continue;
1302 }
1303 if (ino_of_node(page) != ino)
1304 goto continue_unlock;
1305
1306 if (!PageDirty(page)) {
1307 /* someone wrote it for us */
1308 goto continue_unlock;
1309 }
1310
 1311 if (last_page)
1312 f2fs_put_page(last_page, 0);
1313
1314 get_page(page);
1315 last_page = page;
1316 unlock_page(page);
1317 }
1318 pagevec_release(&pvec);
1319 cond_resched();
1320 }
1321 return last_page;
1322}
1323
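/*
 * Write back the dirty node pages of @inode for fsync. In atomic mode only
 * the last dnode page gets the fsync mark, and the scan is retried until
 * that page has actually been written.
 */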
 1324int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
 1325 struct writeback_control *wbc, bool atomic)
1326{
1327 pgoff_t index, end;
1328 struct pagevec pvec;
1329 int ret = 0;
1330 struct page *last_page = NULL;
1331 bool marked = false;
 1332 nid_t ino = inode->i_ino;
 1333
1334 if (atomic) {
1335 last_page = last_fsync_dnode(sbi, ino);
1336 if (IS_ERR_OR_NULL(last_page))
1337 return PTR_ERR_OR_ZERO(last_page);
1338 }
1339retry:
1340 pagevec_init(&pvec, 0);
1341 index = 0;
1342 end = ULONG_MAX;
1343
1344 while (index <= end) {
1345 int i, nr_pages;
1346 nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
1347 PAGECACHE_TAG_DIRTY,
1348 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
1349 if (nr_pages == 0)
1350 break;
1351
1352 for (i = 0; i < nr_pages; i++) {
1353 struct page *page = pvec.pages[i];
1354
1355 if (unlikely(f2fs_cp_error(sbi))) {
1356 f2fs_put_page(last_page, 0);
1357 pagevec_release(&pvec);
1358 return -EIO;
1359 }
1360
1361 if (!IS_DNODE(page) || !is_cold_node(page))
1362 continue;
1363 if (ino_of_node(page) != ino)
1364 continue;
1365
1366 lock_page(page);
1367
1368 if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1369continue_unlock:
1370 unlock_page(page);
1371 continue;
1372 }
1373 if (ino_of_node(page) != ino)
 1374 goto continue_unlock;
1375
 1376 if (!PageDirty(page) && page != last_page) {
1377 /* someone wrote it for us */
1378 goto continue_unlock;
1379 }
1380
1381 f2fs_wait_on_page_writeback(page, NODE, true);
1382 BUG_ON(PageWriteback(page));
1383
1384 if (!atomic || page == last_page) {
1385 set_fsync_mark(page, 1);
 1386 if (IS_INODE(page)) {
1387 if (is_inode_flag_set(inode,
1388 FI_DIRTY_INODE))
1389 update_inode(inode, page);
 1390 set_dentry_mark(page,
 1391 need_dentry_mark(sbi, ino));
 1392 }
 1393 /* may be written by other thread */
1394 if (!PageDirty(page))
1395 set_page_dirty(page);
1396 }
1397
1398 if (!clear_page_dirty_for_io(page))
1399 goto continue_unlock;
 1400
 1401 ret = NODE_MAPPING(sbi)->a_ops->writepage(page, wbc);
1402 if (ret) {
 1403 unlock_page(page);
 1404 f2fs_put_page(last_page, 0);
1405 break;
1406 }
1407 if (page == last_page) {
1408 f2fs_put_page(page, 0);
1409 marked = true;
 1410 break;
 1411 }
 1412 }
1413 pagevec_release(&pvec);
1414 cond_resched();
1415
 1416 if (ret || marked)
 1417 break;
1418 }
 1419 if (!ret && atomic && !marked) {
1420 f2fs_msg(sbi->sb, KERN_DEBUG,
1421 "Retry to write fsync mark: ino=%u, idx=%lx",
1422 ino, last_page->index);
1423 lock_page(last_page);
1424 set_page_dirty(last_page);
1425 unlock_page(last_page);
1426 goto retry;
1427 }
 1428 return ret ? -EIO: 0;
 1429}
1430
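/*
 * Write back dirty node pages in three passes: indirect nodes first, then
 * dentry dnodes, then regular file dnodes.
 */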
1431int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc)
1432{
1433 pgoff_t index, end;
1434 struct pagevec pvec;
1435 int step = 0;
 1436 int nwritten = 0;
 1437
1438 pagevec_init(&pvec, 0);
1439
1440next_step:
1441 index = 0;
 1442 end = ULONG_MAX;
 1443
1444 while (index <= end) {
1445 int i, nr_pages;
1446 nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
1447 PAGECACHE_TAG_DIRTY,
1448 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
1449 if (nr_pages == 0)
1450 break;
1451
1452 for (i = 0; i < nr_pages; i++) {
1453 struct page *page = pvec.pages[i];
1454
 1455 if (unlikely(f2fs_cp_error(sbi))) {
1456 pagevec_release(&pvec);
1457 return -EIO;
1458 }
1459
 1460 /*
1461 * flushing sequence with step:
1462 * 0. indirect nodes
1463 * 1. dentry dnodes
1464 * 2. file dnodes
1465 */
1466 if (step == 0 && IS_DNODE(page))
1467 continue;
1468 if (step == 1 && (!IS_DNODE(page) ||
1469 is_cold_node(page)))
1470 continue;
1471 if (step == 2 && (!IS_DNODE(page) ||
1472 !is_cold_node(page)))
1473 continue;
 1474lock_node:
 1475 if (!trylock_page(page))
 1476 continue;
1477
1478 if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1479continue_unlock:
1480 unlock_page(page);
1481 continue;
1482 }
 1483
1484 if (!PageDirty(page)) {
1485 /* someone wrote it for us */
1486 goto continue_unlock;
1487 }
1488
 1489 /* flush inline_data */
 1490 if (is_inline_node(page)) {
 1491 clear_inline_node(page);
1492 unlock_page(page);
1493 flush_inline_data(sbi, ino_of_node(page));
 1494 goto lock_node;
 1495 }
1496
 1497 f2fs_wait_on_page_writeback(page, NODE, true);
1498
1499 BUG_ON(PageWriteback(page));
 1500 if (!clear_page_dirty_for_io(page))
1501 goto continue_unlock;
1502
 1503 set_fsync_mark(page, 0);
1504 set_dentry_mark(page, 0);
 1505
1506 if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc))
1507 unlock_page(page);
 1508
1509 if (--wbc->nr_to_write == 0)
1510 break;
1511 }
1512 pagevec_release(&pvec);
1513 cond_resched();
1514
1515 if (wbc->nr_to_write == 0) {
1516 step = 2;
1517 break;
1518 }
1519 }
1520
1521 if (step < 2) {
1522 step++;
1523 goto next_step;
1524 }
 1525 return nwritten;
1526}
1527
1528int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
1529{
 1530 pgoff_t index = 0, end = ULONG_MAX;
 1531 struct pagevec pvec;
1532 int ret2 = 0, ret = 0;
1533
1534 pagevec_init(&pvec, 0);
1535
1536 while (index <= end) {
1537 int i, nr_pages;
1538 nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
1539 PAGECACHE_TAG_WRITEBACK,
1540 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
1541 if (nr_pages == 0)
1542 break;
1543
1544 for (i = 0; i < nr_pages; i++) {
1545 struct page *page = pvec.pages[i];
1546
1547 /* until radix tree lookup accepts end_index */
1548 if (unlikely(page->index > end))
1549 continue;
1550
1551 if (ino && ino_of_node(page) == ino) {
 1552 f2fs_wait_on_page_writeback(page, NODE, true);
 1553 if (TestClearPageError(page))
1554 ret = -EIO;
1555 }
1556 }
1557 pagevec_release(&pvec);
1558 cond_resched();
1559 }
1560
1561 if (unlikely(test_and_clear_bit(AS_ENOSPC, &NODE_MAPPING(sbi)->flags)))
1562 ret2 = -ENOSPC;
1563 if (unlikely(test_and_clear_bit(AS_EIO, &NODE_MAPPING(sbi)->flags)))
1564 ret2 = -EIO;
1565 if (!ret)
1566 ret = ret2;
1567 return ret;
1568}
1569
1570static int f2fs_write_node_page(struct page *page,
1571 struct writeback_control *wbc)
1572{
1573 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1574 nid_t nid;
1575 struct node_info ni;
1576 struct f2fs_io_info fio = {
1577 .sbi = sbi,
1578 .type = NODE,
1579 .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
1580 .page = page,
1581 .encrypted_page = NULL,
1582 };
1583
1584 trace_f2fs_writepage(page, NODE);
1585
1586 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
1587 goto redirty_out;
1588 if (unlikely(f2fs_cp_error(sbi)))
1589 goto redirty_out;
1590
 1591 /* get old block addr of this node page */
1592 nid = nid_of_node(page);
1593 f2fs_bug_on(sbi, page->index != nid);
1594
1595 if (wbc->for_reclaim) {
1596 if (!down_read_trylock(&sbi->node_write))
1597 goto redirty_out;
1598 } else {
1599 down_read(&sbi->node_write);
1600 }
1601
1602 get_node_info(sbi, nid, &ni);
1603
1604 /* This page is already truncated */
1605 if (unlikely(ni.blk_addr == NULL_ADDR)) {
1606 ClearPageUptodate(page);
1607 dec_page_count(sbi, F2FS_DIRTY_NODES);
1608 up_read(&sbi->node_write);
1609 unlock_page(page);
1610 return 0;
1611 }
1612
1613 set_page_writeback(page);
 1614 fio.old_blkaddr = ni.blk_addr;
 1615 write_node_page(nid, &fio);
 1616 set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
 1617 dec_page_count(sbi, F2FS_DIRTY_NODES);
1618 up_read(&sbi->node_write);
 1619
1620 if (wbc->for_reclaim)
1621 f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, NODE, WRITE);
1622
 1623 unlock_page(page);
1624
 1625 if (unlikely(f2fs_cp_error(sbi)))
 1626 f2fs_submit_merged_bio(sbi, NODE, WRITE);
1627
1628 return 0;
1629
1630redirty_out:
1631 redirty_page_for_writepage(wbc, page);
1632 return AOP_WRITEPAGE_ACTIVATE;
1633}
1634
1635static int f2fs_write_node_pages(struct address_space *mapping,
1636 struct writeback_control *wbc)
1637{
1638 struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
 1639 struct blk_plug plug;
 1640 long diff;
1641
 1642 /* balancing f2fs's metadata in background */
1643 f2fs_balance_fs_bg(sbi);
1644
1645 /* collect a number of dirty node pages and write together */
1646 if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
1647 goto skip_write;
1648
 1649 trace_f2fs_writepages(mapping->host, wbc, NODE);
1650
 1651 diff = nr_pages_to_write(sbi, NODE, wbc);
 1652 wbc->sync_mode = WB_SYNC_NONE;
 1653 blk_start_plug(&plug);
 1654 sync_node_pages(sbi, wbc);
 1655 blk_finish_plug(&plug);
 1656 wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
1657 return 0;
1658
1659skip_write:
1660 wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);
 1661 trace_f2fs_writepages(mapping->host, wbc, NODE);
 1662 return 0;
1663}
1664
1665static int f2fs_set_node_page_dirty(struct page *page)
1666{
1667 trace_f2fs_set_page_dirty(page, NODE);
1668
 1669 if (!PageUptodate(page))
1670 SetPageUptodate(page);
 1671 if (!PageDirty(page)) {
 1672 f2fs_set_page_dirty_nobuffers(page);
 1673 inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
1674 SetPagePrivate(page);
1675 f2fs_trace_pid(page);
1676 return 1;
1677 }
1678 return 0;
1679}
1680
1681/*
1682 * Structure of the f2fs node operations
1683 */
1684const struct address_space_operations f2fs_node_aops = {
1685 .writepage = f2fs_write_node_page,
1686 .writepages = f2fs_write_node_pages,
1687 .set_page_dirty = f2fs_set_node_page_dirty,
1688 .invalidatepage = f2fs_invalidate_page,
1689 .releasepage = f2fs_release_page,
1690};
1691
1692static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
1693 nid_t n)
1694{
1695 return radix_tree_lookup(&nm_i->free_nid_root, n);
1696}
1697
1698static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i,
1699 struct free_nid *i)
1700{
1701 list_del(&i->list);
1702 radix_tree_delete(&nm_i->free_nid_root, i->nid);
1703}
1704
1705static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
1706{
1707 struct f2fs_nm_info *nm_i = NM_I(sbi);
1708 struct free_nid *i;
1709 struct nat_entry *ne;
 1710
1711 if (!available_free_memory(sbi, FREE_NIDS))
1712 return -1;
1713
1714 /* 0 nid should not be used */
1715 if (unlikely(nid == 0))
1716 return 0;
1717
1718 if (build) {
1719 /* do not add allocated nids */
 1720 ne = __lookup_nat_cache(nm_i, nid);
 1721 if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
 1722 nat_get_blkaddr(ne) != NULL_ADDR))
 1723 return 0;
1724 }
1725
1726 i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
1727 i->nid = nid;
1728 i->state = NID_NEW;
1729
1730 if (radix_tree_preload(GFP_NOFS)) {
1731 kmem_cache_free(free_nid_slab, i);
1732 return 0;
1733 }
1734
1735 spin_lock(&nm_i->free_nid_list_lock);
1736 if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {
1737 spin_unlock(&nm_i->free_nid_list_lock);
1738 radix_tree_preload_end();
1739 kmem_cache_free(free_nid_slab, i);
1740 return 0;
1741 }
1742 list_add_tail(&i->list, &nm_i->free_nid_list);
1743 nm_i->fcnt++;
1744 spin_unlock(&nm_i->free_nid_list_lock);
1745 radix_tree_preload_end();
1746 return 1;
1747}
1748
1749static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
1750{
1751 struct free_nid *i;
1752 bool need_free = false;
1753
1754 spin_lock(&nm_i->free_nid_list_lock);
1755 i = __lookup_free_nid_list(nm_i, nid);
1756 if (i && i->state == NID_NEW) {
1757 __del_from_free_nid_list(nm_i, i);
1758 nm_i->fcnt--;
1759 need_free = true;
1760 }
1761 spin_unlock(&nm_i->free_nid_list_lock);
1762
1763 if (need_free)
1764 kmem_cache_free(free_nid_slab, i);
1765}
1766
1767static void scan_nat_page(struct f2fs_sb_info *sbi,
1768 struct page *nat_page, nid_t start_nid)
1769{
1770 struct f2fs_nm_info *nm_i = NM_I(sbi);
1771 struct f2fs_nat_block *nat_blk = page_address(nat_page);
1772 block_t blk_addr;
1773 int i;
1774
1775 i = start_nid % NAT_ENTRY_PER_BLOCK;
1776
1777 for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
1778
1779 if (unlikely(start_nid >= nm_i->max_nid))
1780 break;
1781
1782 blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
1783 f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
1784 if (blk_addr == NULL_ADDR) {
1785 if (add_free_nid(sbi, start_nid, true) < 0)
1786 break;
1787 }
1788 }
1789}
1790
 1791void build_free_nids(struct f2fs_sb_info *sbi)
 1792{
1793 struct f2fs_nm_info *nm_i = NM_I(sbi);
1794 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
 1795 struct f2fs_journal *journal = curseg->journal;
 1796 int i = 0;
1797 nid_t nid = nm_i->next_scan_nid;
1798
1799 /* Enough entries */
Jaegeuk Kimca758582016-06-16 16:41:49 -07001800 if (nm_i->fcnt >= NAT_ENTRY_PER_BLOCK)
Jaegeuk Kim315f4552015-11-29 09:25:08 -08001801 return;
1802
1803 /* readahead nat pages to be scanned */
1804 ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
1805 META_NAT, true);
1806
Jaegeuk Kim54579ba2016-01-02 09:19:41 -08001807 down_read(&nm_i->nat_tree_lock);
1808
Jaegeuk Kim315f4552015-11-29 09:25:08 -08001809 while (1) {
1810 struct page *page = get_current_nat_page(sbi, nid);
1811
1812 scan_nat_page(sbi, page, nid);
1813 f2fs_put_page(page, 1);
1814
1815 nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
1816 if (unlikely(nid >= nm_i->max_nid))
1817 nid = 0;
1818
1819 if (++i >= FREE_NID_PAGES)
1820 break;
1821 }
1822
1823	/* resume from the next nat page on the following scan to keep finding free nids */
1824 nm_i->next_scan_nid = nid;
1825
1826 /* find free nids from current sum_pages */
Chao Yufac2b092016-02-19 18:08:46 +08001827 down_read(&curseg->journal_rwsem);
Chao Yud59981d2016-02-14 18:50:40 +08001828 for (i = 0; i < nats_in_cursum(journal); i++) {
1829 block_t addr;
1830
1831 addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
1832 nid = le32_to_cpu(nid_in_journal(journal, i));
Jaegeuk Kim315f4552015-11-29 09:25:08 -08001833 if (addr == NULL_ADDR)
1834 add_free_nid(sbi, nid, true);
1835 else
1836 remove_free_nid(nm_i, nid);
1837 }
Chao Yufac2b092016-02-19 18:08:46 +08001838 up_read(&curseg->journal_rwsem);
Jaegeuk Kim54579ba2016-01-02 09:19:41 -08001839 up_read(&nm_i->nat_tree_lock);
Jaegeuk Kim315f4552015-11-29 09:25:08 -08001840
1841 ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
1842 nm_i->ra_nid_pages, META_NAT, false);
1843}
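
/*
 * Illustrative sketch (an assumption; the helper name is hypothetical): the
 * scan above advances by whole NAT blocks, each covering NAT_ENTRY_PER_BLOCK
 * consecutive nids, until FREE_NID_PAGES blocks have been visited or the nid
 * space wraps back to 0.
 */
static nid_t example_next_nat_block_start(nid_t nid)
{
	/*
	 * skip the remaining entries of the current NAT block, exactly as
	 * build_free_nids() does between loop iterations
	 */
	return nid + (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
}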
1844
1845/*
1846 * If this function returns success, the caller can obtain a new nid
1847 * from the second parameter of this function.
1848 * The returned nid can be used as an ino as well as a nid when an inode is created.
1849 */
1850bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
1851{
1852 struct f2fs_nm_info *nm_i = NM_I(sbi);
1853 struct free_nid *i = NULL;
1854retry:
Jaegeuk Kim98ffffd2016-04-29 16:29:22 -07001855#ifdef CONFIG_F2FS_FAULT_INJECTION
1856 if (time_to_inject(FAULT_ALLOC_NID))
1857 return false;
1858#endif
Jaegeuk Kim315f4552015-11-29 09:25:08 -08001859 if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids))
1860 return false;
1861
1862 spin_lock(&nm_i->free_nid_list_lock);
1863
1864 /* We should not use stale free nids created by build_free_nids */
1865 if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
Jaegeuk Kim315f4552015-11-29 09:25:08 -08001866 f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
1867 list_for_each_entry(i, &nm_i->free_nid_list, list)
1868 if (i->state == NID_NEW)
1869 break;
1870
1871 f2fs_bug_on(sbi, i->state != NID_NEW);
1872 *nid = i->nid;
1873 i->state = NID_ALLOC;
1874 nm_i->fcnt--;
1875 spin_unlock(&nm_i->free_nid_list_lock);
Jaegeuk Kim315f4552015-11-29 09:25:08 -08001876 return true;
1877 }
1878 spin_unlock(&nm_i->free_nid_list_lock);
1879
1880	/* Let's scan nat pages and their caches to get free nids */
1881 mutex_lock(&nm_i->build_lock);
1882 build_free_nids(sbi);
1883 mutex_unlock(&nm_i->build_lock);
1884 goto retry;
1885}
1886
1887/*
1888 * alloc_nid() should be called prior to this function.
1889 */
1890void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
1891{
1892 struct f2fs_nm_info *nm_i = NM_I(sbi);
1893 struct free_nid *i;
1894
1895 spin_lock(&nm_i->free_nid_list_lock);
1896 i = __lookup_free_nid_list(nm_i, nid);
1897 f2fs_bug_on(sbi, !i || i->state != NID_ALLOC);
1898 __del_from_free_nid_list(nm_i, i);
1899 spin_unlock(&nm_i->free_nid_list_lock);
1900
1901 kmem_cache_free(free_nid_slab, i);
1902}
1903
1904/*
1905 * alloc_nid() should be called prior to this function.
1906 */
1907void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
1908{
1909 struct f2fs_nm_info *nm_i = NM_I(sbi);
1910 struct free_nid *i;
1911 bool need_free = false;
1912
1913 if (!nid)
1914 return;
1915
1916 spin_lock(&nm_i->free_nid_list_lock);
1917 i = __lookup_free_nid_list(nm_i, nid);
1918 f2fs_bug_on(sbi, !i || i->state != NID_ALLOC);
1919 if (!available_free_memory(sbi, FREE_NIDS)) {
1920 __del_from_free_nid_list(nm_i, i);
1921 need_free = true;
1922 } else {
1923 i->state = NID_NEW;
1924 nm_i->fcnt++;
1925 }
1926 spin_unlock(&nm_i->free_nid_list_lock);
1927
1928 if (need_free)
1929 kmem_cache_free(free_nid_slab, i);
1930}
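
/*
 * Illustrative usage sketch (not part of the original file; the callback and
 * helper names are hypothetical): per the comments above, a caller reserves a
 * nid with alloc_nid() and must then report the outcome with either
 * alloc_nid_done() or alloc_nid_failed().
 */
static int example_reserve_node(struct f2fs_sb_info *sbi,
				int (*create)(struct f2fs_sb_info *, nid_t))
{
	nid_t nid;
	int err;

	if (!alloc_nid(sbi, &nid))
		return -ENOSPC;			/* no free nid could be found */

	err = create(sbi, nid);
	if (err)
		alloc_nid_failed(sbi, nid);	/* give the nid back to the free list */
	else
		alloc_nid_done(sbi, nid);	/* drop the nid from the free list */
	return err;
}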
1931
1932int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
1933{
1934 struct f2fs_nm_info *nm_i = NM_I(sbi);
1935 struct free_nid *i, *next;
1936 int nr = nr_shrink;
1937
Jaegeuk Kimca758582016-06-16 16:41:49 -07001938 if (nm_i->fcnt <= MAX_FREE_NIDS)
1939 return 0;
1940
Jaegeuk Kim315f4552015-11-29 09:25:08 -08001941 if (!mutex_trylock(&nm_i->build_lock))
1942 return 0;
1943
1944 spin_lock(&nm_i->free_nid_list_lock);
1945 list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
Jaegeuk Kimca758582016-06-16 16:41:49 -07001946 if (nr_shrink <= 0 || nm_i->fcnt <= MAX_FREE_NIDS)
Jaegeuk Kim315f4552015-11-29 09:25:08 -08001947 break;
1948 if (i->state == NID_ALLOC)
1949 continue;
1950 __del_from_free_nid_list(nm_i, i);
1951 kmem_cache_free(free_nid_slab, i);
1952 nm_i->fcnt--;
1953 nr_shrink--;
1954 }
1955 spin_unlock(&nm_i->free_nid_list_lock);
1956 mutex_unlock(&nm_i->build_lock);
1957
1958 return nr - nr_shrink;
1959}
1960
1961void recover_inline_xattr(struct inode *inode, struct page *page)
1962{
1963 void *src_addr, *dst_addr;
1964 size_t inline_size;
1965 struct page *ipage;
1966 struct f2fs_inode *ri;
1967
1968 ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
1969 f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage));
1970
1971 ri = F2FS_INODE(page);
1972 if (!(ri->i_inline & F2FS_INLINE_XATTR)) {
Jaegeuk Kim5e3a5ba2016-05-20 10:13:22 -07001973 clear_inode_flag(inode, FI_INLINE_XATTR);
Jaegeuk Kim315f4552015-11-29 09:25:08 -08001974 goto update_inode;
1975 }
1976
1977 dst_addr = inline_xattr_addr(ipage);
1978 src_addr = inline_xattr_addr(page);
1979 inline_size = inline_xattr_size(inode);
1980
Jaegeuk Kim3e0b2f42016-01-20 23:43:51 +08001981 f2fs_wait_on_page_writeback(ipage, NODE, true);
Jaegeuk Kim315f4552015-11-29 09:25:08 -08001982 memcpy(dst_addr, src_addr, inline_size);
1983update_inode:
1984 update_inode(inode, ipage);
1985 f2fs_put_page(ipage, 1);
1986}
1987
1988void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
1989{
1990 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1991 nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
1992 nid_t new_xnid = nid_of_node(page);
1993 struct node_info ni;
1994
1995 /* 1: invalidate the previous xattr nid */
1996 if (!prev_xnid)
1997 goto recover_xnid;
1998
1999 /* Deallocate node address */
2000 get_node_info(sbi, prev_xnid, &ni);
2001 f2fs_bug_on(sbi, ni.blk_addr == NULL_ADDR);
2002 invalidate_blocks(sbi, ni.blk_addr);
2003 dec_valid_node_count(sbi, inode);
2004 set_node_addr(sbi, &ni, NULL_ADDR, false);
2005
2006recover_xnid:
2007 /* 2: allocate new xattr nid */
2008 if (unlikely(!inc_valid_node_count(sbi, inode)))
2009 f2fs_bug_on(sbi, 1);
2010
2011 remove_free_nid(NM_I(sbi), new_xnid);
2012 get_node_info(sbi, new_xnid, &ni);
2013 ni.ino = inode->i_ino;
2014 set_node_addr(sbi, &ni, NEW_ADDR, false);
Jaegeuk Kim69dbd022016-05-20 09:52:20 -07002015 f2fs_i_xnid_write(inode, new_xnid);
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002016
2017 /* 3: update xattr blkaddr */
2018 refresh_sit_entry(sbi, NEW_ADDR, blkaddr);
2019 set_node_addr(sbi, &ni, blkaddr, false);
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002020}
2021
2022int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
2023{
2024 struct f2fs_inode *src, *dst;
2025 nid_t ino = ino_of_node(page);
2026 struct node_info old_ni, new_ni;
2027 struct page *ipage;
2028
2029 get_node_info(sbi, ino, &old_ni);
2030
2031 if (unlikely(old_ni.blk_addr != NULL_ADDR))
2032 return -EINVAL;
2033
Jaegeuk Kim5b305dc2016-04-29 16:11:53 -07002034 ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false);
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002035 if (!ipage)
2036 return -ENOMEM;
2037
2038	/* make sure this ino is not handed out again from the free nid list */
2039 remove_free_nid(NM_I(sbi), ino);
2040
Jaegeuk Kimb1cf93a2016-06-30 18:49:15 -07002041 if (!PageUptodate(ipage))
2042 SetPageUptodate(ipage);
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002043 fill_node_footer(ipage, ino, ino, 0, true);
2044
2045 src = F2FS_INODE(page);
2046 dst = F2FS_INODE(ipage);
2047
2048 memcpy(dst, src, (unsigned long)&src->i_ext - (unsigned long)src);
2049 dst->i_size = 0;
2050 dst->i_blocks = cpu_to_le64(1);
2051 dst->i_links = cpu_to_le32(1);
2052 dst->i_xattr_nid = 0;
2053 dst->i_inline = src->i_inline & F2FS_INLINE_XATTR;
2054
2055 new_ni = old_ni;
2056 new_ni.ino = ino;
2057
2058 if (unlikely(!inc_valid_node_count(sbi, NULL)))
2059 WARN_ON(1);
2060 set_node_addr(sbi, &new_ni, NEW_ADDR, false);
2061 inc_valid_inode_count(sbi);
2062 set_page_dirty(ipage);
2063 f2fs_put_page(ipage, 1);
2064 return 0;
2065}
2066
2067int restore_node_summary(struct f2fs_sb_info *sbi,
2068 unsigned int segno, struct f2fs_summary_block *sum)
2069{
2070 struct f2fs_node *rn;
2071 struct f2fs_summary *sum_entry;
2072 block_t addr;
2073 int bio_blocks = MAX_BIO_BLOCKS(sbi);
2074 int i, idx, last_offset, nrpages;
2075
2076 /* scan the node segment */
2077 last_offset = sbi->blocks_per_seg;
2078 addr = START_BLOCK(sbi, segno);
2079 sum_entry = &sum->entries[0];
2080
2081 for (i = 0; i < last_offset; i += nrpages, addr += nrpages) {
2082 nrpages = min(last_offset - i, bio_blocks);
2083
2084 /* readahead node pages */
2085 ra_meta_pages(sbi, addr, nrpages, META_POR, true);
2086
2087 for (idx = addr; idx < addr + nrpages; idx++) {
2088 struct page *page = get_tmp_page(sbi, idx);
2089
2090 rn = F2FS_NODE(page);
2091 sum_entry->nid = rn->footer.nid;
2092 sum_entry->version = 0;
2093 sum_entry->ofs_in_node = 0;
2094 sum_entry++;
2095 f2fs_put_page(page, 1);
2096 }
2097
2098 invalidate_mapping_pages(META_MAPPING(sbi), addr,
2099 addr + nrpages);
2100 }
2101 return 0;
2102}
2103
2104static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
2105{
2106 struct f2fs_nm_info *nm_i = NM_I(sbi);
2107 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
Chao Yufac2b092016-02-19 18:08:46 +08002108 struct f2fs_journal *journal = curseg->journal;
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002109 int i;
2110
Chao Yufac2b092016-02-19 18:08:46 +08002111 down_write(&curseg->journal_rwsem);
Chao Yud59981d2016-02-14 18:50:40 +08002112 for (i = 0; i < nats_in_cursum(journal); i++) {
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002113 struct nat_entry *ne;
2114 struct f2fs_nat_entry raw_ne;
Chao Yud59981d2016-02-14 18:50:40 +08002115 nid_t nid = le32_to_cpu(nid_in_journal(journal, i));
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002116
Chao Yud59981d2016-02-14 18:50:40 +08002117 raw_ne = nat_in_journal(journal, i);
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002118
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002119 ne = __lookup_nat_cache(nm_i, nid);
2120 if (!ne) {
2121 ne = grab_nat_entry(nm_i, nid);
2122 node_info_from_raw_nat(&ne->ni, &raw_ne);
2123 }
2124 __set_nat_cache_dirty(nm_i, ne);
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002125 }
Chao Yud59981d2016-02-14 18:50:40 +08002126 update_nats_in_cursum(journal, -i);
Chao Yufac2b092016-02-19 18:08:46 +08002127 up_write(&curseg->journal_rwsem);
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002128}
2129
2130static void __adjust_nat_entry_set(struct nat_entry_set *nes,
2131 struct list_head *head, int max)
2132{
2133 struct nat_entry_set *cur;
2134
2135 if (nes->entry_cnt >= max)
2136 goto add_out;
2137
2138 list_for_each_entry(cur, head, set_list) {
2139 if (cur->entry_cnt >= nes->entry_cnt) {
2140 list_add(&nes->set_list, cur->set_list.prev);
2141 return;
2142 }
2143 }
2144add_out:
2145 list_add_tail(&nes->set_list, head);
2146}
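
/*
 * Illustrative note (an assumption; the numbers are made up):
 * __adjust_nat_entry_set() keeps the pending list sorted by ascending
 * entry_cnt and appends oversized sets at the tail.  For per-set counts
 * {7, 3, 9} with max = 8, the resulting flush order is 3, 7, 9, so the
 * smallest sets claim the remaining journal slots first.
 */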
2147
2148static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
2149 struct nat_entry_set *set)
2150{
2151 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
Chao Yufac2b092016-02-19 18:08:46 +08002152 struct f2fs_journal *journal = curseg->journal;
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002153 nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK;
2154 bool to_journal = true;
2155 struct f2fs_nat_block *nat_blk;
2156 struct nat_entry *ne, *cur;
2157 struct page *page = NULL;
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002158
2159 /*
2160 * there are two steps to flush nat entries:
2161 * #1, flush nat entries to journal in current hot data summary block.
2162 * #2, flush nat entries to nat page.
2163 */
Chao Yud59981d2016-02-14 18:50:40 +08002164 if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002165 to_journal = false;
2166
2167 if (to_journal) {
Chao Yufac2b092016-02-19 18:08:46 +08002168 down_write(&curseg->journal_rwsem);
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002169 } else {
2170 page = get_next_nat_page(sbi, start_nid);
2171 nat_blk = page_address(page);
2172 f2fs_bug_on(sbi, !nat_blk);
2173 }
2174
2175 /* flush dirty nats in nat entry set */
2176 list_for_each_entry_safe(ne, cur, &set->entry_list, list) {
2177 struct f2fs_nat_entry *raw_ne;
2178 nid_t nid = nat_get_nid(ne);
2179 int offset;
2180
2181 if (nat_get_blkaddr(ne) == NEW_ADDR)
2182 continue;
2183
2184 if (to_journal) {
Chao Yud59981d2016-02-14 18:50:40 +08002185 offset = lookup_journal_in_cursum(journal,
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002186 NAT_JOURNAL, nid, 1);
2187 f2fs_bug_on(sbi, offset < 0);
Chao Yud59981d2016-02-14 18:50:40 +08002188 raw_ne = &nat_in_journal(journal, offset);
2189 nid_in_journal(journal, offset) = cpu_to_le32(nid);
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002190 } else {
2191 raw_ne = &nat_blk->entries[nid - start_nid];
2192 }
2193 raw_nat_from_node_info(raw_ne, &ne->ni);
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002194 nat_reset_flag(ne);
2195 __clear_nat_cache_dirty(NM_I(sbi), ne);
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002196 if (nat_get_blkaddr(ne) == NULL_ADDR)
2197 add_free_nid(sbi, nid, false);
2198 }
2199
2200 if (to_journal)
Chao Yufac2b092016-02-19 18:08:46 +08002201 up_write(&curseg->journal_rwsem);
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002202 else
2203 f2fs_put_page(page, 1);
2204
2205 f2fs_bug_on(sbi, set->entry_cnt);
2206
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002207 radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002208 kmem_cache_free(nat_entry_set_slab, set);
2209}
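
/*
 * Illustrative sketch (an assumption; the helper name is hypothetical): the
 * two-step policy described above reduces to one check per set -- keep using
 * the NAT journal in the hot data summary block while it has room, otherwise
 * rewrite the set's on-disk NAT block.
 */
static bool example_set_fits_in_journal(struct f2fs_journal *journal,
					struct nat_entry_set *set)
{
	/* the same test drives the to_journal decision in __flush_nat_entry_set() */
	return __has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL);
}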
2210
2211/*
2212 * This function is called during the checkpointing process.
2213 */
2214void flush_nat_entries(struct f2fs_sb_info *sbi)
2215{
2216 struct f2fs_nm_info *nm_i = NM_I(sbi);
2217 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
Chao Yufac2b092016-02-19 18:08:46 +08002218 struct f2fs_journal *journal = curseg->journal;
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002219 struct nat_entry_set *setvec[SETVEC_SIZE];
2220 struct nat_entry_set *set, *tmp;
2221 unsigned int found;
2222 nid_t set_idx = 0;
2223 LIST_HEAD(sets);
2224
2225 if (!nm_i->dirty_nat_cnt)
2226 return;
Jaegeuk Kim54579ba2016-01-02 09:19:41 -08002227
2228 down_write(&nm_i->nat_tree_lock);
2229
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002230 /*
2231	 * if there is not enough space in the journal to store dirty nat
2232	 * entries, remove all entries from the journal and merge them
2233	 * into the nat entry set.
2234 */
Chao Yud59981d2016-02-14 18:50:40 +08002235 if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002236 remove_nats_in_journal(sbi);
2237
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002238 while ((found = __gang_lookup_nat_set(nm_i,
2239 set_idx, SETVEC_SIZE, setvec))) {
2240 unsigned idx;
2241 set_idx = setvec[found - 1]->set + 1;
2242 for (idx = 0; idx < found; idx++)
2243 __adjust_nat_entry_set(setvec[idx], &sets,
Chao Yud59981d2016-02-14 18:50:40 +08002244 MAX_NAT_JENTRIES(journal));
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002245 }
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002246
2247 /* flush dirty nats in nat entry set */
2248 list_for_each_entry_safe(set, tmp, &sets, set_list)
2249 __flush_nat_entry_set(sbi, set);
2250
Jaegeuk Kim54579ba2016-01-02 09:19:41 -08002251 up_write(&nm_i->nat_tree_lock);
2252
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002253 f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
2254}
2255
2256static int init_node_manager(struct f2fs_sb_info *sbi)
2257{
2258 struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
2259 struct f2fs_nm_info *nm_i = NM_I(sbi);
2260 unsigned char *version_bitmap;
2261 unsigned int nat_segs, nat_blocks;
2262
2263 nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
2264
2265	/* segment_count_nat includes the pair segment, so divide by 2. */
2266 nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
2267 nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
2268
2269 nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
2270
2271	/* unused nids: 0, node, meta, (and root is counted as a valid node) */
2272 nm_i->available_nids = nm_i->max_nid - F2FS_RESERVED_NODE_NUM;
2273 nm_i->fcnt = 0;
2274 nm_i->nat_cnt = 0;
2275 nm_i->ram_thresh = DEF_RAM_THRESHOLD;
2276 nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
Chao Yu69e1bba2016-01-18 18:32:58 +08002277 nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;
Jaegeuk Kim315f4552015-11-29 09:25:08 -08002278
2279 INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
2280 INIT_LIST_HEAD(&nm_i->free_nid_list);
2281 INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
2282 INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
2283 INIT_LIST_HEAD(&nm_i->nat_entries);
2284
2285 mutex_init(&nm_i->build_lock);
2286 spin_lock_init(&nm_i->free_nid_list_lock);
2287 init_rwsem(&nm_i->nat_tree_lock);
2288
2289 nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
2290 nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
2291 version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
2292 if (!version_bitmap)
2293 return -EFAULT;
2294
2295 nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size,
2296 GFP_KERNEL);
2297 if (!nm_i->nat_bitmap)
2298 return -ENOMEM;
2299 return 0;
2300}
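
/*
 * Illustrative arithmetic (an assumption; the values are made up): with
 * segment_count_nat = 8, halving for the paired copy leaves 4 NAT segments;
 * with log_blocks_per_seg = 9 that is 4 << 9 = 2048 NAT blocks.  Assuming
 * 455 NAT entries per 4KB block, max_nid = 455 * 2048 = 931840, and
 * available_nids is that figure minus F2FS_RESERVED_NODE_NUM.
 */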
2301
2302int build_node_manager(struct f2fs_sb_info *sbi)
2303{
2304 int err;
2305
2306 sbi->nm_info = kzalloc(sizeof(struct f2fs_nm_info), GFP_KERNEL);
2307 if (!sbi->nm_info)
2308 return -ENOMEM;
2309
2310 err = init_node_manager(sbi);
2311 if (err)
2312 return err;
2313
2314 build_free_nids(sbi);
2315 return 0;
2316}
2317
2318void destroy_node_manager(struct f2fs_sb_info *sbi)
2319{
2320 struct f2fs_nm_info *nm_i = NM_I(sbi);
2321 struct free_nid *i, *next_i;
2322 struct nat_entry *natvec[NATVEC_SIZE];
2323 struct nat_entry_set *setvec[SETVEC_SIZE];
2324 nid_t nid = 0;
2325 unsigned int found;
2326
2327 if (!nm_i)
2328 return;
2329
2330 /* destroy free nid list */
2331 spin_lock(&nm_i->free_nid_list_lock);
2332 list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
2333 f2fs_bug_on(sbi, i->state == NID_ALLOC);
2334 __del_from_free_nid_list(nm_i, i);
2335 nm_i->fcnt--;
2336 spin_unlock(&nm_i->free_nid_list_lock);
2337 kmem_cache_free(free_nid_slab, i);
2338 spin_lock(&nm_i->free_nid_list_lock);
2339 }
2340 f2fs_bug_on(sbi, nm_i->fcnt);
2341 spin_unlock(&nm_i->free_nid_list_lock);
2342
2343 /* destroy nat cache */
2344 down_write(&nm_i->nat_tree_lock);
2345 while ((found = __gang_lookup_nat_cache(nm_i,
2346 nid, NATVEC_SIZE, natvec))) {
2347 unsigned idx;
2348
2349 nid = nat_get_nid(natvec[found - 1]) + 1;
2350 for (idx = 0; idx < found; idx++)
2351 __del_from_nat_cache(nm_i, natvec[idx]);
2352 }
2353 f2fs_bug_on(sbi, nm_i->nat_cnt);
2354
2355 /* destroy nat set cache */
2356 nid = 0;
2357 while ((found = __gang_lookup_nat_set(nm_i,
2358 nid, SETVEC_SIZE, setvec))) {
2359 unsigned idx;
2360
2361 nid = setvec[found - 1]->set + 1;
2362 for (idx = 0; idx < found; idx++) {
2363			/* entry_cnt is not zero if a cp_error occurred */
2364 f2fs_bug_on(sbi, !list_empty(&setvec[idx]->entry_list));
2365 radix_tree_delete(&nm_i->nat_set_root, setvec[idx]->set);
2366 kmem_cache_free(nat_entry_set_slab, setvec[idx]);
2367 }
2368 }
2369 up_write(&nm_i->nat_tree_lock);
2370
2371 kfree(nm_i->nat_bitmap);
2372 sbi->nm_info = NULL;
2373 kfree(nm_i);
2374}
2375
2376int __init create_node_manager_caches(void)
2377{
2378 nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
2379 sizeof(struct nat_entry));
2380 if (!nat_entry_slab)
2381 goto fail;
2382
2383 free_nid_slab = f2fs_kmem_cache_create("free_nid",
2384 sizeof(struct free_nid));
2385 if (!free_nid_slab)
2386 goto destroy_nat_entry;
2387
2388 nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set",
2389 sizeof(struct nat_entry_set));
2390 if (!nat_entry_set_slab)
2391 goto destroy_free_nid;
2392 return 0;
2393
2394destroy_free_nid:
2395 kmem_cache_destroy(free_nid_slab);
2396destroy_nat_entry:
2397 kmem_cache_destroy(nat_entry_slab);
2398fail:
2399 return -ENOMEM;
2400}
2401
2402void destroy_node_manager_caches(void)
2403{
2404 kmem_cache_destroy(nat_entry_set_slab);
2405 kmem_cache_destroy(free_nid_slab);
2406 kmem_cache_destroy(nat_entry_slab);
2407}