/*
 * fs/f2fs/segment.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/prefetch.h>
#include <linux/kthread.h>
#include <linux/swap.h>
#include <linux/timer.h>

#include "f2fs.h"
#include "segment.h"
#include "node.h"
#include "trace.h"
#include <trace/events/f2fs.h>

#define __reverse_ffz(x) __reverse_ffs(~(x))

static struct kmem_cache *discard_entry_slab;
static struct kmem_cache *sit_entry_set_slab;
static struct kmem_cache *inmem_entry_slab;

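/*
 * Load the bytes at @str as one unsigned long with the first byte in the
 * most significant position, so that word-wide bit scans match f2fs's
 * MSB-first bitmap layout.
 *
 * Example (64-bit): str = {0x80, 0x00, ..., 0x00} yields
 * 0x8000000000000000UL.
 */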
static unsigned long __reverse_ulong(unsigned char *str)
{
	unsigned long tmp = 0;
	int shift = 24, idx = 0;

#if BITS_PER_LONG == 64
	shift = 56;
#endif
	while (shift >= 0) {
		tmp |= (unsigned long)str[idx++] << shift;
		shift -= BITS_PER_BYTE;
	}
	return tmp;
}

/**
 * Copied from latest lib/llist.c
 * llist_for_each_entry_safe - iterate over some deleted entries of
 * lock-less list of given type
 * safe against removal of list entry
 * @pos:	the type * to use as a loop cursor.
 * @n:		another type * to use as temporary storage
 * @node:	the first entry of deleted list entries.
 * @member:	the name of the llist_node within the struct.
 *
 * In general, some entries of the lock-less list can be traversed
 * safely only after being removed from list, so start with an entry
 * instead of list head.
 *
 * If being used on entries deleted from lock-less list directly, the
 * traverse order is from the newest to the oldest added entry. If
 * you want to traverse from the oldest to the newest, you must
 * reverse the order by yourself before traversing.
 */
#define llist_for_each_entry_safe(pos, n, node, member)			\
	for (pos = llist_entry((node), typeof(*pos), member);		\
		&pos->member != NULL &&					\
		(n = llist_entry(pos->member.next, typeof(*n), member), true); \
		pos = n)

/**
 * Copied from latest lib/llist.c
 * llist_reverse_order - reverse order of a llist chain
 * @head:	first item of the list to be reversed
 *
 * Reverse the order of a chain of llist entries and return the
 * new first entry.
 */
struct llist_node *llist_reverse_order(struct llist_node *head)
{
	struct llist_node *new_head = NULL;

	while (head) {
		struct llist_node *tmp = head;
		head = head->next;
		tmp->next = new_head;
		new_head = tmp;
	}

	return new_head;
}

/**
 * Copied from latest linux/list.h
 * list_last_entry - get the last element from a list
 * @ptr:	the list head to take the element from.
 * @type:	the type of the struct this is embedded in.
 * @member:	the name of the list_struct within the struct.
 *
 * Note that the list is expected to be non-empty.
 */
#define list_last_entry(ptr, type, member) \
	list_entry((ptr)->prev, type, member)

/*
 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
 * MSB and LSB are reversed in a byte by f2fs_set_bit.
 */
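/*
 * __reverse_ffs() returns the index, counted from the MSB, of the first
 * set bit.  Example: __reverse_ffs(1UL << (BITS_PER_LONG - 1)) == 0, and
 * __reverse_ffs(0x2UL) == BITS_PER_LONG - 2.
 */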
static inline unsigned long __reverse_ffs(unsigned long word)
{
	int num = 0;

#if BITS_PER_LONG == 64
	if ((word & 0xffffffff00000000UL) == 0)
		num += 32;
	else
		word >>= 32;
#endif
	if ((word & 0xffff0000) == 0)
		num += 16;
	else
		word >>= 16;

	if ((word & 0xff00) == 0)
		num += 8;
	else
		word >>= 8;

	if ((word & 0xf0) == 0)
		num += 4;
	else
		word >>= 4;

	if ((word & 0xc) == 0)
		num += 2;
	else
		word >>= 2;

	if ((word & 0x2) == 0)
		num += 1;
	return num;
}

/*
 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
 * f2fs_set_bit makes MSB and LSB reversed in a byte.
 * @size must be an integral multiple of BITS_PER_LONG.
 * Example:
 *                             MSB <--> LSB
 *   f2fs_set_bit(0, bitmap) => 1000 0000
 *   f2fs_set_bit(7, bitmap) => 0000 0001
 */
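/*
 * Example: with the layout above, if only f2fs_set_bit(2, bitmap) has
 * been called, __find_rev_next_bit(bitmap, BITS_PER_LONG, 0) returns 2
 * and __find_rev_next_zero_bit(bitmap, BITS_PER_LONG, 2) returns 3.
 */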
static unsigned long __find_rev_next_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = size;
	unsigned long tmp;

	if (offset >= size)
		return size;

	size -= (offset & ~(BITS_PER_LONG - 1));
	offset %= BITS_PER_LONG;

	while (1) {
		if (*p == 0)
			goto pass;

		tmp = __reverse_ulong((unsigned char *)p);

		tmp &= ~0UL >> offset;
		if (size < BITS_PER_LONG)
			tmp &= (~0UL << (BITS_PER_LONG - size));
		if (tmp)
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;
		size -= BITS_PER_LONG;
		offset = 0;
		p++;
	}
	return result;
found:
	return result - size + __reverse_ffs(tmp);
}

static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = size;
	unsigned long tmp;

	if (offset >= size)
		return size;

	size -= (offset & ~(BITS_PER_LONG - 1));
	offset %= BITS_PER_LONG;

	while (1) {
		if (*p == ~0UL)
			goto pass;

		tmp = __reverse_ulong((unsigned char *)p);

		if (offset)
			tmp |= ~0UL << (BITS_PER_LONG - offset);
		if (size < BITS_PER_LONG)
			tmp |= ~0UL >> size;
		if (tmp != ~0UL)
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;
		size -= BITS_PER_LONG;
		offset = 0;
		p++;
	}
	return result;
found:
	return result - size + __reverse_ffz(tmp);
}

void register_inmem_page(struct inode *inode, struct page *page)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct inmem_pages *new;

	f2fs_trace_pid(page);

	set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
	SetPagePrivate(page);

	new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);

	/* add atomic page indices to the list */
	new->page = page;
	INIT_LIST_HEAD(&new->list);

	/* increase reference count with clean state */
	mutex_lock(&fi->inmem_lock);
	get_page(page);
	list_add_tail(&new->list, &fi->inmem_pages);
	inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
	mutex_unlock(&fi->inmem_lock);

	trace_f2fs_register_inmem_page(page, INMEM);
}

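/*
 * Walk @head and release every in-memory (atomic) page on it.  With
 * @drop, the pages are simply thrown away, aborting the atomic write;
 * with @recover, each already-committed block is replaced with its
 * recorded old address (cur->old_addr) to roll a failed commit back.
 * Returns an error if any page could not be recovered.
 */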
static int __revoke_inmem_pages(struct inode *inode,
				struct list_head *head, bool drop, bool recover)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct inmem_pages *cur, *tmp;
	int err = 0;

	list_for_each_entry_safe(cur, tmp, head, list) {
		struct page *page = cur->page;

		if (drop)
			trace_f2fs_commit_inmem_page(page, INMEM_DROP);

		lock_page(page);

		if (recover) {
			struct dnode_of_data dn;
			struct node_info ni;

			trace_f2fs_commit_inmem_page(page, INMEM_REVOKE);

			set_new_dnode(&dn, inode, NULL, NULL, 0);
			if (get_dnode_of_data(&dn, page->index, LOOKUP_NODE)) {
				err = -EAGAIN;
				goto next;
			}
			get_node_info(sbi, dn.nid, &ni);
			f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
					cur->old_addr, ni.version, true, true);
			f2fs_put_dnode(&dn);
		}
next:
		/* we don't need to invalidate this in the successful case */
		if (drop || recover)
			ClearPageUptodate(page);
		set_page_private(page, 0);
		ClearPagePrivate(page);
		f2fs_put_page(page, 1);

		list_del(&cur->list);
		kmem_cache_free(inmem_entry_slab, cur);
		dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
	}
	return err;
}

void drop_inmem_pages(struct inode *inode)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);

	clear_inode_flag(inode, FI_ATOMIC_FILE);

	mutex_lock(&fi->inmem_lock);
	__revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
	mutex_unlock(&fi->inmem_lock);
}

static int __commit_inmem_pages(struct inode *inode,
					struct list_head *revoke_list)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct inmem_pages *cur, *tmp;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = DATA,
		.rw = WRITE_SYNC | REQ_PRIO,
		.encrypted_page = NULL,
	};
	bool submit_bio = false;
	int err = 0;

	list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
		struct page *page = cur->page;

		lock_page(page);
		if (page->mapping == inode->i_mapping) {
			trace_f2fs_commit_inmem_page(page, INMEM);

			set_page_dirty(page);
			f2fs_wait_on_page_writeback(page, DATA, true);
			if (clear_page_dirty_for_io(page))
				inode_dec_dirty_pages(inode);

			fio.page = page;
			err = do_write_data_page(&fio);
			if (err) {
				unlock_page(page);
				break;
			}

			/* record old blkaddr for revoking */
			cur->old_addr = fio.old_blkaddr;

			clear_cold_data(page);
			submit_bio = true;
		}
		unlock_page(page);
		list_move_tail(&cur->list, revoke_list);
	}

	if (submit_bio)
		f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0, DATA, WRITE);

	if (!err)
		__revoke_inmem_pages(inode, revoke_list, false, false);

	return err;
}

int commit_inmem_pages(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct list_head revoke_list;
	int err;

	INIT_LIST_HEAD(&revoke_list);
	f2fs_balance_fs(sbi, true);
	f2fs_lock_op(sbi);

	mutex_lock(&fi->inmem_lock);
	err = __commit_inmem_pages(inode, &revoke_list);
	if (err) {
		int ret;
		/*
		 * Try to revoke all the committed pages.  This can still fail
		 * due to lack of memory or other reasons; in that case -EAGAIN
		 * is returned, meaning the transaction has lost its integrity
		 * and the caller should use its journal to recover, or rewrite
		 * and commit the last transaction.  For any other error number,
		 * the revoking was completed by the filesystem itself.
		 */
		ret = __revoke_inmem_pages(inode, &revoke_list, false, true);
		if (ret)
			err = ret;

		/* drop all uncommitted pages */
		__revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
	}
	mutex_unlock(&fi->inmem_lock);

	f2fs_unlock_op(sbi);
	return err;
}

/*
 * This function balances dirty node and dentry pages.
 * In addition, it controls garbage collection.
 */
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
	if (!need)
		return;

	/* background balancing may still have work pending */
	if (excess_cached_nats(sbi))
		f2fs_balance_fs_bg(sbi);

	/*
	 * If there are too many dirty dir/node pages and not enough free
	 * segments, we should do GC, which may end with a checkpoint.
	 */
	if (has_not_enough_free_secs(sbi, 0)) {
		mutex_lock(&sbi->gc_mutex);
		f2fs_gc(sbi, false);
	}
}

void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
{
	/* try to shrink the extent cache when there is not enough memory */
	if (!available_free_memory(sbi, EXTENT_CACHE))
		f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);

	/* check the # of cached NAT entries */
	if (!available_free_memory(sbi, NAT_ENTRIES))
		try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);

	if (!available_free_memory(sbi, FREE_NIDS))
		try_to_free_nids(sbi, MAX_FREE_NIDS);
	else
		build_free_nids(sbi);

	/* a checkpoint is the only way to shrink partially cached entries */
	if (!available_free_memory(sbi, NAT_ENTRIES) ||
			!available_free_memory(sbi, INO_ENTRIES) ||
			excess_prefree_segs(sbi) ||
			excess_dirty_nats(sbi) ||
			(is_idle(sbi) && f2fs_time_over(sbi, CP_TIME))) {
		if (test_opt(sbi, DATA_FLUSH)) {
			struct blk_plug plug;

			blk_start_plug(&plug);
			sync_dirty_inodes(sbi, FILE_INODE);
			blk_finish_plug(&plug);
		}
		f2fs_sync_fs(sbi->sb, true);
		stat_inc_bg_cp_count(sbi->stat_info);
	}
}

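/*
 * Flush-merge machinery: with the FLUSH_MERGE mount option, concurrent
 * callers of f2fs_issue_flush() queue a flush_cmd on a lock-less list
 * and sleep, while a dedicated kthread batches all pending commands
 * behind a single cache-flush bio and then completes every waiter with
 * that bio's status.
 */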
struct __submit_bio_ret {
	struct completion event;
	int error;
};

static void __submit_bio_wait_endio(struct bio *bio, int error)
{
	struct __submit_bio_ret *ret = bio->bi_private;

	ret->error = error;
	complete(&ret->event);
}

static int __submit_bio_wait(int rw, struct bio *bio)
{
	struct __submit_bio_ret ret;

	rw |= REQ_SYNC;
	init_completion(&ret.event);
	bio->bi_private = &ret;
	bio->bi_end_io = __submit_bio_wait_endio;
	submit_bio(rw, bio);
	wait_for_completion(&ret.event);

	return ret.error;
}

static int issue_flush_thread(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
	wait_queue_head_t *q = &fcc->flush_wait_queue;
repeat:
	if (kthread_should_stop())
		return 0;

	if (!llist_empty(&fcc->issue_list)) {
		struct bio *bio;
		struct flush_cmd *cmd, *next;
		int ret;

		bio = f2fs_bio_alloc(0);

		fcc->dispatch_list = llist_del_all(&fcc->issue_list);
		fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);

		bio->bi_bdev = sbi->sb->s_bdev;
		ret = __submit_bio_wait(WRITE_FLUSH, bio);

		llist_for_each_entry_safe(cmd, next,
					  fcc->dispatch_list, llnode) {
			cmd->ret = ret;
			complete(&cmd->wait);
		}
		bio_put(bio);
		fcc->dispatch_list = NULL;
	}

	wait_event_interruptible(*q,
		kthread_should_stop() || !llist_empty(&fcc->issue_list));
	goto repeat;
}

int f2fs_issue_flush(struct f2fs_sb_info *sbi)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
	struct flush_cmd cmd;

	trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
					test_opt(sbi, FLUSH_MERGE));

	if (test_opt(sbi, NOBARRIER))
		return 0;

	if (!test_opt(sbi, FLUSH_MERGE) || !atomic_read(&fcc->submit_flush)) {
		struct bio *bio = f2fs_bio_alloc(0);
		int ret;

		atomic_inc(&fcc->submit_flush);
		bio->bi_bdev = sbi->sb->s_bdev;
		ret = __submit_bio_wait(WRITE_FLUSH, bio);
		atomic_dec(&fcc->submit_flush);
		bio_put(bio);
		return ret;
	}

	init_completion(&cmd.wait);

	atomic_inc(&fcc->submit_flush);
	llist_add(&cmd.llnode, &fcc->issue_list);

	if (!fcc->dispatch_list)
		wake_up(&fcc->flush_wait_queue);

	wait_for_completion(&cmd.wait);
	atomic_dec(&fcc->submit_flush);

	return cmd.ret;
}

int create_flush_cmd_control(struct f2fs_sb_info *sbi)
{
	dev_t dev = sbi->sb->s_bdev->bd_dev;
	struct flush_cmd_control *fcc;
	int err = 0;

	fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
	if (!fcc)
		return -ENOMEM;
	atomic_set(&fcc->submit_flush, 0);
	init_waitqueue_head(&fcc->flush_wait_queue);
	init_llist_head(&fcc->issue_list);
	SM_I(sbi)->cmd_control_info = fcc;
	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(fcc->f2fs_issue_flush)) {
		err = PTR_ERR(fcc->f2fs_issue_flush);
		kfree(fcc);
		SM_I(sbi)->cmd_control_info = NULL;
		return err;
	}

	return err;
}

void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;

	if (fcc && fcc->f2fs_issue_flush)
		kthread_stop(fcc->f2fs_issue_flush);
	kfree(fcc);
	SM_I(sbi)->cmd_control_info = NULL;
}

static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	/* need not be added */
	if (IS_CURSEG(sbi, segno))
		return;

	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]++;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (unlikely(t >= DIRTY)) {
			f2fs_bug_on(sbi, 1);
			return;
		}
		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]++;
	}
}

static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]--;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]--;

		if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)
			clear_bit(GET_SECNO(sbi, segno),
						dirty_i->victim_secmap);
	}
}

/*
 * Errors such as -ENOMEM should not occur here, since adding a dirty
 * entry to the seglist is not a critical operation.
 * If a given segment is one of the current working segments, it won't
 * be added.
 */
static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned short valid_blocks;

	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
		return;

	mutex_lock(&dirty_i->seglist_lock);

	valid_blocks = get_valid_blocks(sbi, segno, 0);

	if (valid_blocks == 0) {
		__locate_dirty_segment(sbi, segno, PRE);
		__remove_dirty_segment(sbi, segno, DIRTY);
	} else if (valid_blocks < sbi->blocks_per_seg) {
		__locate_dirty_segment(sbi, segno, DIRTY);
	} else {
		/* the recovery routine with SSR needs this */
		__remove_dirty_segment(sbi, segno, DIRTY);
	}

	mutex_unlock(&dirty_i->seglist_lock);
}

static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
				block_t blkstart, block_t blklen)
{
	sector_t start = SECTOR_FROM_BLOCK(blkstart);
	sector_t len = SECTOR_FROM_BLOCK(blklen);
	struct seg_entry *se;
	unsigned int offset;
	block_t i;

	for (i = blkstart; i < blkstart + blklen; i++) {
		se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
		offset = GET_BLKOFF_FROM_SEG0(sbi, i);

		if (!f2fs_test_and_set_bit(offset, se->discard_map))
			sbi->discard_blks--;
	}
	trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
	return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
}

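/*
 * Discard the block at @blkaddr when possible.  If the DISCARD option is
 * off or the discard request fails, a zero-filled meta page is written
 * over the block instead, so the old contents cannot be read back;
 * returns true iff such a dummy page was written.
 */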
bool discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	int err = -EOPNOTSUPP;

	if (test_opt(sbi, DISCARD)) {
		struct seg_entry *se = get_seg_entry(sbi,
				GET_SEGNO(sbi, blkaddr));
		unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

		if (f2fs_test_bit(offset, se->discard_map))
			return false;

		err = f2fs_issue_discard(sbi, blkaddr, 1);
	}

	if (err) {
		update_meta_page(sbi, NULL, blkaddr);
		return true;
	}
	return false;
}

static void __add_discard_entry(struct f2fs_sb_info *sbi,
		struct cp_control *cpc, struct seg_entry *se,
		unsigned int start, unsigned int end)
{
	struct list_head *head = &SM_I(sbi)->discard_list;
	struct discard_entry *new, *last;

	if (!list_empty(head)) {
		last = list_last_entry(head, struct discard_entry, list);
		if (START_BLOCK(sbi, cpc->trim_start) + start ==
						last->blkaddr + last->len) {
			last->len += end - start;
			goto done;
		}
	}

	new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
	INIT_LIST_HEAD(&new->list);
	new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start;
	new->len = end - start;
	list_add_tail(&new->list, head);
done:
	SM_I(sbi)->nr_discards += end - start;
}

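/*
 * Collect the discardable block ranges of one segment into dmap and
 * queue them as discard entries.  In normal checkpoint mode a block is
 * a candidate when it was valid at the last checkpoint but is no longer
 * valid now ((cur ^ ckpt) & ckpt); in CP_DISCARD (trim) mode, any block
 * that is neither checkpointed nor already discarded qualifies
 * (~ckpt & ~discard_map).
 */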
static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
	int max_blocks = sbi->blocks_per_seg;
	struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
	unsigned long *discard_map = (unsigned long *)se->discard_map;
	unsigned long *dmap = SIT_I(sbi)->tmp_map;
	unsigned int start = 0, end = -1;
	bool force = (cpc->reason == CP_DISCARD);
	int i;

	if (se->valid_blocks == max_blocks)
		return;

	if (!force) {
		if (!test_opt(sbi, DISCARD) || !se->valid_blocks ||
		    SM_I(sbi)->nr_discards >= SM_I(sbi)->max_discards)
			return;
	}

	/* SIT_VBLOCK_MAP_SIZE should be a multiple of sizeof(unsigned long) */
	for (i = 0; i < entries; i++)
		dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
				(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];

	while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
		start = __find_rev_next_bit(dmap, max_blocks, end + 1);
		if (start >= max_blocks)
			break;

		end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
		if (force && start && end != max_blocks
					&& (end - start) < cpc->trim_minlen)
			continue;

		__add_discard_entry(sbi, cpc, se, start, end);
	}
}

void release_discard_addrs(struct f2fs_sb_info *sbi)
{
	struct list_head *head = &(SM_I(sbi)->discard_list);
	struct discard_entry *entry, *this;

	/* drop caches */
	list_for_each_entry_safe(entry, this, head, list) {
		list_del(&entry->list);
		kmem_cache_free(discard_entry_slab, entry);
	}
}

/*
 * clear_prefree_segments() should be called after the checkpoint is done.
 */
static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int segno;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
		__set_test_and_free(sbi, segno);
	mutex_unlock(&dirty_i->seglist_lock);
}

void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct list_head *head = &(SM_I(sbi)->discard_list);
	struct discard_entry *entry, *this;
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
	unsigned int start = 0, end = -1;
	unsigned int secno, start_segno;
	bool force = (cpc->reason == CP_DISCARD);

	mutex_lock(&dirty_i->seglist_lock);

	while (1) {
		int i;
		start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
		if (start >= MAIN_SEGS(sbi))
			break;
		end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
								start + 1);

		for (i = start; i < end; i++)
			clear_bit(i, prefree_map);

		dirty_i->nr_dirty[PRE] -= end - start;

		if (force || !test_opt(sbi, DISCARD))
			continue;

		if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) {
			f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
				(end - start) << sbi->log_blocks_per_seg);
			continue;
		}
next:
		secno = GET_SECNO(sbi, start);
		start_segno = secno * sbi->segs_per_sec;
		if (!IS_CURSEC(sbi, secno) &&
			!get_valid_blocks(sbi, start, sbi->segs_per_sec))
			f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
				sbi->segs_per_sec << sbi->log_blocks_per_seg);

		start = start_segno + sbi->segs_per_sec;
		if (start < end)
			goto next;
	}
	mutex_unlock(&dirty_i->seglist_lock);

	/* send small discards */
	list_for_each_entry_safe(entry, this, head, list) {
		if (force && entry->len < cpc->trim_minlen)
			goto skip;
		f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
		cpc->trimmed += entry->len;
skip:
		list_del(&entry->list);
		SM_I(sbi)->nr_discards -= entry->len;
		kmem_cache_free(discard_entry_slab, entry);
	}
}

static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct sit_info *sit_i = SIT_I(sbi);

	if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
		sit_i->dirty_sentries++;
		return false;
	}

	return true;
}

static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
					unsigned int segno, int modified)
{
	struct seg_entry *se = get_seg_entry(sbi, segno);
	se->type = type;
	if (modified)
		__mark_sit_entry_dirty(sbi, segno);
}

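/*
 * Apply a validity delta of @del (+1 on allocation, -1 on invalidation)
 * for @blkaddr to the in-memory SIT: the segment's valid block count,
 * mtime, current-valid and discard bitmaps, the checkpoint-valid count,
 * and the per-section valid block count.
 */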
static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
{
	struct seg_entry *se;
	unsigned int segno, offset;
	long int new_vblocks;

	segno = GET_SEGNO(sbi, blkaddr);

	se = get_seg_entry(sbi, segno);
	new_vblocks = se->valid_blocks + del;
	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

	f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
				(new_vblocks > sbi->blocks_per_seg)));

	se->valid_blocks = new_vblocks;
	se->mtime = get_mtime(sbi);
	SIT_I(sbi)->max_mtime = se->mtime;

	/* Update valid block bitmap */
	if (del > 0) {
		if (f2fs_test_and_set_bit(offset, se->cur_valid_map))
			f2fs_bug_on(sbi, 1);
		if (!f2fs_test_and_set_bit(offset, se->discard_map))
			sbi->discard_blks--;
	} else {
		if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map))
			f2fs_bug_on(sbi, 1);
		if (f2fs_test_and_clear_bit(offset, se->discard_map))
			sbi->discard_blks++;
	}
	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
		se->ckpt_valid_blocks += del;

	__mark_sit_entry_dirty(sbi, segno);

	/* update total number of valid blocks to be written in ckpt area */
	SIT_I(sbi)->written_valid_blocks += del;

	if (sbi->segs_per_sec > 1)
		get_sec_entry(sbi, segno)->valid_blocks += del;
}

void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
{
	update_sit_entry(sbi, new, 1);
	if (GET_SEGNO(sbi, old) != NULL_SEGNO)
		update_sit_entry(sbi, old, -1);

	locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
	locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
}

void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
{
	unsigned int segno = GET_SEGNO(sbi, addr);
	struct sit_info *sit_i = SIT_I(sbi);

	f2fs_bug_on(sbi, addr == NULL_ADDR);
	if (addr == NEW_ADDR)
		return;

	/* add it into sit main buffer */
	mutex_lock(&sit_i->sentry_lock);

	update_sit_entry(sbi, addr, -1);

	/* add it into dirty seglist */
	locate_dirty_segment(sbi, segno);

	mutex_unlock(&sit_i->sentry_lock);
}

bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int segno, offset;
	struct seg_entry *se;
	bool is_cp = false;

	if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
		return true;

	mutex_lock(&sit_i->sentry_lock);

	segno = GET_SEGNO(sbi, blkaddr);
	se = get_seg_entry(sbi, segno);
	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

	if (f2fs_test_bit(offset, se->ckpt_valid_map))
		is_cp = true;

	mutex_unlock(&sit_i->sentry_lock);

	return is_cp;
}

/*
 * This function must be called with curseg_mutex held.
 */
static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
					struct f2fs_summary *sum)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	void *addr = curseg->sum_blk;
	addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
	memcpy(addr, sum, sizeof(struct f2fs_summary));
}

/*
 * Calculate the number of current summary pages for writing
 */
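/*
 * Compacted summaries pack the NAT journal, the SIT journal and the
 * in-use summary entries of the three data cursegs into at most three
 * meta pages (see read_compacted_summaries() for the layout), so the
 * result here is always 1, 2 or 3.
 */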
int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
{
	int valid_sum_count = 0;
	int i, sum_in_page;

	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		if (sbi->ckpt->alloc_type[i] == SSR)
			valid_sum_count += sbi->blocks_per_seg;
		else {
			if (for_ra)
				valid_sum_count += le16_to_cpu(
					F2FS_CKPT(sbi)->cur_data_blkoff[i]);
			else
				valid_sum_count += curseg_blkoff(sbi, i);
		}
	}

	sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
			SUM_FOOTER_SIZE) / SUMMARY_SIZE;
	if (valid_sum_count <= sum_in_page)
		return 1;
	else if ((valid_sum_count - sum_in_page) <=
		(PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
		return 2;
	return 3;
}

/*
 * Caller should put this summary page
 */
struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
{
	return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
}

void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr)
{
	struct page *page = grab_meta_page(sbi, blk_addr);
	void *dst = page_address(page);

	if (src)
		memcpy(dst, src, PAGE_SIZE);
	else
		memset(dst, 0, PAGE_SIZE);
	set_page_dirty(page);
	f2fs_put_page(page, 1);
}

static void write_sum_page(struct f2fs_sb_info *sbi,
			struct f2fs_summary_block *sum_blk, block_t blk_addr)
{
	update_meta_page(sbi, (void *)sum_blk, blk_addr);
}

static void write_current_sum_page(struct f2fs_sb_info *sbi,
						int type, block_t blk_addr)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	struct page *page = grab_meta_page(sbi, blk_addr);
	struct f2fs_summary_block *src = curseg->sum_blk;
	struct f2fs_summary_block *dst;

	dst = (struct f2fs_summary_block *)page_address(page);

	mutex_lock(&curseg->curseg_mutex);

	down_read(&curseg->journal_rwsem);
	memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
	up_read(&curseg->journal_rwsem);

	memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
	memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);

	mutex_unlock(&curseg->curseg_mutex);

	set_page_dirty(page);
	f2fs_put_page(page, 1);
}

static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int segno = curseg->segno + 1;
	struct free_segmap_info *free_i = FREE_I(sbi);

	if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
		return !test_bit(segno, free_i->free_segmap);
	return 0;
}

/*
 * Find a new segment in the free segment bitmap, honoring the allocation
 * order. This function must succeed; anything else is a BUG.
 */
static void get_new_segment(struct f2fs_sb_info *sbi,
			unsigned int *newseg, bool new_sec, int dir)
{
	struct free_segmap_info *free_i = FREE_I(sbi);
	unsigned int segno, secno, zoneno;
	unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
	unsigned int hint = *newseg / sbi->segs_per_sec;
	unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg);
	unsigned int left_start = hint;
	bool init = true;
	int go_left = 0;
	int i;

	spin_lock(&free_i->segmap_lock);

	if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
		segno = find_next_zero_bit(free_i->free_segmap,
				(hint + 1) * sbi->segs_per_sec, *newseg + 1);
		if (segno < (hint + 1) * sbi->segs_per_sec)
			goto got_it;
	}
find_other_zone:
	secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
	if (secno >= MAIN_SECS(sbi)) {
		if (dir == ALLOC_RIGHT) {
			secno = find_next_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi), 0);
			f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
		} else {
			go_left = 1;
			left_start = hint - 1;
		}
	}
	if (go_left == 0)
		goto skip_left;

	while (test_bit(left_start, free_i->free_secmap)) {
		if (left_start > 0) {
			left_start--;
			continue;
		}
		left_start = find_next_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi), 0);
		f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
		break;
	}
	secno = left_start;
skip_left:
	hint = secno;
	segno = secno * sbi->segs_per_sec;
	zoneno = secno / sbi->secs_per_zone;

	/* give up on finding another zone */
	if (!init)
		goto got_it;
	if (sbi->secs_per_zone == 1)
		goto got_it;
	if (zoneno == old_zoneno)
		goto got_it;
	if (dir == ALLOC_LEFT) {
		if (!go_left && zoneno + 1 >= total_zones)
			goto got_it;
		if (go_left && zoneno == 0)
			goto got_it;
	}
	for (i = 0; i < NR_CURSEG_TYPE; i++)
		if (CURSEG_I(sbi, i)->zone == zoneno)
			break;

	if (i < NR_CURSEG_TYPE) {
		/* zone is in use, try another */
		if (go_left)
			hint = zoneno * sbi->secs_per_zone - 1;
		else if (zoneno + 1 >= total_zones)
			hint = 0;
		else
			hint = (zoneno + 1) * sbi->secs_per_zone;
		init = false;
		goto find_other_zone;
	}
got_it:
	/* set it as dirty segment in free segmap */
	f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
	__set_inuse(sbi, segno);
	*newseg = segno;
	spin_unlock(&free_i->segmap_lock);
}

static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	struct summary_footer *sum_footer;

	curseg->segno = curseg->next_segno;
	curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno);
	curseg->next_blkoff = 0;
	curseg->next_segno = NULL_SEGNO;

	sum_footer = &(curseg->sum_blk->footer);
	memset(sum_footer, 0, sizeof(struct summary_footer));
	if (IS_DATASEG(type))
		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
	if (IS_NODESEG(type))
		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
	__set_sit_entry_type(sbi, type, curseg->segno, modified);
}

/*
 * Allocate a current working segment.
 * This function always allocates a free segment in LFS manner.
 */
static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int segno = curseg->segno;
	int dir = ALLOC_LEFT;

	write_sum_page(sbi, curseg->sum_blk,
				GET_SUM_BLOCK(sbi, segno));
	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
		dir = ALLOC_RIGHT;

	if (test_opt(sbi, NOHEAP))
		dir = ALLOC_RIGHT;

	get_new_segment(sbi, &segno, new_sec, dir);
	curseg->next_segno = segno;
	reset_curseg(sbi, type, 1);
	curseg->alloc_type = LFS;
}

static void __next_free_blkoff(struct f2fs_sb_info *sbi,
			struct curseg_info *seg, block_t start)
{
	struct seg_entry *se = get_seg_entry(sbi, seg->segno);
	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
	unsigned long *target_map = SIT_I(sbi)->tmp_map;
	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
	int i, pos;

	for (i = 0; i < entries; i++)
		target_map[i] = ckpt_map[i] | cur_map[i];

	pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);

	seg->next_blkoff = pos;
}

/*
 * If a segment is written in LFS manner, the next block offset is simply
 * obtained by increasing the current block offset.  If a segment is written
 * in SSR manner, the next block offset is obtained by calling
 * __next_free_blkoff().
 */
static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
				struct curseg_info *seg)
{
	if (seg->alloc_type == SSR)
		__next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
	else
		seg->next_blkoff++;
}

/*
 * This function always allocates a used segment (from the dirty seglist)
 * in SSR manner, so it needs to recover the existing segment information
 * of valid blocks.
 */
static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int new_segno = curseg->next_segno;
	struct f2fs_summary_block *sum_node;
	struct page *sum_page;

	write_sum_page(sbi, curseg->sum_blk,
				GET_SUM_BLOCK(sbi, curseg->segno));
	__set_test_and_inuse(sbi, new_segno);

	mutex_lock(&dirty_i->seglist_lock);
	__remove_dirty_segment(sbi, new_segno, PRE);
	__remove_dirty_segment(sbi, new_segno, DIRTY);
	mutex_unlock(&dirty_i->seglist_lock);

	reset_curseg(sbi, type, 1);
	curseg->alloc_type = SSR;
	__next_free_blkoff(sbi, curseg, 0);

	if (reuse) {
		sum_page = get_sum_page(sbi, new_segno);
		sum_node = (struct f2fs_summary_block *)page_address(sum_page);
		memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
		f2fs_put_page(sum_page, 1);
	}
}

static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;

	if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0))
		return v_ops->get_victim(sbi,
				&(curseg)->next_segno, BG_GC, type, SSR);

	/* For data segments, let's do SSR more intensively */
	for (; type >= CURSEG_HOT_DATA; type--)
		if (v_ops->get_victim(sbi, &(curseg)->next_segno,
						BG_GC, type, SSR))
			return 1;
	return 0;
}

/*
 * Flush out the current segment and replace it with a new one.
 * This function must succeed; anything else is a BUG.
 */
static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
						int type, bool force)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);

	if (force)
		new_curseg(sbi, type, true);
	else if (type == CURSEG_WARM_NODE)
		new_curseg(sbi, type, false);
	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
		new_curseg(sbi, type, false);
	else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
		change_curseg(sbi, type, true);
	else
		new_curseg(sbi, type, false);

	stat_inc_seg_type(sbi, curseg);
}

static void __allocate_new_segments(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int old_segno;

	old_segno = curseg->segno;
	SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
	locate_dirty_segment(sbi, old_segno);
}

void allocate_new_segments(struct f2fs_sb_info *sbi)
{
	int i;

	if (test_opt(sbi, LFS))
		return;

	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
		__allocate_new_segments(sbi, i);
}

static const struct segment_allocation default_salloc_ops = {
	.allocate_segment = allocate_segment_by_default,
};

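/*
 * Trim the range given in @range in batches: each batch covers at most
 * BATCHED_TRIM_SEGMENTS(sbi) segments and is processed by writing a
 * checkpoint with CP_DISCARD set, so discard commands are only issued
 * for blocks that are safely checkpointed.
 */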
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
{
	__u64 start = F2FS_BYTES_TO_BLK(range->start);
	__u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
	unsigned int start_segno, end_segno;
	struct cp_control cpc;
	int err = 0;

	if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
		return -EINVAL;

	cpc.trimmed = 0;
	if (end <= MAIN_BLKADDR(sbi))
		goto out;

	/* start/end segment number in main_area */
	start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
	end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
						GET_SEGNO(sbi, end);
	cpc.reason = CP_DISCARD;
	cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));

	/* do checkpoint to issue discard commands safely */
	for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) {
		cpc.trim_start = start_segno;

		if (sbi->discard_blks == 0)
			break;
		else if (sbi->discard_blks < BATCHED_TRIM_BLOCKS(sbi))
			cpc.trim_end = end_segno;
		else
			cpc.trim_end = min_t(unsigned int,
				rounddown(start_segno +
				BATCHED_TRIM_SEGMENTS(sbi),
				sbi->segs_per_sec) - 1, end_segno);

		mutex_lock(&sbi->gc_mutex);
		err = write_checkpoint(sbi, &cpc);
		mutex_unlock(&sbi->gc_mutex);
	}
out:
	range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
	return err;
}

static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	if (curseg->next_blkoff < sbi->blocks_per_seg)
		return true;
	return false;
}

static int __get_segment_type_2(struct page *page, enum page_type p_type)
{
	if (p_type == DATA)
		return CURSEG_HOT_DATA;
	else
		return CURSEG_HOT_NODE;
}

static int __get_segment_type_4(struct page *page, enum page_type p_type)
{
	if (p_type == DATA) {
		struct inode *inode = page->mapping->host;

		if (S_ISDIR(inode->i_mode))
			return CURSEG_HOT_DATA;
		else
			return CURSEG_COLD_DATA;
	} else {
		if (IS_DNODE(page) && is_cold_node(page))
			return CURSEG_WARM_NODE;
		else
			return CURSEG_COLD_NODE;
	}
}

static int __get_segment_type_6(struct page *page, enum page_type p_type)
{
	if (p_type == DATA) {
		struct inode *inode = page->mapping->host;

		if (S_ISDIR(inode->i_mode))
			return CURSEG_HOT_DATA;
		else if (is_cold_data(page) || file_is_cold(inode))
			return CURSEG_COLD_DATA;
		else
			return CURSEG_WARM_DATA;
	} else {
		if (IS_DNODE(page))
			return is_cold_node(page) ? CURSEG_WARM_NODE :
						CURSEG_HOT_NODE;
		else
			return CURSEG_COLD_NODE;
	}
}

static int __get_segment_type(struct page *page, enum page_type p_type)
{
	switch (F2FS_P_SB(page)->active_logs) {
	case 2:
		return __get_segment_type_2(page, p_type);
	case 4:
		return __get_segment_type_4(page, p_type);
	}
	/* NR_CURSEG_TYPE(6) logs by default */
	f2fs_bug_on(F2FS_P_SB(page),
		F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE);
	return __get_segment_type_6(page, p_type);
}

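/*
 * Allocate one block of the given @type: hand out the next free block
 * of the matching curseg, record the summary entry, advance the write
 * pointer (allocating a new segment when the current one is full), and
 * update the SIT for both the new and the old block address.
 */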
void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
		block_t old_blkaddr, block_t *new_blkaddr,
		struct f2fs_summary *sum, int type)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg;
	bool direct_io = (type == CURSEG_DIRECT_IO);

	type = direct_io ? CURSEG_WARM_DATA : type;

	curseg = CURSEG_I(sbi, type);

	mutex_lock(&curseg->curseg_mutex);
	mutex_lock(&sit_i->sentry_lock);

	/* direct_io'ed data is aligned to the segment for better performance */
	if (direct_io && curseg->next_blkoff &&
				!has_not_enough_free_secs(sbi, 0))
		__allocate_new_segments(sbi, type);

	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	/*
	 * __add_sum_entry must be called with curseg_mutex held, because
	 * it updates a summary entry in the current summary block.
	 */
	__add_sum_entry(sbi, type, sum);

	__refresh_next_blkoff(sbi, curseg);

	stat_inc_block_count(sbi, curseg);

	if (!__has_curseg_space(sbi, type))
		sit_i->s_ops->allocate_segment(sbi, type, false);
	/*
	 * SIT information should be updated before segment allocation,
	 * since SSR needs the latest valid block information.
	 */
	refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);

	mutex_unlock(&sit_i->sentry_lock);

	if (page && IS_NODESEG(type))
		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));

	mutex_unlock(&curseg->curseg_mutex);
}

static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
{
	int type = __get_segment_type(fio->page, fio->type);

	if (fio->type == NODE || fio->type == DATA)
		mutex_lock(&fio->sbi->wio_mutex[fio->type]);

	allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
					&fio->new_blkaddr, sum, type);

	/* writeout dirty page into bdev */
	f2fs_submit_page_mbio(fio);

	if (fio->type == NODE || fio->type == DATA)
		mutex_unlock(&fio->sbi->wio_mutex[fio->type]);
}

void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
{
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = META,
		.rw = WRITE_SYNC | REQ_META | REQ_PRIO,
		.old_blkaddr = page->index,
		.new_blkaddr = page->index,
		.page = page,
		.encrypted_page = NULL,
	};

	if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
		fio.rw &= ~REQ_META;

	set_page_writeback(page);
	f2fs_submit_page_mbio(&fio);
}

void write_node_page(unsigned int nid, struct f2fs_io_info *fio)
{
	struct f2fs_summary sum;

	set_summary(&sum, nid, 0, 0);
	do_write_page(&sum, fio);
}

void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	struct f2fs_summary sum;
	struct node_info ni;

	f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
	get_node_info(sbi, dn->nid, &ni);
	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
	do_write_page(&sum, fio);
	f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
}

void rewrite_data_page(struct f2fs_io_info *fio)
{
	fio->new_blkaddr = fio->old_blkaddr;
	stat_inc_inplace_blocks(fio->sbi);
	f2fs_submit_page_mbio(fio);
}

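/*
 * Relocate a block from @old_blkaddr to @new_blkaddr (used e.g. when
 * rolling back atomic writes or replaying them during recovery):
 * temporarily point the matching curseg at the target segment, record
 * @sum there, and update the SIT for both addresses.  With
 * @recover_curseg the previous allocation position is restored
 * afterwards; @recover_newaddr controls whether @new_blkaddr is marked
 * valid again in that case.
 */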
void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
				block_t old_blkaddr, block_t new_blkaddr,
				bool recover_curseg, bool recover_newaddr)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg;
	unsigned int segno, old_cursegno;
	struct seg_entry *se;
	int type;
	unsigned short old_blkoff;

	segno = GET_SEGNO(sbi, new_blkaddr);
	se = get_seg_entry(sbi, segno);
	type = se->type;

	if (!recover_curseg) {
		/* for recovery flow */
		if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
			if (old_blkaddr == NULL_ADDR)
				type = CURSEG_COLD_DATA;
			else
				type = CURSEG_WARM_DATA;
		}
	} else {
		if (!IS_CURSEG(sbi, segno))
			type = CURSEG_WARM_DATA;
	}

	curseg = CURSEG_I(sbi, type);

	mutex_lock(&curseg->curseg_mutex);
	mutex_lock(&sit_i->sentry_lock);

	old_cursegno = curseg->segno;
	old_blkoff = curseg->next_blkoff;

	/* change the current segment */
	if (segno != curseg->segno) {
		curseg->next_segno = segno;
		change_curseg(sbi, type, true);
	}

	curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
	__add_sum_entry(sbi, type, sum);

	if (!recover_curseg || recover_newaddr)
		update_sit_entry(sbi, new_blkaddr, 1);
	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
		update_sit_entry(sbi, old_blkaddr, -1);

	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
	locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));

	locate_dirty_segment(sbi, old_cursegno);

	if (recover_curseg) {
		if (old_cursegno != curseg->segno) {
			curseg->next_segno = old_cursegno;
			change_curseg(sbi, type, true);
		}
		curseg->next_blkoff = old_blkoff;
	}

	mutex_unlock(&sit_i->sentry_lock);
	mutex_unlock(&curseg->curseg_mutex);
}

void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
				block_t old_addr, block_t new_addr,
				unsigned char version, bool recover_curseg,
				bool recover_newaddr)
{
	struct f2fs_summary sum;

	set_summary(&sum, dn->nid, dn->ofs_in_node, version);

	__f2fs_replace_block(sbi, &sum, old_addr, new_addr,
					recover_curseg, recover_newaddr);

	f2fs_update_data_blkaddr(dn, new_addr);
}

void f2fs_wait_on_page_writeback(struct page *page,
				enum page_type type, bool ordered)
{
	if (PageWriteback(page)) {
		struct f2fs_sb_info *sbi = F2FS_P_SB(page);

		f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, type, WRITE);
		if (ordered)
			wait_on_page_writeback(page);
		else
			/* wait_for_stable_page(page) is not supported here */
			wait_on_page_writeback(page);
	}
}

void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi,
							block_t blkaddr)
{
	struct page *cpage;

	if (blkaddr == NEW_ADDR)
		return;

	f2fs_bug_on(sbi, blkaddr == NULL_ADDR);

	cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
	if (cpage) {
		f2fs_wait_on_page_writeback(cpage, DATA, true);
		f2fs_put_page(cpage, 1);
	}
}

static int read_compacted_summaries(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct curseg_info *seg_i;
	unsigned char *kaddr;
	struct page *page;
	block_t start;
	int i, j, offset;

	start = start_sum_block(sbi);

	page = get_meta_page(sbi, start++);
	kaddr = (unsigned char *)page_address(page);

	/* Step 1: restore nat cache */
	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
	memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);

	/* Step 2: restore sit cache */
	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
	memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
	offset = 2 * SUM_JOURNAL_SIZE;

	/* Step 3: restore summary entries */
	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		unsigned short blk_off;
		unsigned int segno;

		seg_i = CURSEG_I(sbi, i);
		segno = le32_to_cpu(ckpt->cur_data_segno[i]);
		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
		seg_i->next_segno = segno;
		reset_curseg(sbi, i, 0);
		seg_i->alloc_type = ckpt->alloc_type[i];
		seg_i->next_blkoff = blk_off;

		if (seg_i->alloc_type == SSR)
			blk_off = sbi->blocks_per_seg;

		for (j = 0; j < blk_off; j++) {
			struct f2fs_summary *s;
			s = (struct f2fs_summary *)(kaddr + offset);
			seg_i->sum_blk->entries[j] = *s;
			offset += SUMMARY_SIZE;
			if (offset + SUMMARY_SIZE <= PAGE_SIZE -
						SUM_FOOTER_SIZE)
				continue;

			f2fs_put_page(page, 1);
			page = NULL;

			page = get_meta_page(sbi, start++);
			kaddr = (unsigned char *)page_address(page);
			offset = 0;
		}
	}
	f2fs_put_page(page, 1);
	return 0;
}
1746
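/*
 * Load the summary block of one current segment (data or node) from its
 * checkpointed location and rebuild the matching curseg state.
 */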
static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_summary_block *sum;
	struct curseg_info *curseg;
	struct page *new;
	unsigned short blk_off;
	unsigned int segno = 0;
	block_t blk_addr = 0;

	/* get segment number and block addr */
	if (IS_DATASEG(type)) {
		segno = le32_to_cpu(ckpt->cur_data_segno[type]);
		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
							CURSEG_HOT_DATA]);
		if (__exist_node_summaries(sbi))
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
		else
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
	} else {
		segno = le32_to_cpu(ckpt->cur_node_segno[type -
							CURSEG_HOT_NODE]);
		blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
							CURSEG_HOT_NODE]);
		if (__exist_node_summaries(sbi))
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
							type - CURSEG_HOT_NODE);
		else
			blk_addr = GET_SUM_BLOCK(sbi, segno);
	}

	new = get_meta_page(sbi, blk_addr);
	sum = (struct f2fs_summary_block *)page_address(new);

	if (IS_NODESEG(type)) {
		if (__exist_node_summaries(sbi)) {
			struct f2fs_summary *ns = &sum->entries[0];
			int i;
			for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
				ns->version = 0;
				ns->ofs_in_node = 0;
			}
		} else {
			int err;

			err = restore_node_summary(sbi, segno, sum);
			if (err) {
				f2fs_put_page(new, 1);
				return err;
			}
		}
	}

	/* set uncompleted segment to curseg */
	curseg = CURSEG_I(sbi, type);
	mutex_lock(&curseg->curseg_mutex);

	/* update journal info */
	down_write(&curseg->journal_rwsem);
	memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
	up_write(&curseg->journal_rwsem);

	memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
	memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
	curseg->next_segno = segno;
	reset_curseg(sbi, type, 0);
	curseg->alloc_type = ckpt->alloc_type[type];
	curseg->next_blkoff = blk_off;
	mutex_unlock(&curseg->curseg_mutex);
	f2fs_put_page(new, 1);
	return 0;
}

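/*
 * Restore all current segments at mount time: compacted data summaries
 * first (if the checkpoint wrote them), then the remaining normal ones.
 */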
static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
{
	int type = CURSEG_HOT_DATA;
	int err;

	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
		int npages = npages_for_summary_flush(sbi, true);

		if (npages >= 2)
			ra_meta_pages(sbi, start_sum_block(sbi), npages,
							META_CP, true);

		/* restore for compacted data summary */
		if (read_compacted_summaries(sbi))
			return -EINVAL;
		type = CURSEG_HOT_NODE;
	}

	if (__exist_node_summaries(sbi))
		ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
					NR_CURSEG_TYPE - type, META_CP, true);

	for (; type <= CURSEG_COLD_NODE; type++) {
		err = read_normal_summaries(sbi, type);
		if (err)
			return err;
	}

	return 0;
}

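/*
 * Pack the NAT/SIT journals and the three data summaries into as few
 * meta pages as possible, starting at @blkaddr, for a compacted checkpoint.
 */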
static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	struct page *page;
	unsigned char *kaddr;
	struct f2fs_summary *summary;
	struct curseg_info *seg_i;
	int written_size = 0;
	int i, j;

	page = grab_meta_page(sbi, blkaddr++);
	kaddr = (unsigned char *)page_address(page);

	/* Step 1: write nat cache */
	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
	memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
	written_size += SUM_JOURNAL_SIZE;

	/* Step 2: write sit cache */
	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
	memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
	written_size += SUM_JOURNAL_SIZE;

	/* Step 3: write summary entries */
	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		unsigned short blkoff;
		seg_i = CURSEG_I(sbi, i);
		if (sbi->ckpt->alloc_type[i] == SSR)
			blkoff = sbi->blocks_per_seg;
		else
			blkoff = curseg_blkoff(sbi, i);

		for (j = 0; j < blkoff; j++) {
			if (!page) {
				page = grab_meta_page(sbi, blkaddr++);
				kaddr = (unsigned char *)page_address(page);
				written_size = 0;
			}
			summary = (struct f2fs_summary *)(kaddr + written_size);
			*summary = seg_i->sum_blk->entries[j];
			written_size += SUMMARY_SIZE;

			if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
							SUM_FOOTER_SIZE)
				continue;

			set_page_dirty(page);
			f2fs_put_page(page, 1);
			page = NULL;
		}
	}
	if (page) {
		set_page_dirty(page);
		f2fs_put_page(page, 1);
	}
}

static void write_normal_summaries(struct f2fs_sb_info *sbi,
					block_t blkaddr, int type)
{
	int i, end;
	if (IS_DATASEG(type))
		end = type + NR_CURSEG_DATA_TYPE;
	else
		end = type + NR_CURSEG_NODE_TYPE;

	for (i = type; i < end; i++)
		write_current_sum_page(sbi, i, blkaddr + (i - type));
}

void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
{
	if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG))
		write_compacted_summaries(sbi, start_blk);
	else
		write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
}

void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
{
	write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
}

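/*
 * Find the journal slot caching @val (a nid for NAT_JOURNAL, a segno for
 * SIT_JOURNAL); optionally allocate a fresh slot when @alloc is set.
 * Returns the slot index, or -1 if none is found or available.
 */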
int lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
					unsigned int val, int alloc)
{
	int i;

	if (type == NAT_JOURNAL) {
		for (i = 0; i < nats_in_cursum(journal); i++) {
			if (le32_to_cpu(nid_in_journal(journal, i)) == val)
				return i;
		}
		if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
			return update_nats_in_cursum(journal, 1);
	} else if (type == SIT_JOURNAL) {
		for (i = 0; i < sits_in_cursum(journal); i++)
			if (le32_to_cpu(segno_in_journal(journal, i)) == val)
				return i;
		if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
			return update_sits_in_cursum(journal, 1);
	}
	return -1;
}

static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
					unsigned int segno)
{
	return get_meta_page(sbi, current_sit_addr(sbi, segno));
}

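/*
 * SIT blocks are double-buffered: copy the current on-disk SIT block to
 * its alternate location, dirty the copy, and flip the block's bitmap bit
 * so future lookups read from the new location.
 */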
static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
					unsigned int start)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct page *src_page, *dst_page;
	pgoff_t src_off, dst_off;
	void *src_addr, *dst_addr;

	src_off = current_sit_addr(sbi, start);
	dst_off = next_sit_addr(sbi, src_off);

	/* get current sit block page without lock */
	src_page = get_meta_page(sbi, src_off);
	dst_page = grab_meta_page(sbi, dst_off);
	f2fs_bug_on(sbi, PageDirty(src_page));

	src_addr = page_address(src_page);
	dst_addr = page_address(dst_page);
	memcpy(dst_addr, src_addr, PAGE_SIZE);

	set_page_dirty(dst_page);
	f2fs_put_page(src_page, 1);

	set_to_next_sit(sit_i, start);

	return dst_page;
}

static struct sit_entry_set *grab_sit_entry_set(void)
{
	struct sit_entry_set *ses =
			f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS);

	ses->entry_cnt = 0;
	INIT_LIST_HEAD(&ses->set_list);
	return ses;
}

static void release_sit_entry_set(struct sit_entry_set *ses)
{
	list_del(&ses->set_list);
	kmem_cache_free(sit_entry_set_slab, ses);
}

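/*
 * Keep the set list sorted by entry_cnt in ascending order, so that sets
 * with fewer dirty entries come first and can be flushed to the journal
 * while space remains.
 */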
static void adjust_sit_entry_set(struct sit_entry_set *ses,
						struct list_head *head)
{
	struct sit_entry_set *next = ses;

	if (list_is_last(&ses->set_list, head))
		return;

	list_for_each_entry_continue(next, head, set_list)
		if (ses->entry_cnt <= next->entry_cnt)
			break;

	list_move_tail(&ses->set_list, &next->set_list);
}

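/*
 * Account one dirty SIT entry against the set covering its SIT block,
 * creating the set on first use.
 */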
static void add_sit_entry(unsigned int segno, struct list_head *head)
{
	struct sit_entry_set *ses;
	unsigned int start_segno = START_SEGNO(segno);

	list_for_each_entry(ses, head, set_list) {
		if (ses->start_segno == start_segno) {
			ses->entry_cnt++;
			adjust_sit_entry_set(ses, head);
			return;
		}
	}

	ses = grab_sit_entry_set();

	ses->start_segno = start_segno;
	ses->entry_cnt++;
	list_add(&ses->set_list, head);
}

static void add_sits_in_set(struct f2fs_sb_info *sbi)
{
	struct f2fs_sm_info *sm_info = SM_I(sbi);
	struct list_head *set_list = &sm_info->sit_entry_set;
	unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
	unsigned int segno;

	for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
		add_sit_entry(segno, set_list);
}

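/*
 * Drain every SIT entry cached in the cold data journal back into the
 * dirty bitmap / entry sets, then reset the journal's SIT count to zero.
 */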
static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
{
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_journal *journal = curseg->journal;
	int i;

	down_write(&curseg->journal_rwsem);
	for (i = 0; i < sits_in_cursum(journal); i++) {
		unsigned int segno;
		bool dirtied;

		segno = le32_to_cpu(segno_in_journal(journal, i));
		dirtied = __mark_sit_entry_dirty(sbi, segno);

		if (!dirtied)
			add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
	}
	update_sits_in_cursum(journal, -i);
	up_write(&curseg->journal_rwsem);
}

/*
 * CP calls this function, which flushes SIT entries including sit_journal,
 * and moves prefree segs to free segs.
 */
void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_journal *journal = curseg->journal;
	struct sit_entry_set *ses, *tmp;
	struct list_head *head = &SM_I(sbi)->sit_entry_set;
	bool to_journal = true;
	struct seg_entry *se;

	mutex_lock(&sit_i->sentry_lock);

	if (!sit_i->dirty_sentries)
		goto out;

	/*
	 * temporarily add and account the sit entries of the dirty
	 * bitmap in sit entry sets
	 */
	add_sits_in_set(sbi);

	/*
	 * if there is not enough space in the journal to store all dirty
	 * sit entries, remove them all from the journal and account them
	 * in the sit entry sets instead.
	 */
	if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL))
		remove_sits_in_journal(sbi);

	/*
	 * there are two steps to flush sit entries:
	 * #1, flush sit entries to journal in current cold data summary block.
	 * #2, flush sit entries to sit page.
	 */
	list_for_each_entry_safe(ses, tmp, head, set_list) {
		struct page *page = NULL;
		struct f2fs_sit_block *raw_sit = NULL;
		unsigned int start_segno = ses->start_segno;
		unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
						(unsigned long)MAIN_SEGS(sbi));
		unsigned int segno = start_segno;

		if (to_journal &&
			!__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
			to_journal = false;

		if (to_journal) {
			down_write(&curseg->journal_rwsem);
		} else {
			page = get_next_sit_page(sbi, start_segno);
			raw_sit = page_address(page);
		}

		/* flush dirty sit entries in region of current sit set */
		for_each_set_bit_from(segno, bitmap, end) {
			int offset, sit_offset;

			se = get_seg_entry(sbi, segno);

			/* add discard candidates */
			if (cpc->reason != CP_DISCARD) {
				cpc->trim_start = segno;
				add_discard_addrs(sbi, cpc);
			}

			if (to_journal) {
				offset = lookup_journal_in_cursum(journal,
							SIT_JOURNAL, segno, 1);
				f2fs_bug_on(sbi, offset < 0);
				segno_in_journal(journal, offset) =
							cpu_to_le32(segno);
				seg_info_to_raw_sit(se,
					&sit_in_journal(journal, offset));
			} else {
				sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
				seg_info_to_raw_sit(se,
						&raw_sit->entries[sit_offset]);
			}

			__clear_bit(segno, bitmap);
			sit_i->dirty_sentries--;
			ses->entry_cnt--;
		}

		if (to_journal)
			up_write(&curseg->journal_rwsem);
		else
			f2fs_put_page(page, 1);

		f2fs_bug_on(sbi, ses->entry_cnt);
		release_sit_entry_set(ses);
	}

	f2fs_bug_on(sbi, !list_empty(head));
	f2fs_bug_on(sbi, sit_i->dirty_sentries);
out:
	if (cpc->reason == CP_DISCARD) {
		for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
			add_discard_addrs(sbi, cpc);
	}
	mutex_unlock(&sit_i->sentry_lock);

	set_prefree_as_free_segments(sbi);
}

static int build_sit_info(struct f2fs_sb_info *sbi)
{
	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct sit_info *sit_i;
	unsigned int sit_segs, start;
	char *src_bitmap, *dst_bitmap;
	unsigned int bitmap_size;

	/* allocate memory for SIT information */
	sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL);
	if (!sit_i)
		return -ENOMEM;

	SM_I(sbi)->sit_info = sit_i;

	sit_i->sentries = f2fs_kvzalloc(MAIN_SEGS(sbi) *
					sizeof(struct seg_entry), GFP_KERNEL);
	if (!sit_i->sentries)
		return -ENOMEM;

	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
	sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
	if (!sit_i->dirty_sentries_bitmap)
		return -ENOMEM;

	for (start = 0; start < MAIN_SEGS(sbi); start++) {
		sit_i->sentries[start].cur_valid_map
			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
		sit_i->sentries[start].ckpt_valid_map
			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
		sit_i->sentries[start].discard_map
			= kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
		if (!sit_i->sentries[start].cur_valid_map ||
				!sit_i->sentries[start].ckpt_valid_map ||
				!sit_i->sentries[start].discard_map)
			return -ENOMEM;
	}

	sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
	if (!sit_i->tmp_map)
		return -ENOMEM;

	if (sbi->segs_per_sec > 1) {
		sit_i->sec_entries = f2fs_kvzalloc(MAIN_SECS(sbi) *
					sizeof(struct sec_entry), GFP_KERNEL);
		if (!sit_i->sec_entries)
			return -ENOMEM;
	}

	/* get information related with SIT */
	sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;

	/* setup SIT bitmap from checkpoint pack */
	bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
	src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);

	dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
	if (!dst_bitmap)
		return -ENOMEM;

	/* init SIT information */
	sit_i->s_ops = &default_salloc_ops;

	sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
	sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
	sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count);
	sit_i->sit_bitmap = dst_bitmap;
	sit_i->bitmap_size = bitmap_size;
	sit_i->dirty_sentries = 0;
	sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
	sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
	sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
	mutex_init(&sit_i->sentry_lock);
	return 0;
}

static int build_free_segmap(struct f2fs_sb_info *sbi)
{
	struct free_segmap_info *free_i;
	unsigned int bitmap_size, sec_bitmap_size;

	/* allocate memory for free segmap information */
	free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL);
	if (!free_i)
		return -ENOMEM;

	SM_I(sbi)->free_info = free_i;

	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
	free_i->free_segmap = f2fs_kvmalloc(bitmap_size, GFP_KERNEL);
	if (!free_i->free_segmap)
		return -ENOMEM;

	sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
	free_i->free_secmap = f2fs_kvmalloc(sec_bitmap_size, GFP_KERNEL);
	if (!free_i->free_secmap)
		return -ENOMEM;

	/* set all segments as dirty temporarily */
	memset(free_i->free_segmap, 0xff, bitmap_size);
	memset(free_i->free_secmap, 0xff, sec_bitmap_size);

	/* init free segmap information */
	free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
	free_i->free_segments = 0;
	free_i->free_sections = 0;
	spin_lock_init(&free_i->segmap_lock);
	return 0;
}

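/*
 * Allocate the curseg array plus one summary block and one in-memory
 * journal per log type, then restore their contents from the checkpoint.
 */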
static int build_curseg(struct f2fs_sb_info *sbi)
{
	struct curseg_info *array;
	int i;

	array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);
	if (!array)
		return -ENOMEM;

	SM_I(sbi)->curseg_array = array;

	for (i = 0; i < NR_CURSEG_TYPE; i++) {
		mutex_init(&array[i].curseg_mutex);
		array[i].sum_blk = kzalloc(PAGE_SIZE, GFP_KERNEL);
		if (!array[i].sum_blk)
			return -ENOMEM;
		init_rwsem(&array[i].journal_rwsem);
		array[i].journal = kzalloc(sizeof(struct f2fs_journal),
							GFP_KERNEL);
		if (!array[i].journal)
			return -ENOMEM;
		array[i].segno = NULL_SEGNO;
		array[i].next_blkoff = 0;
	}
	return restore_curseg_summaries(sbi);
}

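/*
 * Populate the in-memory seg_entry array from on-disk SIT blocks, giving
 * entries cached in the cold data journal priority over the SIT area.
 */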
static void build_sit_entries(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_journal *journal = curseg->journal;
	int sit_blk_cnt = SIT_BLK_CNT(sbi);
	unsigned int i, start, end;
	unsigned int readed, start_blk = 0;
	int nrpages = MAX_BIO_BLOCKS(sbi) * 8;

	do {
		readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT, true);

		start = start_blk * sit_i->sents_per_block;
		end = (start_blk + readed) * sit_i->sents_per_block;

		for (; start < end && start < MAIN_SEGS(sbi); start++) {
			struct seg_entry *se = &sit_i->sentries[start];
			struct f2fs_sit_block *sit_blk;
			struct f2fs_sit_entry sit;
			struct page *page;

			down_read(&curseg->journal_rwsem);
			for (i = 0; i < sits_in_cursum(journal); i++) {
				if (le32_to_cpu(segno_in_journal(journal, i))
								== start) {
					sit = sit_in_journal(journal, i);
					up_read(&curseg->journal_rwsem);
					goto got_it;
				}
			}
			up_read(&curseg->journal_rwsem);

			page = get_current_sit_page(sbi, start);
			sit_blk = (struct f2fs_sit_block *)page_address(page);
			sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
			f2fs_put_page(page, 1);
got_it:
			check_block_count(sbi, start, &sit);
			seg_info_from_raw_sit(se, &sit);

			/* build discard map only one time */
			memcpy(se->discard_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE);
			sbi->discard_blks += sbi->blocks_per_seg - se->valid_blocks;

			if (sbi->segs_per_sec > 1) {
				struct sec_entry *e = get_sec_entry(sbi, start);
				e->valid_blocks += se->valid_blocks;
			}
		}
		start_blk += readed;
	} while (start_blk < sit_blk_cnt);
}

static void init_free_segmap(struct f2fs_sb_info *sbi)
{
	unsigned int start;
	int type;

	for (start = 0; start < MAIN_SEGS(sbi); start++) {
		struct seg_entry *sentry = get_seg_entry(sbi, start);
		if (!sentry->valid_blocks)
			__set_free(sbi, start);
	}

	/* mark the current segments as in use */
	for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
		struct curseg_info *curseg_t = CURSEG_I(sbi, type);
		__set_test_and_inuse(sbi, curseg_t->segno);
	}
}

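/*
 * Scan all in-use segments and mark those that are only partially valid
 * (neither full nor empty) as dirty, i.e. candidates for cleaning.
 */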
static void init_dirty_segmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct free_segmap_info *free_i = FREE_I(sbi);
	unsigned int segno = 0, offset = 0;
	unsigned short valid_blocks;

	while (1) {
		/* find dirty segment based on free segmap */
		segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
		if (segno >= MAIN_SEGS(sbi))
			break;
		offset = segno + 1;
		valid_blocks = get_valid_blocks(sbi, segno, 0);
		if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
			continue;
		if (valid_blocks > sbi->blocks_per_seg) {
			f2fs_bug_on(sbi, 1);
			continue;
		}
		mutex_lock(&dirty_i->seglist_lock);
		__locate_dirty_segment(sbi, segno, DIRTY);
		mutex_unlock(&dirty_i->seglist_lock);
	}
}

static int init_victim_secmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));

	dirty_i->victim_secmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
	if (!dirty_i->victim_secmap)
		return -ENOMEM;
	return 0;
}

static int build_dirty_segmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i;
	unsigned int bitmap_size, i;

	/* allocate memory for dirty segments list information */
	dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL);
	if (!dirty_i)
		return -ENOMEM;

	SM_I(sbi)->dirty_info = dirty_i;
	mutex_init(&dirty_i->seglist_lock);

	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));

	for (i = 0; i < NR_DIRTY_TYPE; i++) {
		dirty_i->dirty_segmap[i] = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
		if (!dirty_i->dirty_segmap[i])
			return -ENOMEM;
	}

	init_dirty_segmap(sbi);
	return init_victim_secmap(sbi);
}

/*
 * Update min, max modified time for cost-benefit GC algorithm
 */
static void init_min_max_mtime(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int segno;

	mutex_lock(&sit_i->sentry_lock);

	sit_i->min_mtime = LLONG_MAX;

	for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
		unsigned int i;
		unsigned long long mtime = 0;

		for (i = 0; i < sbi->segs_per_sec; i++)
			mtime += get_seg_entry(sbi, segno + i)->mtime;

		mtime = div_u64(mtime, sbi->segs_per_sec);

		if (sit_i->min_mtime > mtime)
			sit_i->min_mtime = mtime;
	}
	sit_i->max_mtime = get_mtime(sbi);
	mutex_unlock(&sit_i->sentry_lock);
}

int build_segment_manager(struct f2fs_sb_info *sbi)
{
	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_sm_info *sm_info;
	int err;

	sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL);
	if (!sm_info)
		return -ENOMEM;

	/* init sm info */
	sbi->sm_info = sm_info;
	sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
	sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
	sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
	sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
	sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
	sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
	sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
	sm_info->rec_prefree_segments = sm_info->main_segments *
					DEF_RECLAIM_PREFREE_SEGMENTS / 100;
	if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
		sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;

	if (!test_opt(sbi, LFS))
		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;

	INIT_LIST_HEAD(&sm_info->discard_list);
	sm_info->nr_discards = 0;
	sm_info->max_discards = 0;

	sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;

	INIT_LIST_HEAD(&sm_info->sit_entry_set);

	if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
		err = create_flush_cmd_control(sbi);
		if (err)
			return err;
	}

	err = build_sit_info(sbi);
	if (err)
		return err;
	err = build_free_segmap(sbi);
	if (err)
		return err;
	err = build_curseg(sbi);
	if (err)
		return err;

	/* reinit free segmap based on SIT */
	build_sit_entries(sbi);

	init_free_segmap(sbi);
	err = build_dirty_segmap(sbi);
	if (err)
		return err;

	init_min_max_mtime(sbi);
	return 0;
}

static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	mutex_lock(&dirty_i->seglist_lock);
	f2fs_kvfree(dirty_i->dirty_segmap[dirty_type]);
	dirty_i->nr_dirty[dirty_type] = 0;
	mutex_unlock(&dirty_i->seglist_lock);
}

static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	f2fs_kvfree(dirty_i->victim_secmap);
}

static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	int i;

	if (!dirty_i)
		return;

	/* discard pre-free/dirty segments list */
	for (i = 0; i < NR_DIRTY_TYPE; i++)
		discard_dirty_segmap(sbi, i);

	destroy_victim_secmap(sbi);
	SM_I(sbi)->dirty_info = NULL;
	kfree(dirty_i);
}

static void destroy_curseg(struct f2fs_sb_info *sbi)
{
	struct curseg_info *array = SM_I(sbi)->curseg_array;
	int i;

	if (!array)
		return;
	SM_I(sbi)->curseg_array = NULL;
	for (i = 0; i < NR_CURSEG_TYPE; i++) {
		kfree(array[i].sum_blk);
		kfree(array[i].journal);
	}
	kfree(array);
}

static void destroy_free_segmap(struct f2fs_sb_info *sbi)
{
	struct free_segmap_info *free_i = SM_I(sbi)->free_info;
	if (!free_i)
		return;
	SM_I(sbi)->free_info = NULL;
	f2fs_kvfree(free_i->free_segmap);
	f2fs_kvfree(free_i->free_secmap);
	kfree(free_i);
}

static void destroy_sit_info(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int start;

	if (!sit_i)
		return;

	if (sit_i->sentries) {
		for (start = 0; start < MAIN_SEGS(sbi); start++) {
			kfree(sit_i->sentries[start].cur_valid_map);
			kfree(sit_i->sentries[start].ckpt_valid_map);
			kfree(sit_i->sentries[start].discard_map);
		}
	}
	kfree(sit_i->tmp_map);

	f2fs_kvfree(sit_i->sentries);
	f2fs_kvfree(sit_i->sec_entries);
	f2fs_kvfree(sit_i->dirty_sentries_bitmap);

	SM_I(sbi)->sit_info = NULL;
	kfree(sit_i->sit_bitmap);
	kfree(sit_i);
}

void destroy_segment_manager(struct f2fs_sb_info *sbi)
{
	struct f2fs_sm_info *sm_info = SM_I(sbi);

	if (!sm_info)
		return;
	destroy_flush_cmd_control(sbi);
	destroy_dirty_segmap(sbi);
	destroy_curseg(sbi);
	destroy_free_segmap(sbi);
	destroy_sit_info(sbi);
	sbi->sm_info = NULL;
	kfree(sm_info);
}

int __init create_segment_manager_caches(void)
{
	discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
			sizeof(struct discard_entry));
	if (!discard_entry_slab)
		goto fail;

	sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
			sizeof(struct sit_entry_set));
	if (!sit_entry_set_slab)
		goto destroy_discard_entry;

	inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
			sizeof(struct inmem_pages));
	if (!inmem_entry_slab)
		goto destroy_sit_entry_set;
	return 0;

destroy_sit_entry_set:
	kmem_cache_destroy(sit_entry_set_slab);
destroy_discard_entry:
	kmem_cache_destroy(discard_entry_slab);
fail:
	return -ENOMEM;
}

void destroy_segment_manager_caches(void)
{
	kmem_cache_destroy(sit_entry_set_slab);
	kmem_cache_destroy(discard_entry_slab);
	kmem_cache_destroy(inmem_entry_slab);
}