Blame - fs/logfs/gc.c - kernel/msm-4.9

blob: 76c242fbe1b0a66c7d1b38c877beab55fb2995c4 [file] [log] [blame]

Joern Engel	5db53f3	2009-11-20 20:13:39 +0100	[diff] [blame]	1	/*
				2	* fs/logfs/gc.c - garbage collection code
				3	*
				4	* As should be obvious for Linux kernel code, license is GPLv2
				5	*
				6	* Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
				7	*/
				8	#include "logfs.h"
				9	#include <linux/sched.h>
Tejun Heo	5a0e3ad	2010-03-24 17:04:11 +0900	[diff] [blame]	10	#include <linux/slab.h>
Joern Engel	5db53f3	2009-11-20 20:13:39 +0100	[diff] [blame]	11
				12	/*
				13	* Wear leveling needs to kick in when the difference between low erase
				14	* counts and high erase counts gets too big. A good value for "too big"
				15	* may be somewhat below 10% of maximum erase count for the device.
				16	* Why not 397, to pick a nice round number with no specific meaning? :)
				17	*
				18	* WL_RATELIMIT is the minimum time between two wear level events. A huge
				19	* number of segments may fulfil the requirements for wear leveling at the
				20	* same time. If that happens we don't want to cause a latency from hell,
				21	* but just gently pick one segment every so often and minimize overhead.
				22	*/
				23	#define WL_DELTA 397
				24	#define WL_RATELIMIT 100
				25	#define MAX_OBJ_ALIASES 2600
				26	#define SCAN_RATIO 512 /* number of scanned segments per gc'd segment */
				27	#define LIST_SIZE 64 /* base size of candidate lists */
				28	#define SCAN_ROUNDS 128 /* maximum number of complete medium scans */
				29	#define SCAN_ROUNDS_HIGH 4 /* maximum number of higher-level scans */
				30
				31	static int no_free_segments(struct super_block *sb)
				32	{
				33	struct logfs_super *super = logfs_super(sb);
				34
				35	return super->s_free_list.count;
				36	}
				37
				38	/* journal has distance -1, top-most ifile layer distance 0 */
				39	static u8 root_distance(struct super_block *sb, gc_level_t __gc_level)
				40	{
				41	struct logfs_super *super = logfs_super(sb);
				42	u8 gc_level = (__force u8)__gc_level;
				43
				44	switch (gc_level) {
				45	case 0: /* fall through */
				46	case 1: /* fall through */
				47	case 2: /* fall through */
				48	case 3:
				49	/* file data or indirect blocks */
				50	return super->s_ifile_levels + super->s_iblock_levels - gc_level;
				51	case 6: /* fall through */
				52	case 7: /* fall through */
				53	case 8: /* fall through */
				54	case 9:
				55	/* inode file data or indirect blocks */
				56	return super->s_ifile_levels - (gc_level - 6);
				57	default:
				58	printk(KERN_ERR"LOGFS: segment of unknown level %x found\n",
				59	gc_level);
				60	WARN_ON(1);
				61	return super->s_ifile_levels + super->s_iblock_levels;
				62	}
				63	}
				64
				65	static int segment_is_reserved(struct super_block *sb, u32 segno)
				66	{
				67	struct logfs_super *super = logfs_super(sb);
				68	struct logfs_area *area;
				69	void *reserved;
				70	int i;
				71
				72	/* Some segments are reserved. Just pretend they were all valid */
				73	reserved = btree_lookup32(&super->s_reserved_segments, segno);
				74	if (reserved)
				75	return 1;
				76
				77	/* Currently open segments */
				78	for_each_area(i) {
				79	area = super->s_area[i];
				80	if (area->a_is_open && area->a_segno == segno)
				81	return 1;
				82	}
				83
				84	return 0;
				85	}
				86
				87	static void logfs_mark_segment_bad(struct super_block *sb, u32 segno)
				88	{
				89	BUG();
				90	}
				91
				92	/*
				93	* Returns the bytes consumed by valid objects in this segment. Object headers
				94	* are counted, the segment header is not.
				95	*/
				96	static u32 logfs_valid_bytes(struct super_block sb, u32 segno, u32 ec,
				97	gc_level_t *gc_level)
				98	{
				99	struct logfs_segment_entry se;
				100	u32 ec_level;
				101
				102	logfs_get_segment_entry(sb, segno, &se);
				103	if (se.ec_level == cpu_to_be32(BADSEG) \|\|
				104	se.valid == cpu_to_be32(RESERVED))
				105	return RESERVED;
				106
				107	ec_level = be32_to_cpu(se.ec_level);
				108	*ec = ec_level >> 4;
				109	*gc_level = GC_LEVEL(ec_level & 0xf);
				110	return be32_to_cpu(se.valid);
				111	}
				112
				113	static void logfs_cleanse_block(struct super_block *sb, u64 ofs, u64 ino,
				114	u64 bix, gc_level_t gc_level)
				115	{
				116	struct inode *inode;
				117	int err, cookie;
				118
				119	inode = logfs_safe_iget(sb, ino, &cookie);
				120	err = logfs_rewrite_block(inode, bix, ofs, gc_level, 0);
				121	BUG_ON(err);
				122	logfs_safe_iput(inode, cookie);
				123	}
				124
				125	static u32 logfs_gc_segment(struct super_block *sb, u32 segno, u8 dist)
				126	{
				127	struct logfs_super *super = logfs_super(sb);
				128	struct logfs_segment_header sh;
				129	struct logfs_object_header oh;
				130	u64 ofs, ino, bix;
				131	u32 seg_ofs, logical_segno, cleaned = 0;
				132	int err, len, valid;
				133	gc_level_t gc_level;
				134
				135	LOGFS_BUG_ON(segment_is_reserved(sb, segno), sb);
				136
				137	btree_insert32(&super->s_reserved_segments, segno, (void *)1, GFP_NOFS);
				138	err = wbuf_read(sb, dev_ofs(sb, segno, 0), sizeof(sh), &sh);
				139	BUG_ON(err);
				140	gc_level = GC_LEVEL(sh.level);
				141	logical_segno = be32_to_cpu(sh.segno);
				142	if (sh.crc != logfs_crc32(&sh, sizeof(sh), 4)) {
				143	logfs_mark_segment_bad(sb, segno);
				144	cleaned = -1;
				145	goto out;
				146	}
				147
				148	for (seg_ofs = LOGFS_SEGMENT_HEADERSIZE;
				149	seg_ofs + sizeof(oh) < super->s_segsize; ) {
				150	ofs = dev_ofs(sb, logical_segno, seg_ofs);
				151	err = wbuf_read(sb, dev_ofs(sb, segno, seg_ofs), sizeof(oh),
				152	&oh);
				153	BUG_ON(err);
				154
				155	if (!memchr_inv(&oh, 0xff, sizeof(oh)))
				156	break;
				157
				158	if (oh.crc != logfs_crc32(&oh, sizeof(oh) - 4, 4)) {
				159	logfs_mark_segment_bad(sb, segno);
				160	cleaned = super->s_segsize - 1;
				161	goto out;
				162	}
				163
				164	ino = be64_to_cpu(oh.ino);
				165	bix = be64_to_cpu(oh.bix);
				166	len = sizeof(oh) + be16_to_cpu(oh.len);
				167	valid = logfs_is_valid_block(sb, ofs, ino, bix, gc_level);
				168	if (valid == 1) {
				169	logfs_cleanse_block(sb, ofs, ino, bix, gc_level);
				170	cleaned += len;
				171	} else if (valid == 2) {
				172	/* Will be invalid upon journal commit */
				173	cleaned += len;
				174	}
				175	seg_ofs += len;
				176	}
				177	out:
				178	btree_remove32(&super->s_reserved_segments, segno);
				179	return cleaned;
				180	}
				181
				182	static struct gc_candidate add_list(struct gc_candidate cand,
				183	struct candidate_list *list)
				184	{
				185	struct rb_node **p = &list->rb_tree.rb_node;
				186	struct rb_node *parent = NULL;
				187	struct gc_candidate *cur;
				188	int comp;
				189
				190	cand->list = list;
				191	while (*p) {
				192	parent = *p;
				193	cur = rb_entry(parent, struct gc_candidate, rb_node);
				194
				195	if (list->sort_by_ec)
				196	comp = cand->erase_count < cur->erase_count;
				197	else
				198	comp = cand->valid < cur->valid;
				199
				200	if (comp)
				201	p = &parent->rb_left;
				202	else
				203	p = &parent->rb_right;
				204	}
				205	rb_link_node(&cand->rb_node, parent, p);
				206	rb_insert_color(&cand->rb_node, &list->rb_tree);
				207
				208	if (list->count <= list->maxcount) {
				209	list->count++;
				210	return NULL;
				211	}
				212	cand = rb_entry(rb_last(&list->rb_tree), struct gc_candidate, rb_node);
				213	rb_erase(&cand->rb_node, &list->rb_tree);
				214	cand->list = NULL;
				215	return cand;
				216	}
				217
				218	static void remove_from_list(struct gc_candidate *cand)
				219	{
				220	struct candidate_list *list = cand->list;
				221
				222	rb_erase(&cand->rb_node, &list->rb_tree);
				223	list->count--;
				224	}
				225
				226	static void free_candidate(struct super_block sb, struct gc_candidate cand)
				227	{
				228	struct logfs_super *super = logfs_super(sb);
				229
				230	btree_remove32(&super->s_cand_tree, cand->segno);
				231	kfree(cand);
				232	}
				233
				234	u32 get_best_cand(struct super_block sb, struct candidate_list list, u32 *ec)
				235	{
				236	struct gc_candidate *cand;
				237	u32 segno;
				238
				239	BUG_ON(list->count == 0);
				240
				241	cand = rb_entry(rb_first(&list->rb_tree), struct gc_candidate, rb_node);
				242	remove_from_list(cand);
				243	segno = cand->segno;
				244	if (ec)
				245	*ec = cand->erase_count;
				246	free_candidate(sb, cand);
				247	return segno;
				248	}
				249
				250	/*
				251	* We have several lists to manage segments with. The reserve_list is used to
				252	* deal with bad blocks. We try to keep the best (lowest ec) segments on this
				253	* list.
				254	* The free_list contains free segments for normal usage. It usually gets the
				255	* second pick after the reserve_list. But when the free_list is running short
				256	* it is more important to keep the free_list full than to keep a reserve.
				257	*
				258	* Segments that are not free are put onto a per-level low_list. If we have
				259	* to run garbage collection, we pick a candidate from there. All segments on
				260	* those lists should have at least some free space so GC will make progress.
				261	*
				262	* And last we have the ec_list, which is used to pick segments for wear
				263	* leveling.
				264	*
				265	* If all appropriate lists are full, we simply free the candidate and forget
				266	* about that segment for a while. We have better candidates for each purpose.
				267	*/
				268	static void __add_candidate(struct super_block sb, struct gc_candidate cand)
				269	{
				270	struct logfs_super *super = logfs_super(sb);
				271	u32 full = super->s_segsize - LOGFS_SEGMENT_RESERVE;
				272
				273	if (cand->valid == 0) {
				274	/* 100% free segments */
				275	log_gc_noisy("add reserve segment %x (ec %x) at %llx\n",
				276	cand->segno, cand->erase_count,
				277	dev_ofs(sb, cand->segno, 0));
				278	cand = add_list(cand, &super->s_reserve_list);
				279	if (cand) {
				280	log_gc_noisy("add free segment %x (ec %x) at %llx\n",
				281	cand->segno, cand->erase_count,
				282	dev_ofs(sb, cand->segno, 0));
				283	cand = add_list(cand, &super->s_free_list);
				284	}
				285	} else {
				286	/* good candidates for Garbage Collection */
				287	if (cand->valid < full)
				288	cand = add_list(cand, &super->s_low_list[cand->dist]);
				289	/* good candidates for wear leveling,
				290	* segments that were recently written get ignored */
				291	if (cand)
				292	cand = add_list(cand, &super->s_ec_list);
				293	}
				294	if (cand)
				295	free_candidate(sb, cand);
				296	}
				297
				298	static int add_candidate(struct super_block *sb, u32 segno, u32 valid, u32 ec,
				299	u8 dist)
				300	{
				301	struct logfs_super *super = logfs_super(sb);
				302	struct gc_candidate *cand;
				303
				304	cand = kmalloc(sizeof(*cand), GFP_NOFS);
				305	if (!cand)
				306	return -ENOMEM;
				307
				308	cand->segno = segno;
				309	cand->valid = valid;
				310	cand->erase_count = ec;
				311	cand->dist = dist;
				312
				313	btree_insert32(&super->s_cand_tree, segno, cand, GFP_NOFS);
				314	__add_candidate(sb, cand);
				315	return 0;
				316	}
				317
				318	static void remove_segment_from_lists(struct super_block *sb, u32 segno)
				319	{
				320	struct logfs_super *super = logfs_super(sb);
				321	struct gc_candidate *cand;
				322
				323	cand = btree_lookup32(&super->s_cand_tree, segno);
				324	if (cand) {
				325	remove_from_list(cand);
				326	free_candidate(sb, cand);
				327	}
				328	}
				329
				330	static void scan_segment(struct super_block *sb, u32 segno)
				331	{
				332	u32 valid, ec = 0;
				333	gc_level_t gc_level = 0;
				334	u8 dist;
				335
				336	if (segment_is_reserved(sb, segno))
				337	return;
				338
				339	remove_segment_from_lists(sb, segno);
				340	valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
				341	if (valid == RESERVED)
				342	return;
				343
				344	dist = root_distance(sb, gc_level);
				345	add_candidate(sb, segno, valid, ec, dist);
				346	}
				347
				348	static struct gc_candidate first_in_list(struct candidate_list list)
				349	{
				350	if (list->count == 0)
				351	return NULL;
				352	return rb_entry(rb_first(&list->rb_tree), struct gc_candidate, rb_node);
				353	}
				354
				355	/*
				356	* Find the best segment for garbage collection. Main criterion is
				357	* the segment requiring the least effort to clean. Secondary
				358	* criterion is to GC on the lowest level available.
				359	*
				360	* So we search the least effort segment on the lowest level first,
				361	* then move up and pick another segment iff is requires significantly
				362	* less effort. Hence the LOGFS_MAX_OBJECTSIZE in the comparison.
				363	*/
				364	static struct gc_candidate get_candidate(struct super_block sb)
				365	{
				366	struct logfs_super *super = logfs_super(sb);
				367	int i, max_dist;
				368	struct gc_candidate cand = NULL, this;
				369
				370	max_dist = min(no_free_segments(sb), LOGFS_NO_AREAS);
				371
				372	for (i = max_dist; i >= 0; i--) {
				373	this = first_in_list(&super->s_low_list[i]);
				374	if (!this)
				375	continue;
				376	if (!cand)
				377	cand = this;
				378	if (this->valid + LOGFS_MAX_OBJECTSIZE <= cand->valid)
				379	cand = this;
				380	}
				381	return cand;
				382	}
				383
				384	static int __logfs_gc_once(struct super_block sb, struct gc_candidate cand)
				385	{
				386	struct logfs_super *super = logfs_super(sb);
				387	gc_level_t gc_level;
				388	u32 cleaned, valid, segno, ec;
				389	u8 dist;
				390
				391	if (!cand) {
				392	log_gc("GC attempted, but no candidate found\n");
				393	return 0;
				394	}
				395
				396	segno = cand->segno;
				397	dist = cand->dist;
				398	valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
				399	free_candidate(sb, cand);
				400	log_gc("GC segment #%02x at %llx, %x required, %x free, %x valid, %llx free\n",
				401	segno, (u64)segno << super->s_segshift,
				402	dist, no_free_segments(sb), valid,
				403	super->s_free_bytes);
				404	cleaned = logfs_gc_segment(sb, segno, dist);
				405	log_gc("GC segment #%02x complete - now %x valid\n", segno,
				406	valid - cleaned);
				407	BUG_ON(cleaned != valid);
				408	return 1;
				409	}
				410
				411	static int logfs_gc_once(struct super_block *sb)
				412	{
				413	struct gc_candidate *cand;
				414
				415	cand = get_candidate(sb);
				416	if (cand)
				417	remove_from_list(cand);
				418	return __logfs_gc_once(sb, cand);
				419	}
				420
				421	/* returns 1 if a wrap occurs, 0 otherwise */
				422	static int logfs_scan_some(struct super_block *sb)
				423	{
				424	struct logfs_super *super = logfs_super(sb);
				425	u32 segno;
				426	int i, ret = 0;
				427
				428	segno = super->s_sweeper;
				429	for (i = SCAN_RATIO; i > 0; i--) {
				430	segno++;
				431	if (segno >= super->s_no_segs) {
				432	segno = 0;
				433	ret = 1;
				434	/* Break out of the loop. We want to read a single
				435	* block from the segment size on next invocation if
				436	* SCAN_RATIO is set to match block size
				437	*/
				438	break;
				439	}
				440
				441	scan_segment(sb, segno);
				442	}
				443	super->s_sweeper = segno;
				444	return ret;
				445	}
				446
				447	/*
				448	* In principle, this function should loop forever, looking for GC candidates
				449	* and moving data. LogFS is designed in such a way that this loop is
				450	* guaranteed to terminate.
				451	*
				452	* Limiting the loop to some iterations serves purely to catch cases when
				453	* these guarantees have failed. An actual endless loop is an obvious bug
				454	* and should be reported as such.
				455	*/
				456	static void __logfs_gc_pass(struct super_block *sb, int target)
				457	{
				458	struct logfs_super *super = logfs_super(sb);
				459	struct logfs_block *block;
				460	int round, progress, last_progress = 0;
				461
Joern Engel	032d8f7	2010-04-13 17:46:37 +0200	[diff] [blame]	462	/*
				463	* Doing too many changes to the segfile at once would result
				464	* in a large number of aliases. Write the journal before
				465	* things get out of hand.
				466	*/
				467	if (super->s_shadow_tree.no_shadowed_segments >= MAX_OBJ_ALIASES)
				468	logfs_write_anchor(sb);
				469
Joern Engel	5db53f3	2009-11-20 20:13:39 +0100	[diff] [blame]	470	if (no_free_segments(sb) >= target &&
				471	super->s_no_object_aliases < MAX_OBJ_ALIASES)
				472	return;
				473
				474	log_gc("__logfs_gc_pass(%x)\n", target);
				475	for (round = 0; round < SCAN_ROUNDS; ) {
				476	if (no_free_segments(sb) >= target)
				477	goto write_alias;
				478
				479	/* Sync in-memory state with on-medium state in case they
				480	* diverged */
Joern Engel	c6d3830	2010-03-04 21:36:19 +0100	[diff] [blame]	481	logfs_write_anchor(sb);
Joern Engel	5db53f3	2009-11-20 20:13:39 +0100	[diff] [blame]	482	round += logfs_scan_some(sb);
				483	if (no_free_segments(sb) >= target)
				484	goto write_alias;
				485	progress = logfs_gc_once(sb);
				486	if (progress)
				487	last_progress = round;
				488	else if (round - last_progress > 2)
				489	break;
				490	continue;
				491
				492	/*
				493	* The goto logic is nasty, I just don't know a better way to
				494	* code it. GC is supposed to ensure two things:
				495	* 1. Enough free segments are available.
				496	* 2. The number of aliases is bounded.
				497	* When 1. is achieved, we take a look at 2. and write back
				498	* some alias-containing blocks, if necessary. However, after
				499	* each such write we need to go back to 1., as writes can
				500	* consume free segments.
				501	*/
				502	write_alias:
				503	if (super->s_no_object_aliases < MAX_OBJ_ALIASES)
				504	return;
				505	if (list_empty(&super->s_object_alias)) {
				506	/* All aliases are still in btree */
				507	return;
				508	}
				509	log_gc("Write back one alias\n");
				510	block = list_entry(super->s_object_alias.next,
				511	struct logfs_block, alias_list);
				512	block->ops->write_block(block);
				513	/*
				514	* To round off the nasty goto logic, we reset round here. It
				515	* is a safety-net for GC not making any progress and limited
				516	* to something reasonably small. If incremented it for every
				517	* single alias, the loop could terminate rather quickly.
				518	*/
				519	round = 0;
				520	}
				521	LOGFS_BUG(sb);
				522	}
				523
				524	static int wl_ratelimit(struct super_block sb, u64 next_event)
				525	{
				526	struct logfs_super *super = logfs_super(sb);
				527
				528	if (*next_event < super->s_gec) {
				529	*next_event = super->s_gec + WL_RATELIMIT;
				530	return 0;
				531	}
				532	return 1;
				533	}
				534
				535	static void logfs_wl_pass(struct super_block *sb)
				536	{
				537	struct logfs_super *super = logfs_super(sb);
				538	struct gc_candidate wl_cand, free_cand;
				539
				540	if (wl_ratelimit(sb, &super->s_wl_gec_ostore))
				541	return;
				542
				543	wl_cand = first_in_list(&super->s_ec_list);
				544	if (!wl_cand)
				545	return;
				546	free_cand = first_in_list(&super->s_free_list);
				547	if (!free_cand)
				548	return;
				549
				550	if (wl_cand->erase_count < free_cand->erase_count + WL_DELTA) {
				551	remove_from_list(wl_cand);
				552	__logfs_gc_once(sb, wl_cand);
				553	}
				554	}
				555
				556	/*
				557	* The journal needs wear leveling as well. But moving the journal is an
				558	* expensive operation so we try to avoid it as much as possible. And if we
				559	* have to do it, we move the whole journal, not individual segments.
				560	*
				561	* Ratelimiting is not strictly necessary here, it mainly serves to avoid the
				562	* calculations. First we check whether moving the journal would be a
				563	* significant improvement. That means that a) the current journal segments
				564	* have more wear than the future journal segments and b) the current journal
				565	* segments have more wear than normal ostore segments.
				566	* Rationale for b) is that we don't have to move the journal if it is aging
				567	* less than the ostore, even if the reserve segments age even less (they are
				568	* excluded from wear leveling, after all).
				569	* Next we check that the superblocks have less wear than the journal. Since
				570	* moving the journal requires writing the superblocks, we have to protect the
				571	* superblocks even more than the journal.
				572	*
				573	* Also we double the acceptable wear difference, compared to ostore wear
				574	* leveling. Journal data is read and rewritten rapidly, comparatively. So
				575	* soft errors have much less time to accumulate and we allow the journal to
				576	* be a bit worse than the ostore.
				577	*/
				578	static void logfs_journal_wl_pass(struct super_block *sb)
				579	{
				580	struct logfs_super *super = logfs_super(sb);
				581	struct gc_candidate *cand;
				582	u32 min_journal_ec = -1, max_reserve_ec = 0;
				583	int i;
				584
				585	if (wl_ratelimit(sb, &super->s_wl_gec_journal))
				586	return;
				587
				588	if (super->s_reserve_list.count < super->s_no_journal_segs) {
				589	/* Reserve is not full enough to move complete journal */
				590	return;
				591	}
				592
				593	journal_for_each(i)
				594	if (super->s_journal_seg[i])
				595	min_journal_ec = min(min_journal_ec,
				596	super->s_journal_ec[i]);
				597	cand = rb_entry(rb_first(&super->s_free_list.rb_tree),
				598	struct gc_candidate, rb_node);
				599	max_reserve_ec = cand->erase_count;
				600	for (i = 0; i < 2; i++) {
				601	struct logfs_segment_entry se;
				602	u32 segno = seg_no(sb, super->s_sb_ofs[i]);
				603	u32 ec;
				604
				605	logfs_get_segment_entry(sb, segno, &se);
				606	ec = be32_to_cpu(se.ec_level) >> 4;
				607	max_reserve_ec = max(max_reserve_ec, ec);
				608	}
				609
				610	if (min_journal_ec > max_reserve_ec + 2 * WL_DELTA) {
				611	do_logfs_journal_wl_pass(sb);
				612	}
				613	}
				614
				615	void logfs_gc_pass(struct super_block *sb)
				616	{
				617	struct logfs_super *super = logfs_super(sb);
				618
				619	//BUG_ON(mutex_trylock(&logfs_super(sb)->s_w_mutex));
				620	/* Write journal before free space is getting saturated with dirty
				621	* objects.
				622	*/
				623	if (super->s_dirty_used_bytes + super->s_dirty_free_bytes
				624	+ LOGFS_MAX_OBJECTSIZE >= super->s_free_bytes)
Joern Engel	c6d3830	2010-03-04 21:36:19 +0100	[diff] [blame]	625	logfs_write_anchor(sb);
				626	__logfs_gc_pass(sb, super->s_total_levels);
Joern Engel	5db53f3	2009-11-20 20:13:39 +0100	[diff] [blame]	627	logfs_wl_pass(sb);
				628	logfs_journal_wl_pass(sb);
				629	}
				630
				631	static int check_area(struct super_block *sb, int i)
				632	{
				633	struct logfs_super *super = logfs_super(sb);
				634	struct logfs_area *area = super->s_area[i];
				635	struct logfs_object_header oh;
				636	u32 segno = area->a_segno;
				637	u32 ofs = area->a_used_bytes;
				638	__be32 crc;
				639	int err;
				640
				641	if (!area->a_is_open)
				642	return 0;
				643
				644	for (ofs = area->a_used_bytes;
				645	ofs <= super->s_segsize - sizeof(oh);
				646	ofs += (u32)be16_to_cpu(oh.len) + sizeof(oh)) {
				647	err = wbuf_read(sb, dev_ofs(sb, segno, ofs), sizeof(oh), &oh);
				648	if (err)
				649	return err;
				650
				651	if (!memchr_inv(&oh, 0xff, sizeof(oh)))
				652	break;
				653
				654	crc = logfs_crc32(&oh, sizeof(oh) - 4, 4);
				655	if (crc != oh.crc) {
				656	printk(KERN_INFO "interrupted header at %llx\n",
				657	dev_ofs(sb, segno, ofs));
				658	return 0;
				659	}
				660	}
				661	if (ofs != area->a_used_bytes) {
				662	printk(KERN_INFO "%x bytes unaccounted data found at %llx\n",
				663	ofs - area->a_used_bytes,
				664	dev_ofs(sb, segno, area->a_used_bytes));
				665	area->a_used_bytes = ofs;
				666	}
				667	return 0;
				668	}
				669
				670	int logfs_check_areas(struct super_block *sb)
				671	{
				672	int i, err;
				673
				674	for_each_area(i) {
				675	err = check_area(sb, i);
				676	if (err)
				677	return err;
				678	}
				679	return 0;
				680	}
				681
				682	static void logfs_init_candlist(struct candidate_list *list, int maxcount,
				683	int sort_by_ec)
				684	{
				685	list->count = 0;
				686	list->maxcount = maxcount;
				687	list->sort_by_ec = sort_by_ec;
				688	list->rb_tree = RB_ROOT;
				689	}
				690
				691	int logfs_init_gc(struct super_block *sb)
				692	{
				693	struct logfs_super *super = logfs_super(sb);
				694	int i;
				695
				696	btree_init_mempool32(&super->s_cand_tree, super->s_btree_pool);
				697	logfs_init_candlist(&super->s_free_list, LIST_SIZE + SCAN_RATIO, 1);
				698	logfs_init_candlist(&super->s_reserve_list,
				699	super->s_bad_seg_reserve, 1);
				700	for_each_area(i)
				701	logfs_init_candlist(&super->s_low_list[i], LIST_SIZE, 0);
				702	logfs_init_candlist(&super->s_ec_list, LIST_SIZE, 1);
				703	return 0;
				704	}
				705
				706	static void logfs_cleanup_list(struct super_block *sb,
				707	struct candidate_list *list)
				708	{
				709	struct gc_candidate *cand;
				710
				711	while (list->count) {
				712	cand = rb_entry(list->rb_tree.rb_node, struct gc_candidate,
				713	rb_node);
				714	remove_from_list(cand);
				715	free_candidate(sb, cand);
				716	}
				717	BUG_ON(list->rb_tree.rb_node);
				718	}
				719
				720	void logfs_cleanup_gc(struct super_block *sb)
				721	{
				722	struct logfs_super *super = logfs_super(sb);
				723	int i;
				724
				725	if (!super->s_free_list.count)
				726	return;
				727
				728	/*
				729	* FIXME: The btree may still contain a single empty node. So we
				730	* call the grim visitor to clean up that mess. Btree code should
				731	* do it for us, really.
				732	*/
				733	btree_grim_visitor32(&super->s_cand_tree, 0, NULL);
				734	logfs_cleanup_list(sb, &super->s_free_list);
				735	logfs_cleanup_list(sb, &super->s_reserve_list);
				736	for_each_area(i)
				737	logfs_cleanup_list(sb, &super->s_low_list[i]);
				738	logfs_cleanup_list(sb, &super->s_ec_list);
				739	}