blob: 4ea1b7f0ae783d6ba390f43c5d856d1c863c1399 [file] [log] [blame]
/*
 * JFFS2 -- Journalling Flash File System, Version 2.
 *
 * Copyright (C) 2001-2003 Red Hat, Inc.
 *
 * Created by David Woodhouse <dwmw2@infradead.org>
 *
 * For licensing information, see the file 'LICENCE' in this directory.
 *
 * $Id: gc.c,v 1.155 2005/11/07 11:14:39 gleixner Exp $
 *
 */
13
14#include <linux/kernel.h>
15#include <linux/mtd/mtd.h>
16#include <linux/slab.h>
17#include <linux/pagemap.h>
18#include <linux/crc32.h>
19#include <linux/compiler.h>
20#include <linux/stat.h>
21#include "nodelist.h"
22#include "compr.h"
23
Thomas Gleixner182ec4e2005-11-07 11:16:07 +000024static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
Linus Torvalds1da177e2005-04-16 15:20:36 -070025 struct jffs2_inode_cache *ic,
26 struct jffs2_raw_node_ref *raw);
Thomas Gleixner182ec4e2005-11-07 11:16:07 +000027static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
Linus Torvalds1da177e2005-04-16 15:20:36 -070028 struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
Thomas Gleixner182ec4e2005-11-07 11:16:07 +000029static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
Linus Torvalds1da177e2005-04-16 15:20:36 -070030 struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
Thomas Gleixner182ec4e2005-11-07 11:16:07 +000031static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
Linus Torvalds1da177e2005-04-16 15:20:36 -070032 struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
33static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
34 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
35 uint32_t start, uint32_t end);
36static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
37 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
38 uint32_t start, uint32_t end);
39static int jffs2_garbage_collect_live(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
40 struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
41
42/* Called with erase_completion_lock held */
43static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
44{
45 struct jffs2_eraseblock *ret;
46 struct list_head *nextlist = NULL;
47 int n = jiffies % 128;
48
49 /* Pick an eraseblock to garbage collect next. This is where we'll
50 put the clever wear-levelling algorithms. Eventually. */
51 /* We possibly want to favour the dirtier blocks more when the
52 number of free blocks is low. */
Artem B. Bityuckiya42163d2005-03-20 17:45:29 +000053again:
Linus Torvalds1da177e2005-04-16 15:20:36 -070054 if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
55 D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
56 nextlist = &c->bad_used_list;
57 } else if (n < 50 && !list_empty(&c->erasable_list)) {
Thomas Gleixner182ec4e2005-11-07 11:16:07 +000058 /* Note that most of them will have gone directly to be erased.
Linus Torvalds1da177e2005-04-16 15:20:36 -070059 So don't favour the erasable_list _too_ much. */
60 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
61 nextlist = &c->erasable_list;
62 } else if (n < 110 && !list_empty(&c->very_dirty_list)) {
63 /* Most of the time, pick one off the very_dirty list */
64 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
65 nextlist = &c->very_dirty_list;
66 } else if (n < 126 && !list_empty(&c->dirty_list)) {
67 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
68 nextlist = &c->dirty_list;
69 } else if (!list_empty(&c->clean_list)) {
70 D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
71 nextlist = &c->clean_list;
72 } else if (!list_empty(&c->dirty_list)) {
73 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
74
75 nextlist = &c->dirty_list;
76 } else if (!list_empty(&c->very_dirty_list)) {
77 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
78 nextlist = &c->very_dirty_list;
79 } else if (!list_empty(&c->erasable_list)) {
80 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
81
82 nextlist = &c->erasable_list;
Artem B. Bityuckiya42163d2005-03-20 17:45:29 +000083 } else if (!list_empty(&c->erasable_pending_wbuf_list)) {
84 /* There are blocks are wating for the wbuf sync */
85 D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
Artem B. Bityuckiy3cceb9f2005-03-20 21:43:26 +000086 spin_unlock(&c->erase_completion_lock);
Artem B. Bityuckiya42163d2005-03-20 17:45:29 +000087 jffs2_flush_wbuf_pad(c);
Artem B. Bityuckiy3cceb9f2005-03-20 21:43:26 +000088 spin_lock(&c->erase_completion_lock);
Artem B. Bityuckiya42163d2005-03-20 17:45:29 +000089 goto again;
Linus Torvalds1da177e2005-04-16 15:20:36 -070090 } else {
91 /* Eep. All were empty */
92 D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
93 return NULL;
94 }
95
96 ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
97 list_del(&ret->list);
98 c->gcblock = ret;
99 ret->gc_node = ret->first_node;
100 if (!ret->gc_node) {
101 printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
102 BUG();
103 }
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000104
Linus Torvalds1da177e2005-04-16 15:20:36 -0700105 /* Have we accidentally picked a clean block with wasted space ? */
106 if (ret->wasted_size) {
107 D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
108 ret->dirty_size += ret->wasted_size;
109 c->wasted_size -= ret->wasted_size;
110 c->dirty_size += ret->wasted_size;
111 ret->wasted_size = 0;
112 }
113
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114 return ret;
115}
116
117/* jffs2_garbage_collect_pass
118 * Make a single attempt to progress GC. Move one node, and possibly
119 * start erasing one eraseblock.
120 */
121int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
122{
123 struct jffs2_inode_info *f;
124 struct jffs2_inode_cache *ic;
125 struct jffs2_eraseblock *jeb;
126 struct jffs2_raw_node_ref *raw;
127 int ret = 0, inum, nlink;
KaiGai Koheiaa98d7c2006-05-13 15:09:47 +0900128 int xattr = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700129
130 if (down_interruptible(&c->alloc_sem))
131 return -EINTR;
132
133 for (;;) {
134 spin_lock(&c->erase_completion_lock);
135 if (!c->unchecked_size)
136 break;
137
138 /* We can't start doing GC yet. We haven't finished checking
139 the node CRCs etc. Do it now. */
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000140
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141 /* checked_ino is protected by the alloc_sem */
KaiGai Koheiaa98d7c2006-05-13 15:09:47 +0900142 if (c->checked_ino > c->highest_ino && xattr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143 printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
144 c->unchecked_size);
Artem B. Bityutskiye0c8e422005-07-24 16:14:17 +0100145 jffs2_dbg_dump_block_lists_nolock(c);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700146 spin_unlock(&c->erase_completion_lock);
147 BUG();
148 }
149
150 spin_unlock(&c->erase_completion_lock);
151
KaiGai Koheiaa98d7c2006-05-13 15:09:47 +0900152 if (!xattr)
153 xattr = jffs2_verify_xattr(c);
154
Linus Torvalds1da177e2005-04-16 15:20:36 -0700155 spin_lock(&c->inocache_lock);
156
157 ic = jffs2_get_ino_cache(c, c->checked_ino++);
158
159 if (!ic) {
160 spin_unlock(&c->inocache_lock);
161 continue;
162 }
163
164 if (!ic->nlink) {
165 D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n",
166 ic->ino));
167 spin_unlock(&c->inocache_lock);
168 continue;
169 }
170 switch(ic->state) {
171 case INO_STATE_CHECKEDABSENT:
172 case INO_STATE_PRESENT:
173 D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
174 spin_unlock(&c->inocache_lock);
175 continue;
176
177 case INO_STATE_GC:
178 case INO_STATE_CHECKING:
179 printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
180 spin_unlock(&c->inocache_lock);
181 BUG();
182
183 case INO_STATE_READING:
184 /* We need to wait for it to finish, lest we move on
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000185 and trigger the BUG() above while we haven't yet
Linus Torvalds1da177e2005-04-16 15:20:36 -0700186 finished checking all its nodes */
187 D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
David Woodhoused96fb992006-04-17 00:19:48 +0100188 /* We need to come back again for the _same_ inode. We've
189 made no progress in this case, but that should be OK */
190 c->checked_ino--;
191
Linus Torvalds1da177e2005-04-16 15:20:36 -0700192 up(&c->alloc_sem);
193 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
194 return 0;
195
196 default:
197 BUG();
198
199 case INO_STATE_UNCHECKED:
200 ;
201 }
202 ic->state = INO_STATE_CHECKING;
203 spin_unlock(&c->inocache_lock);
204
205 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));
206
207 ret = jffs2_do_crccheck_inode(c, ic);
208 if (ret)
209 printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
210
211 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
212 up(&c->alloc_sem);
213 return ret;
214 }
215
216 /* First, work out which block we're garbage-collecting */
217 jeb = c->gcblock;
218
219 if (!jeb)
220 jeb = jffs2_find_gc_block(c);
221
222 if (!jeb) {
223 D1 (printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
224 spin_unlock(&c->erase_completion_lock);
225 up(&c->alloc_sem);
226 return -EIO;
227 }
228
229 D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
230 D1(if (c->nextblock)
231 printk(KERN_DEBUG "Nextblock at %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
232
233 if (!jeb->used_size) {
234 up(&c->alloc_sem);
235 goto eraseit;
236 }
237
238 raw = jeb->gc_node;
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000239
Linus Torvalds1da177e2005-04-16 15:20:36 -0700240 while(ref_obsolete(raw)) {
241 D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
242 raw = raw->next_phys;
243 if (unlikely(!raw)) {
244 printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000245 printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246 jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
247 jeb->gc_node = raw;
248 spin_unlock(&c->erase_completion_lock);
249 up(&c->alloc_sem);
250 BUG();
251 }
252 }
253 jeb->gc_node = raw;
254
255 D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
256
257 if (!raw->next_in_ino) {
258 /* Inode-less node. Clean marker, snapshot or something like that */
259 /* FIXME: If it's something that needs to be copied, including something
260 we don't grok that has JFFS2_NODETYPE_RWCOMPAT_COPY, we should do so */
261 spin_unlock(&c->erase_completion_lock);
262 jffs2_mark_node_obsolete(c, raw);
263 up(&c->alloc_sem);
264 goto eraseit_lock;
265 }
266
267 ic = jffs2_raw_ref_to_ic(raw);
268
KaiGai Koheiaa98d7c2006-05-13 15:09:47 +0900269 /* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr.
270 We can decide whether this node is inode or xattr by ic->class.
271 ret = 0 : ic is xattr_datum/xattr_ref, and GC was SUCCESSED.
272 ret < 0 : ic is xattr_datum/xattr_ref, but GC was FAILED.
273 ret > 0 : ic is NOT xattr_datum/xattr_ref.
274 */
275 ret = jffs2_garbage_collect_xattr(c, ic);
276 if (ret <= 0)
277 goto release_sem;
278
Linus Torvalds1da177e2005-04-16 15:20:36 -0700279 /* We need to hold the inocache. Either the erase_completion_lock or
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000280 the inocache_lock are sufficient; we trade down since the inocache_lock
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281 causes less contention. */
282 spin_lock(&c->inocache_lock);
283
284 spin_unlock(&c->erase_completion_lock);
285
286 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
287
288 /* Three possibilities:
289 1. Inode is already in-core. We must iget it and do proper
290 updating to its fragtree, etc.
291 2. Inode is not in-core, node is REF_PRISTINE. We lock the
292 inocache to prevent a read_inode(), copy the node intact.
293 3. Inode is not in-core, node is not pristine. We must iget()
294 and take the slow path.
295 */
296
297 switch(ic->state) {
298 case INO_STATE_CHECKEDABSENT:
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000299 /* It's been checked, but it's not currently in-core.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700300 We can just copy any pristine nodes, but have
301 to prevent anyone else from doing read_inode() while
302 we're at it, so we set the state accordingly */
303 if (ref_flags(raw) == REF_PRISTINE)
304 ic->state = INO_STATE_GC;
305 else {
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000306 D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307 ic->ino));
308 }
309 break;
310
311 case INO_STATE_PRESENT:
312 /* It's in-core. GC must iget() it. */
313 break;
314
315 case INO_STATE_UNCHECKED:
316 case INO_STATE_CHECKING:
317 case INO_STATE_GC:
318 /* Should never happen. We should have finished checking
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000319 by the time we actually start doing any GC, and since
320 we're holding the alloc_sem, no other garbage collection
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321 can happen.
322 */
323 printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
324 ic->ino, ic->state);
325 up(&c->alloc_sem);
326 spin_unlock(&c->inocache_lock);
327 BUG();
328
329 case INO_STATE_READING:
330 /* Someone's currently trying to read it. We must wait for
331 them to finish and then go through the full iget() route
332 to do the GC. However, sometimes read_inode() needs to get
333 the alloc_sem() (for marking nodes invalid) so we must
334 drop the alloc_sem before sleeping. */
335
336 up(&c->alloc_sem);
337 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
338 ic->ino, ic->state));
339 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000340 /* And because we dropped the alloc_sem we must start again from the
Linus Torvalds1da177e2005-04-16 15:20:36 -0700341 beginning. Ponder chance of livelock here -- we're returning success
342 without actually making any progress.
343
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000344 Q: What are the chances that the inode is back in INO_STATE_READING
Linus Torvalds1da177e2005-04-16 15:20:36 -0700345 again by the time we next enter this function? And that this happens
346 enough times to cause a real delay?
347
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000348 A: Small enough that I don't care :)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349 */
350 return 0;
351 }
352
353 /* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000354 node intact, and we don't have to muck about with the fragtree etc.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700355 because we know it's not in-core. If it _was_ in-core, we go through
356 all the iget() crap anyway */
357
358 if (ic->state == INO_STATE_GC) {
359 spin_unlock(&c->inocache_lock);
360
361 ret = jffs2_garbage_collect_pristine(c, ic, raw);
362
363 spin_lock(&c->inocache_lock);
364 ic->state = INO_STATE_CHECKEDABSENT;
365 wake_up(&c->inocache_wq);
366
367 if (ret != -EBADFD) {
368 spin_unlock(&c->inocache_lock);
369 goto release_sem;
370 }
371
372 /* Fall through if it wanted us to, with inocache_lock held */
373 }
374
375 /* Prevent the fairly unlikely race where the gcblock is
376 entirely obsoleted by the final close of a file which had
377 the only valid nodes in the block, followed by erasure,
378 followed by freeing of the ic because the erased block(s)
379 held _all_ the nodes of that inode.... never been seen but
380 it's vaguely possible. */
381
382 inum = ic->ino;
383 nlink = ic->nlink;
384 spin_unlock(&c->inocache_lock);
385
386 f = jffs2_gc_fetch_inode(c, inum, nlink);
387 if (IS_ERR(f)) {
388 ret = PTR_ERR(f);
389 goto release_sem;
390 }
391 if (!f) {
392 ret = 0;
393 goto release_sem;
394 }
395
396 ret = jffs2_garbage_collect_live(c, jeb, raw, f);
397
398 jffs2_gc_release_inode(c, f);
399
400 release_sem:
401 up(&c->alloc_sem);
402
403 eraseit_lock:
404 /* If we've finished this block, start it erasing */
405 spin_lock(&c->erase_completion_lock);
406
407 eraseit:
408 if (c->gcblock && !c->gcblock->used_size) {
409 D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
410 /* We're GC'ing an empty block? */
411 list_add_tail(&c->gcblock->list, &c->erase_pending_list);
412 c->gcblock = NULL;
413 c->nr_erasing_blocks++;
414 jffs2_erase_pending_trigger(c);
415 }
416 spin_unlock(&c->erase_completion_lock);
417
418 return ret;
419}
420
421static int jffs2_garbage_collect_live(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
422 struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
423{
424 struct jffs2_node_frag *frag;
425 struct jffs2_full_dnode *fn = NULL;
426 struct jffs2_full_dirent *fd;
427 uint32_t start = 0, end = 0, nrfrags = 0;
428 int ret = 0;
429
430 down(&f->sem);
431
432 /* Now we have the lock for this inode. Check that it's still the one at the head
433 of the list. */
434
435 spin_lock(&c->erase_completion_lock);
436
437 if (c->gcblock != jeb) {
438 spin_unlock(&c->erase_completion_lock);
439 D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
440 goto upnout;
441 }
442 if (ref_obsolete(raw)) {
443 spin_unlock(&c->erase_completion_lock);
444 D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
445 /* They'll call again */
446 goto upnout;
447 }
448 spin_unlock(&c->erase_completion_lock);
449
450 /* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
451 if (f->metadata && f->metadata->raw == raw) {
452 fn = f->metadata;
453 ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
454 goto upnout;
455 }
456
457 /* FIXME. Read node and do lookup? */
458 for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
459 if (frag->node && frag->node->raw == raw) {
460 fn = frag->node;
461 end = frag->ofs + frag->size;
462 if (!nrfrags++)
463 start = frag->ofs;
464 if (nrfrags == frag->node->frags)
465 break; /* We've found them all */
466 }
467 }
468 if (fn) {
469 if (ref_flags(raw) == REF_PRISTINE) {
470 ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
471 if (!ret) {
472 /* Urgh. Return it sensibly. */
473 frag->node->raw = f->inocache->nodes;
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000474 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700475 if (ret != -EBADFD)
476 goto upnout;
477 }
478 /* We found a datanode. Do the GC */
479 if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
480 /* It crosses a page boundary. Therefore, it must be a hole. */
481 ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
482 } else {
483 /* It could still be a hole. But we GC the page this way anyway */
484 ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
485 }
486 goto upnout;
487 }
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000488
Linus Torvalds1da177e2005-04-16 15:20:36 -0700489 /* Wasn't a dnode. Try dirent */
490 for (fd = f->dents; fd; fd=fd->next) {
491 if (fd->raw == raw)
492 break;
493 }
494
495 if (fd && fd->ino) {
496 ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
497 } else if (fd) {
498 ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
499 } else {
500 printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
501 ref_offset(raw), f->inocache->ino);
502 if (ref_obsolete(raw)) {
503 printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
504 } else {
Artem B. Bityutskiye0c8e422005-07-24 16:14:17 +0100505 jffs2_dbg_dump_node(c, ref_offset(raw));
506 BUG();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700507 }
508 }
509 upnout:
510 up(&f->sem);
511
512 return ret;
513}
514
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000515static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516 struct jffs2_inode_cache *ic,
517 struct jffs2_raw_node_ref *raw)
518{
519 union jffs2_node_union *node;
520 struct jffs2_raw_node_ref *nraw;
521 size_t retlen;
522 int ret;
523 uint32_t phys_ofs, alloclen;
524 uint32_t crc, rawlen;
525 int retried = 0;
526
527 D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
528
529 rawlen = ref_totlen(c, c->gcblock, raw);
530
531 /* Ask for a small amount of space (or the totlen if smaller) because we
532 don't want to force wastage of the end of a block if splitting would
533 work. */
Ferenc Havasie631ddb2005-09-07 09:35:26 +0100534 ret = jffs2_reserve_space_gc(c, min_t(uint32_t, sizeof(struct jffs2_raw_inode) +
535 JFFS2_MIN_DATA_LEN, rawlen), &phys_ofs, &alloclen, rawlen);
536 /* this is not the exact summary size of it,
537 it is only an upper estimation */
538
Linus Torvalds1da177e2005-04-16 15:20:36 -0700539 if (ret)
540 return ret;
541
542 if (alloclen < rawlen) {
543 /* Doesn't fit untouched. We'll go the old route and split it */
544 return -EBADFD;
545 }
546
547 node = kmalloc(rawlen, GFP_KERNEL);
548 if (!node)
549 return -ENOMEM;
550
551 ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
552 if (!ret && retlen != rawlen)
553 ret = -EIO;
554 if (ret)
555 goto out_node;
556
557 crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
558 if (je32_to_cpu(node->u.hdr_crc) != crc) {
559 printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
560 ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
561 goto bail;
562 }
563
564 switch(je16_to_cpu(node->u.nodetype)) {
565 case JFFS2_NODETYPE_INODE:
566 crc = crc32(0, node, sizeof(node->i)-8);
567 if (je32_to_cpu(node->i.node_crc) != crc) {
568 printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
569 ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
570 goto bail;
571 }
572
573 if (je32_to_cpu(node->i.dsize)) {
574 crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
575 if (je32_to_cpu(node->i.data_crc) != crc) {
576 printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
577 ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
578 goto bail;
579 }
580 }
581 break;
582
583 case JFFS2_NODETYPE_DIRENT:
584 crc = crc32(0, node, sizeof(node->d)-8);
585 if (je32_to_cpu(node->d.node_crc) != crc) {
586 printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
587 ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
588 goto bail;
589 }
590
591 if (node->d.nsize) {
592 crc = crc32(0, node->d.name, node->d.nsize);
593 if (je32_to_cpu(node->d.name_crc) != crc) {
594 printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent ode at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
595 ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
596 goto bail;
597 }
598 }
599 break;
600 default:
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000601 printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700602 ref_offset(raw), je16_to_cpu(node->u.nodetype));
603 goto bail;
604 }
605
606 nraw = jffs2_alloc_raw_node_ref();
607 if (!nraw) {
608 ret = -ENOMEM;
609 goto out_node;
610 }
611
612 /* OK, all the CRCs are good; this node can just be copied as-is. */
613 retry:
614 nraw->flash_offset = phys_ofs;
615 nraw->__totlen = rawlen;
616 nraw->next_phys = NULL;
617
618 ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
619
620 if (ret || (retlen != rawlen)) {
621 printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
622 rawlen, phys_ofs, ret, retlen);
623 if (retlen) {
624 /* Doesn't belong to any inode */
625 nraw->next_in_ino = NULL;
626
627 nraw->flash_offset |= REF_OBSOLETE;
628 jffs2_add_physical_node_ref(c, nraw);
629 jffs2_mark_node_obsolete(c, nraw);
630 } else {
631 printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", nraw->flash_offset);
632 jffs2_free_raw_node_ref(nraw);
633 }
634 if (!retried && (nraw = jffs2_alloc_raw_node_ref())) {
635 /* Try to reallocate space and retry */
636 uint32_t dummy;
637 struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
638
639 retried = 1;
640
641 D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000642
Artem B. Bityutskiy730554d2005-07-17 07:56:26 +0100643 jffs2_dbg_acct_sanity_check(c,jeb);
644 jffs2_dbg_acct_paranoia_check(c, jeb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645
Ferenc Havasie631ddb2005-09-07 09:35:26 +0100646 ret = jffs2_reserve_space_gc(c, rawlen, &phys_ofs, &dummy, rawlen);
647 /* this is not the exact summary size of it,
648 it is only an upper estimation */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700649
650 if (!ret) {
651 D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
652
Artem B. Bityutskiy730554d2005-07-17 07:56:26 +0100653 jffs2_dbg_acct_sanity_check(c,jeb);
654 jffs2_dbg_acct_paranoia_check(c, jeb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700655
656 goto retry;
657 }
658 D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
659 jffs2_free_raw_node_ref(nraw);
660 }
661
662 jffs2_free_raw_node_ref(nraw);
663 if (!ret)
664 ret = -EIO;
665 goto out_node;
666 }
667 nraw->flash_offset |= REF_PRISTINE;
668 jffs2_add_physical_node_ref(c, nraw);
669
670 /* Link into per-inode list. This is safe because of the ic
671 state being INO_STATE_GC. Note that if we're doing this
672 for an inode which is in-core, the 'nraw' pointer is then
673 going to be fetched from ic->nodes by our caller. */
674 spin_lock(&c->erase_completion_lock);
675 nraw->next_in_ino = ic->nodes;
676 ic->nodes = nraw;
677 spin_unlock(&c->erase_completion_lock);
678
679 jffs2_mark_node_obsolete(c, raw);
680 D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
681
682 out_node:
683 kfree(node);
684 return ret;
685 bail:
686 ret = -EBADFD;
687 goto out_node;
688}
689
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000690static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700691 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
692{
693 struct jffs2_full_dnode *new_fn;
694 struct jffs2_raw_inode ri;
Artem B. Bityuckiy8557fd52005-04-09 11:47:03 +0100695 struct jffs2_node_frag *last_frag;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700696 jint16_t dev;
697 char *mdata = NULL, mdatalen = 0;
Artem B. Bityuckiy8557fd52005-04-09 11:47:03 +0100698 uint32_t alloclen, phys_ofs, ilen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700699 int ret;
700
701 if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
702 S_ISCHR(JFFS2_F_I_MODE(f)) ) {
703 /* For these, we don't actually need to read the old node */
704 /* FIXME: for minor or major > 255. */
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000705 dev = cpu_to_je16(((JFFS2_F_I_RDEV_MAJ(f) << 8) |
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706 JFFS2_F_I_RDEV_MIN(f)));
707 mdata = (char *)&dev;
708 mdatalen = sizeof(dev);
709 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
710 } else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
711 mdatalen = fn->size;
712 mdata = kmalloc(fn->size, GFP_KERNEL);
713 if (!mdata) {
714 printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
715 return -ENOMEM;
716 }
717 ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
718 if (ret) {
719 printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
720 kfree(mdata);
721 return ret;
722 }
723 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
724
725 }
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000726
Ferenc Havasie631ddb2005-09-07 09:35:26 +0100727 ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &phys_ofs, &alloclen,
728 JFFS2_SUMMARY_INODE_SIZE);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700729 if (ret) {
730 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
731 sizeof(ri)+ mdatalen, ret);
732 goto out;
733 }
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000734
Artem B. Bityuckiy8557fd52005-04-09 11:47:03 +0100735 last_frag = frag_last(&f->fragtree);
736 if (last_frag)
737 /* Fetch the inode length from the fragtree rather then
738 * from i_size since i_size may have not been updated yet */
739 ilen = last_frag->ofs + last_frag->size;
740 else
741 ilen = JFFS2_F_I_SIZE(f);
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000742
Linus Torvalds1da177e2005-04-16 15:20:36 -0700743 memset(&ri, 0, sizeof(ri));
744 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
745 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
746 ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
747 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
748
749 ri.ino = cpu_to_je32(f->inocache->ino);
750 ri.version = cpu_to_je32(++f->highest_version);
751 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
752 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
753 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
Artem B. Bityuckiy8557fd52005-04-09 11:47:03 +0100754 ri.isize = cpu_to_je32(ilen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700755 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
756 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
757 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
758 ri.offset = cpu_to_je32(0);
759 ri.csize = cpu_to_je32(mdatalen);
760 ri.dsize = cpu_to_je32(mdatalen);
761 ri.compr = JFFS2_COMPR_NONE;
762 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
763 ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
764
765 new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, phys_ofs, ALLOC_GC);
766
767 if (IS_ERR(new_fn)) {
768 printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
769 ret = PTR_ERR(new_fn);
770 goto out;
771 }
772 jffs2_mark_node_obsolete(c, fn->raw);
773 jffs2_free_full_dnode(fn);
774 f->metadata = new_fn;
775 out:
776 if (S_ISLNK(JFFS2_F_I_MODE(f)))
777 kfree(mdata);
778 return ret;
779}
780
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000781static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700782 struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
783{
784 struct jffs2_full_dirent *new_fd;
785 struct jffs2_raw_dirent rd;
786 uint32_t alloclen, phys_ofs;
787 int ret;
788
789 rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
790 rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
791 rd.nsize = strlen(fd->name);
792 rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
793 rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
794
795 rd.pino = cpu_to_je32(f->inocache->ino);
796 rd.version = cpu_to_je32(++f->highest_version);
797 rd.ino = cpu_to_je32(fd->ino);
Artem B. Bityutskiy3a69e0c2005-08-17 14:46:26 +0100798 /* If the times on this inode were set by explicit utime() they can be different,
799 so refrain from splatting them. */
800 if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
801 rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000802 else
Artem B. Bityutskiy3a69e0c2005-08-17 14:46:26 +0100803 rd.mctime = cpu_to_je32(0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804 rd.type = fd->type;
805 rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
806 rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000807
Ferenc Havasie631ddb2005-09-07 09:35:26 +0100808 ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &phys_ofs, &alloclen,
809 JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700810 if (ret) {
811 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
812 sizeof(rd)+rd.nsize, ret);
813 return ret;
814 }
815 new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, phys_ofs, ALLOC_GC);
816
817 if (IS_ERR(new_fd)) {
818 printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
819 return PTR_ERR(new_fd);
820 }
821 jffs2_add_fd_to_list(c, new_fd, &f->dents);
822 return 0;
823}
824
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000825static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700826 struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
827{
828 struct jffs2_full_dirent **fdp = &f->dents;
829 int found = 0;
830
831 /* On a medium where we can't actually mark nodes obsolete
832 pernamently, such as NAND flash, we need to work out
833 whether this deletion dirent is still needed to actively
834 delete a 'real' dirent with the same name that's still
835 somewhere else on the flash. */
836 if (!jffs2_can_mark_obsolete(c)) {
837 struct jffs2_raw_dirent *rd;
838 struct jffs2_raw_node_ref *raw;
839 int ret;
840 size_t retlen;
841 int name_len = strlen(fd->name);
842 uint32_t name_crc = crc32(0, fd->name, name_len);
843 uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
844
845 rd = kmalloc(rawlen, GFP_KERNEL);
846 if (!rd)
847 return -ENOMEM;
848
849 /* Prevent the erase code from nicking the obsolete node refs while
850 we're looking at them. I really don't like this extra lock but
851 can't see any alternative. Suggestions on a postcard to... */
852 down(&c->erase_free_sem);
853
854 for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
855
856 /* We only care about obsolete ones */
857 if (!(ref_obsolete(raw)))
858 continue;
859
860 /* Any dirent with the same name is going to have the same length... */
861 if (ref_totlen(c, NULL, raw) != rawlen)
862 continue;
863
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000864 /* Doesn't matter if there's one in the same erase block. We're going to
Linus Torvalds1da177e2005-04-16 15:20:36 -0700865 delete it too at the same time. */
Andrew Victor3be36672005-02-09 09:09:05 +0000866 if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700867 continue;
868
869 D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
870
871 /* This is an obsolete node belonging to the same directory, and it's of the right
872 length. We need to take a closer look...*/
873 ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
874 if (ret) {
875 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
876 /* If we can't read it, we don't need to continue to obsolete it. Continue */
877 continue;
878 }
879 if (retlen != rawlen) {
880 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
881 retlen, rawlen, ref_offset(raw));
882 continue;
883 }
884
885 if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
886 continue;
887
888 /* If the name CRC doesn't match, skip */
889 if (je32_to_cpu(rd->name_crc) != name_crc)
890 continue;
891
892 /* If the name length doesn't match, or it's another deletion dirent, skip */
893 if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
894 continue;
895
896 /* OK, check the actual name now */
897 if (memcmp(rd->name, fd->name, name_len))
898 continue;
899
900 /* OK. The name really does match. There really is still an older node on
901 the flash which our deletion dirent obsoletes. So we have to write out
902 a new deletion dirent to replace it */
903 up(&c->erase_free_sem);
904
905 D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
906 ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
907 kfree(rd);
908
909 return jffs2_garbage_collect_dirent(c, jeb, f, fd);
910 }
911
912 up(&c->erase_free_sem);
913 kfree(rd);
914 }
915
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000916 /* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
Artem B. Bityutskiy3a69e0c2005-08-17 14:46:26 +0100917 we should update the metadata node with those times accordingly */
918
Linus Torvalds1da177e2005-04-16 15:20:36 -0700919 /* No need for it any more. Just mark it obsolete and remove it from the list */
920 while (*fdp) {
921 if ((*fdp) == fd) {
922 found = 1;
923 *fdp = fd->next;
924 break;
925 }
926 fdp = &(*fdp)->next;
927 }
928 if (!found) {
929 printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
930 }
931 jffs2_mark_node_obsolete(c, fd->raw);
932 jffs2_free_full_dirent(fd);
933 return 0;
934}
935
936static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
937 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
938 uint32_t start, uint32_t end)
939{
940 struct jffs2_raw_inode ri;
941 struct jffs2_node_frag *frag;
942 struct jffs2_full_dnode *new_fn;
Artem B. Bityuckiy8557fd52005-04-09 11:47:03 +0100943 uint32_t alloclen, phys_ofs, ilen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944 int ret;
945
946 D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
947 f->inocache->ino, start, end));
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000948
Linus Torvalds1da177e2005-04-16 15:20:36 -0700949 memset(&ri, 0, sizeof(ri));
950
951 if(fn->frags > 1) {
952 size_t readlen;
953 uint32_t crc;
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000954 /* It's partially obsoleted by a later write. So we have to
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955 write it out again with the _same_ version as before */
956 ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
957 if (readlen != sizeof(ri) || ret) {
958 printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
959 goto fill;
960 }
961 if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
962 printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
963 ref_offset(fn->raw),
964 je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
965 return -EIO;
966 }
967 if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
968 printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
969 ref_offset(fn->raw),
970 je32_to_cpu(ri.totlen), sizeof(ri));
971 return -EIO;
972 }
973 crc = crc32(0, &ri, sizeof(ri)-8);
974 if (crc != je32_to_cpu(ri.node_crc)) {
975 printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000976 ref_offset(fn->raw),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700977 je32_to_cpu(ri.node_crc), crc);
978 /* FIXME: We could possibly deal with this by writing new holes for each frag */
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000979 printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700980 start, end, f->inocache->ino);
981 goto fill;
982 }
983 if (ri.compr != JFFS2_COMPR_ZERO) {
984 printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
Thomas Gleixner182ec4e2005-11-07 11:16:07 +0000985 printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986 start, end, f->inocache->ino);
987 goto fill;
988 }
989 } else {
990 fill:
991 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
992 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
993 ri.totlen = cpu_to_je32(sizeof(ri));
994 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
995
996 ri.ino = cpu_to_je32(f->inocache->ino);
997 ri.version = cpu_to_je32(++f->highest_version);
998 ri.offset = cpu_to_je32(start);
999 ri.dsize = cpu_to_je32(end - start);
1000 ri.csize = cpu_to_je32(0);
1001 ri.compr = JFFS2_COMPR_ZERO;
1002 }
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001003
Artem B. Bityuckiy8557fd52005-04-09 11:47:03 +01001004 frag = frag_last(&f->fragtree);
1005 if (frag)
1006 /* Fetch the inode length from the fragtree rather then
1007 * from i_size since i_size may have not been updated yet */
1008 ilen = frag->ofs + frag->size;
1009 else
1010 ilen = JFFS2_F_I_SIZE(f);
1011
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1013 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1014 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
Artem B. Bityuckiy8557fd52005-04-09 11:47:03 +01001015 ri.isize = cpu_to_je32(ilen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001016 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1017 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1018 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1019 ri.data_crc = cpu_to_je32(0);
1020 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1021
Ferenc Havasie631ddb2005-09-07 09:35:26 +01001022 ret = jffs2_reserve_space_gc(c, sizeof(ri), &phys_ofs, &alloclen,
1023 JFFS2_SUMMARY_INODE_SIZE);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024 if (ret) {
1025 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1026 sizeof(ri), ret);
1027 return ret;
1028 }
1029 new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, phys_ofs, ALLOC_GC);
1030
1031 if (IS_ERR(new_fn)) {
1032 printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1033 return PTR_ERR(new_fn);
1034 }
1035 if (je32_to_cpu(ri.version) == f->highest_version) {
1036 jffs2_add_full_dnode_to_inode(c, f, new_fn);
1037 if (f->metadata) {
1038 jffs2_mark_node_obsolete(c, f->metadata->raw);
1039 jffs2_free_full_dnode(f->metadata);
1040 f->metadata = NULL;
1041 }
1042 return 0;
1043 }
1044
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001045 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001046 * We should only get here in the case where the node we are
1047 * replacing had more than one frag, so we kept the same version
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001048 * number as before. (Except in case of error -- see 'goto fill;'
Linus Torvalds1da177e2005-04-16 15:20:36 -07001049 * above.)
1050 */
1051 D1(if(unlikely(fn->frags <= 1)) {
1052 printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1053 fn->frags, je32_to_cpu(ri.version), f->highest_version,
1054 je32_to_cpu(ri.ino));
1055 });
1056
1057 /* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1058 mark_ref_normal(new_fn->raw);
1059
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001060 for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061 frag; frag = frag_next(frag)) {
1062 if (frag->ofs > fn->size + fn->ofs)
1063 break;
1064 if (frag->node == fn) {
1065 frag->node = new_fn;
1066 new_fn->frags++;
1067 fn->frags--;
1068 }
1069 }
1070 if (fn->frags) {
1071 printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1072 BUG();
1073 }
1074 if (!new_fn->frags) {
1075 printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1076 BUG();
1077 }
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001078
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079 jffs2_mark_node_obsolete(c, fn->raw);
1080 jffs2_free_full_dnode(fn);
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001081
Linus Torvalds1da177e2005-04-16 15:20:36 -07001082 return 0;
1083}
1084
1085static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1086 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1087 uint32_t start, uint32_t end)
1088{
1089 struct jffs2_full_dnode *new_fn;
1090 struct jffs2_raw_inode ri;
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001091 uint32_t alloclen, phys_ofs, offset, orig_end, orig_start;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001092 int ret = 0;
1093 unsigned char *comprbuf = NULL, *writebuf;
1094 unsigned long pg;
1095 unsigned char *pg_ptr;
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001096
Linus Torvalds1da177e2005-04-16 15:20:36 -07001097 memset(&ri, 0, sizeof(ri));
1098
1099 D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1100 f->inocache->ino, start, end));
1101
1102 orig_end = end;
1103 orig_start = start;
1104
1105 if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1106 /* Attempt to do some merging. But only expand to cover logically
1107 adjacent frags if the block containing them is already considered
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001108 to be dirty. Otherwise we end up with GC just going round in
1109 circles dirtying the nodes it already wrote out, especially
Linus Torvalds1da177e2005-04-16 15:20:36 -07001110 on NAND where we have small eraseblocks and hence a much higher
1111 chance of nodes having to be split to cross boundaries. */
1112
1113 struct jffs2_node_frag *frag;
1114 uint32_t min, max;
1115
1116 min = start & ~(PAGE_CACHE_SIZE-1);
1117 max = min + PAGE_CACHE_SIZE;
1118
1119 frag = jffs2_lookup_node_frag(&f->fragtree, start);
1120
1121 /* BUG_ON(!frag) but that'll happen anyway... */
1122
1123 BUG_ON(frag->ofs != start);
1124
1125 /* First grow down... */
1126 while((frag = frag_prev(frag)) && frag->ofs >= min) {
1127
1128 /* If the previous frag doesn't even reach the beginning, there's
1129 excessive fragmentation. Just merge. */
1130 if (frag->ofs > min) {
1131 D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1132 frag->ofs, frag->ofs+frag->size));
1133 start = frag->ofs;
1134 continue;
1135 }
1136 /* OK. This frag holds the first byte of the page. */
1137 if (!frag->node || !frag->node->raw) {
1138 D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1139 frag->ofs, frag->ofs+frag->size));
1140 break;
1141 } else {
1142
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001143 /* OK, it's a frag which extends to the beginning of the page. Does it live
Linus Torvalds1da177e2005-04-16 15:20:36 -07001144 in a block which is still considered clean? If so, don't obsolete it.
1145 If not, cover it anyway. */
1146
1147 struct jffs2_raw_node_ref *raw = frag->node->raw;
1148 struct jffs2_eraseblock *jeb;
1149
1150 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1151
1152 if (jeb == c->gcblock) {
1153 D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1154 frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1155 start = frag->ofs;
1156 break;
1157 }
1158 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1159 D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1160 frag->ofs, frag->ofs+frag->size, jeb->offset));
1161 break;
1162 }
1163
1164 D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1165 frag->ofs, frag->ofs+frag->size, jeb->offset));
1166 start = frag->ofs;
1167 break;
1168 }
1169 }
1170
1171 /* ... then up */
1172
1173 /* Find last frag which is actually part of the node we're to GC. */
1174 frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1175
1176 while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1177
1178 /* If the previous frag doesn't even reach the beginning, there's lots
1179 of fragmentation. Just merge. */
1180 if (frag->ofs+frag->size < max) {
1181 D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1182 frag->ofs, frag->ofs+frag->size));
1183 end = frag->ofs + frag->size;
1184 continue;
1185 }
1186
1187 if (!frag->node || !frag->node->raw) {
1188 D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1189 frag->ofs, frag->ofs+frag->size));
1190 break;
1191 } else {
1192
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001193 /* OK, it's a frag which extends to the beginning of the page. Does it live
Linus Torvalds1da177e2005-04-16 15:20:36 -07001194 in a block which is still considered clean? If so, don't obsolete it.
1195 If not, cover it anyway. */
1196
1197 struct jffs2_raw_node_ref *raw = frag->node->raw;
1198 struct jffs2_eraseblock *jeb;
1199
1200 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1201
1202 if (jeb == c->gcblock) {
1203 D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1204 frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1205 end = frag->ofs + frag->size;
1206 break;
1207 }
1208 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1209 D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1210 frag->ofs, frag->ofs+frag->size, jeb->offset));
1211 break;
1212 }
1213
1214 D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1215 frag->ofs, frag->ofs+frag->size, jeb->offset));
1216 end = frag->ofs + frag->size;
1217 break;
1218 }
1219 }
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001220 D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001221 orig_start, orig_end, start, end));
1222
Artem B. Bityuckiy8557fd52005-04-09 11:47:03 +01001223 D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001224 BUG_ON(end < orig_end);
1225 BUG_ON(start > orig_start);
1226 }
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001227
Linus Torvalds1da177e2005-04-16 15:20:36 -07001228 /* First, use readpage() to read the appropriate page into the page cache */
1229 /* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1230 * triggered garbage collection in the first place?
1231 * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1232 * page OK. We'll actually write it out again in commit_write, which is a little
1233 * suboptimal, but at least we're correct.
1234 */
1235 pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1236
1237 if (IS_ERR(pg_ptr)) {
1238 printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
1239 return PTR_ERR(pg_ptr);
1240 }
1241
1242 offset = start;
1243 while(offset < orig_end) {
1244 uint32_t datalen;
1245 uint32_t cdatalen;
1246 uint16_t comprtype = JFFS2_COMPR_NONE;
1247
Ferenc Havasie631ddb2005-09-07 09:35:26 +01001248 ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN, &phys_ofs,
1249 &alloclen, JFFS2_SUMMARY_INODE_SIZE);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001250
1251 if (ret) {
1252 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1253 sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1254 break;
1255 }
1256 cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1257 datalen = end - offset;
1258
1259 writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1260
1261 comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1262
1263 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1264 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1265 ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1266 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1267
1268 ri.ino = cpu_to_je32(f->inocache->ino);
1269 ri.version = cpu_to_je32(++f->highest_version);
1270 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1271 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1272 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1273 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1274 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1275 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1276 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1277 ri.offset = cpu_to_je32(offset);
1278 ri.csize = cpu_to_je32(cdatalen);
1279 ri.dsize = cpu_to_je32(datalen);
1280 ri.compr = comprtype & 0xff;
1281 ri.usercompr = (comprtype >> 8) & 0xff;
1282 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1283 ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
Thomas Gleixner182ec4e2005-11-07 11:16:07 +00001284
Linus Torvalds1da177e2005-04-16 15:20:36 -07001285 new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, phys_ofs, ALLOC_GC);
1286
1287 jffs2_free_comprbuf(comprbuf, writebuf);
1288
1289 if (IS_ERR(new_fn)) {
1290 printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1291 ret = PTR_ERR(new_fn);
1292 break;
1293 }
1294 ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1295 offset += datalen;
1296 if (f->metadata) {
1297 jffs2_mark_node_obsolete(c, f->metadata->raw);
1298 jffs2_free_full_dnode(f->metadata);
1299 f->metadata = NULL;
1300 }
1301 }
1302
1303 jffs2_gc_release_page(c, pg_ptr, &pg);
1304 return ret;
1305}