Blame - fs/reiserfs/journal.c - kernel/msm-4.9

blob: c9ad3a7849f413838cbd2465be4fdcf291411e5e [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame^]	1	/*
				2	** Write ahead logging implementation copyright Chris Mason 2000
				3	**
				4	** The background commits make this code very interrelated, and
				5	** overly complex. I need to rethink things a bit....The major players:
				6	**
				7	** journal_begin -- call with the number of blocks you expect to log.
				8	** If the current transaction is too
				9	** old, it will block until the current transaction is
				10	** finished, and then start a new one.
				11	** Usually, your transaction will get joined in with
				12	** previous ones for speed.
				13	**
				14	** journal_join -- same as journal_begin, but won't block on the current
				15	** transaction regardless of age. Don't ever call
				16	** this. Ever. There are only two places it should be
				17	** called from, and they are both inside this file.
				18	**
				19	** journal_mark_dirty -- adds blocks into this transaction. clears any flags
				20	** that might make them get sent to disk
				21	** and then marks them BH_JDirty. Puts the buffer head
				22	** into the current transaction hash.
				23	**
				24	** journal_end -- if the current transaction is batchable, it does nothing
				25	** otherwise, it could do an async/synchronous commit, or
				26	** a full flush of all log and real blocks in the
				27	** transaction.
				28	**
				29	** flush_old_commits -- if the current transaction is too old, it is ended and
				30	** commit blocks are sent to disk. Forces commit blocks
				31	** to disk for all backgrounded commits that have been
				32	** around too long.
				33	** -- Note, if you call this as an immediate flush
				34	** from within kupdate, it will ignore the immediate flag
				35	*/
				36
				37	#include <linux/config.h>
				38	#include <asm/uaccess.h>
				39	#include <asm/system.h>
				40
				41	#include <linux/time.h>
				42	#include <asm/semaphore.h>
				43
				44	#include <linux/vmalloc.h>
				45	#include <linux/reiserfs_fs.h>
				46
				47	#include <linux/kernel.h>
				48	#include <linux/errno.h>
				49	#include <linux/fcntl.h>
				50	#include <linux/stat.h>
				51	#include <linux/string.h>
				52	#include <linux/smp_lock.h>
				53	#include <linux/buffer_head.h>
				54	#include <linux/workqueue.h>
				55	#include <linux/writeback.h>
				56	#include <linux/blkdev.h>
				57
				58
				/*
				 * Map an embedded list_head back to the struct reiserfs_journal_list
				 * that contains it: JOURNAL_LIST_ENTRY for the j_list link,
				 * JOURNAL_WORK_ENTRY for the j_working_list link.
				 */
				#define JOURNAL_LIST_ENTRY(h) \
					(list_entry((h), struct reiserfs_journal_list, j_list))
				#define JOURNAL_WORK_ENTRY(h) \
					(list_entry((h), struct reiserfs_journal_list, j_working_list))

				/*
				 * The number of mounted filesystems.  This is used to decide when to
				 * start and kill the commit workqueue.
				 */
				static int reiserfs_mounted_fs_count;

				/* workqueue shared by the mounted filesystems for commit work */
				static struct workqueue_struct *commit_wq;
				71
				/* must be correct to keep the desc and commit structs at 4k */
				#define JOURNAL_TRANS_HALF 1018
				#define BUFNR 64	/* read ahead */

				/* cnode stat bits.  Move these into reiserfs_fs.h */

				#define BLOCK_FREED 2		/* this block was freed, and can't be written. */
				#define BLOCK_FREED_HOLDER 3	/* this block was freed during this transaction, and can't be written */

				#define BLOCK_NEEDS_FLUSH 4	/* used in flush_journal_list */
				#define BLOCK_DIRTIED 5


				/* journal list state bits */
				#define LIST_TOUCHED 1
				#define LIST_DIRTY 2
				#define LIST_COMMIT_PENDING 4	/* someone will commit this list */

				/* flags for do_journal_end */
				#define FLUSH_ALL 1	/* flush commit and real blocks */
				#define COMMIT_NOW 2	/* end and commit this transaction */
				#define WAIT 4		/* wait for the log blocks to hit the disk */
				94
				/*
				 * Forward declarations for helpers defined later in this file.
				 * Every handle, super block, journal list and cnode is passed by pointer.
				 */
				static int do_journal_end(struct reiserfs_transaction_handle *,
							  struct super_block *,
							  unsigned long nblocks, int flags);
				static int flush_journal_list(struct super_block *s,
							      struct reiserfs_journal_list *jl, int flushall);
				static int flush_commit_list(struct super_block *s,
							     struct reiserfs_journal_list *jl, int flushall);
				static int can_dirty(struct reiserfs_journal_cnode *cn);
				static int journal_join(struct reiserfs_transaction_handle *th,
							struct super_block *p_s_sb, unsigned long nblocks);
				static int release_journal_dev(struct super_block *super,
							       struct reiserfs_journal *journal);
				static int dirty_one_transaction(struct super_block *s,
								 struct reiserfs_journal_list *jl);
				static void flush_async_commits(void *p);
				static void queue_log_writer(struct super_block *s);

				/* values for join in do_journal_begin_r */
				enum {
					JBEGIN_REG = 0,		/* regular journal begin */
					JBEGIN_JOIN = 1,	/* join the running transaction if at all possible */
					JBEGIN_ABORT = 2,	/* called from cleanup code, ignores aborted flag */
				};

				static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
							      struct super_block *p_s_sb,
							      unsigned long nblocks, int join);
				117
				118	static void init_journal_hash(struct super_block *p_s_sb) {
				119	struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
				120	memset(journal->j_hash_table, 0, JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)) ;
				121	}
				122
				/*
				 * Clear the dirty and journal_test bits on bh.  A NULL bh is accepted and
				 * ignored.  Always returns 0.
				 *
				 * Called because refile_buffer must not be allowed to schedule after a
				 * block has been freed.  See remove_from_transaction and
				 * journal_mark_freed for more details.
				 */
				static int reiserfs_clean_and_file_buffer(struct buffer_head *bh)
				{
					if (!bh)
						return 0;

					clear_buffer_dirty(bh);
					clear_buffer_journal_test(bh);
					return 0;
				}
				135
				136	static void disable_barrier(struct super_block *s)
				137	{
				138	REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_BARRIER_FLUSH);
				139	printk("reiserfs: disabling flush barriers on %s\n", reiserfs_bdevname(s));
				140	}
				141
				142	static struct reiserfs_bitmap_node *
				143	allocate_bitmap_node(struct super_block *p_s_sb) {
				144	struct reiserfs_bitmap_node *bn ;
				145	static int id;
				146
				147	bn = reiserfs_kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS, p_s_sb) ;
				148	if (!bn) {
				149	return NULL ;
				150	}
				151	bn->data = reiserfs_kmalloc(p_s_sb->s_blocksize, GFP_NOFS, p_s_sb) ;
				152	if (!bn->data) {
				153	reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb) ;
				154	return NULL ;
				155	}
				156	bn->id = id++ ;
				157	memset(bn->data, 0, p_s_sb->s_blocksize) ;
				158	INIT_LIST_HEAD(&bn->list) ;
				159	return bn ;
				160	}
				161
				162	static struct reiserfs_bitmap_node *
				163	get_bitmap_node(struct super_block *p_s_sb) {
				164	struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
				165	struct reiserfs_bitmap_node *bn = NULL;
				166	struct list_head *entry = journal->j_bitmap_nodes.next ;
				167
				168	journal->j_used_bitmap_nodes++ ;
				169	repeat:
				170
				171	if(entry != &journal->j_bitmap_nodes) {
				172	bn = list_entry(entry, struct reiserfs_bitmap_node, list) ;
				173	list_del(entry) ;
				174	memset(bn->data, 0, p_s_sb->s_blocksize) ;
				175	journal->j_free_bitmap_nodes-- ;
				176	return bn ;
				177	}
				178	bn = allocate_bitmap_node(p_s_sb) ;
				179	if (!bn) {
				180	yield();
				181	goto repeat ;
				182	}
				183	return bn ;
				184	}
				185	static inline void free_bitmap_node(struct super_block *p_s_sb,
				186	struct reiserfs_bitmap_node *bn) {
				187	struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
				188	journal->j_used_bitmap_nodes-- ;
				189	if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) {
				190	reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb) ;
				191	reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb) ;
				192	} else {
				193	list_add(&bn->list, &journal->j_bitmap_nodes) ;
				194	journal->j_free_bitmap_nodes++ ;
				195	}
				196	}
				197
				198	static void allocate_bitmap_nodes(struct super_block *p_s_sb) {
				199	int i ;
				200	struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
				201	struct reiserfs_bitmap_node *bn = NULL ;
				202	for (i = 0 ; i < REISERFS_MIN_BITMAP_NODES ; i++) {
				203	bn = allocate_bitmap_node(p_s_sb) ;
				204	if (bn) {
				205	list_add(&bn->list, &journal->j_bitmap_nodes) ;
				206	journal->j_free_bitmap_nodes++ ;
				207	} else {
				208	break ; // this is ok, we'll try again when more are needed
				209	}
				210	}
				211	}
				212
				213	static int set_bit_in_list_bitmap(struct super_block *p_s_sb, int block,
				214	struct reiserfs_list_bitmap *jb) {
				215	int bmap_nr = block / (p_s_sb->s_blocksize << 3) ;
				216	int bit_nr = block % (p_s_sb->s_blocksize << 3) ;
				217
				218	if (!jb->bitmaps[bmap_nr]) {
				219	jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb) ;
				220	}
				221	set_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data) ;
				222	return 0 ;
				223	}
				224
				225	static void cleanup_bitmap_list(struct super_block *p_s_sb,
				226	struct reiserfs_list_bitmap *jb) {
				227	int i;
				228	if (jb->bitmaps == NULL)
				229	return;
				230
				231	for (i = 0 ; i < SB_BMAP_NR(p_s_sb) ; i++) {
				232	if (jb->bitmaps[i]) {
				233	free_bitmap_node(p_s_sb, jb->bitmaps[i]) ;
				234	jb->bitmaps[i] = NULL ;
				235	}
				236	}
				237	}
				238
				239	/*
				240	** only call this on FS unmount.
				241	*/
				242	static int free_list_bitmaps(struct super_block *p_s_sb,
				243	struct reiserfs_list_bitmap *jb_array) {
				244	int i ;
				245	struct reiserfs_list_bitmap *jb ;
				246	for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) {
				247	jb = jb_array + i ;
				248	jb->journal_list = NULL ;
				249	cleanup_bitmap_list(p_s_sb, jb) ;
				250	vfree(jb->bitmaps) ;
				251	jb->bitmaps = NULL ;
				252	}
				253	return 0;
				254	}
				255
				256	static int free_bitmap_nodes(struct super_block *p_s_sb) {
				257	struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
				258	struct list_head *next = journal->j_bitmap_nodes.next ;
				259	struct reiserfs_bitmap_node *bn ;
				260
				261	while(next != &journal->j_bitmap_nodes) {
				262	bn = list_entry(next, struct reiserfs_bitmap_node, list) ;
				263	list_del(next) ;
				264	reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb) ;
				265	reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb) ;
				266	next = journal->j_bitmap_nodes.next ;
				267	journal->j_free_bitmap_nodes-- ;
				268	}
				269
				270	return 0 ;
				271	}
				272
				273	/*
				274	** get memory for JOURNAL_NUM_BITMAPS worth of bitmaps.
				275	** jb_array is the array to be filled in.
				276	*/
				277	int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb,
				278	struct reiserfs_list_bitmap *jb_array,
				279	int bmap_nr) {
				280	int i ;
				281	int failed = 0 ;
				282	struct reiserfs_list_bitmap *jb ;
				283	int mem = bmap_nr * sizeof(struct reiserfs_bitmap_node *) ;
				284
				285	for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) {
				286	jb = jb_array + i ;
				287	jb->journal_list = NULL ;
				288	jb->bitmaps = vmalloc( mem ) ;
				289	if (!jb->bitmaps) {
				290	reiserfs_warning(p_s_sb, "clm-2000, unable to allocate bitmaps for journal lists") ;
				291	failed = 1;
				292	break ;
				293	}
				294	memset(jb->bitmaps, 0, mem) ;
				295	}
				296	if (failed) {
				297	free_list_bitmaps(p_s_sb, jb_array) ;
				298	return -1 ;
				299	}
				300	return 0 ;
				301	}
				302
				303	/*
				304	** find an available list bitmap. If you can't find one, flush a commit list
				305	** and try again
				306	*/
				307	static struct reiserfs_list_bitmap *
				308	get_list_bitmap(struct super_block p_s_sb, struct reiserfs_journal_list jl) {
				309	int i,j ;
				310	struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
				311	struct reiserfs_list_bitmap *jb = NULL ;
				312
				313	for (j = 0 ; j < (JOURNAL_NUM_BITMAPS * 3) ; j++) {
				314	i = journal->j_list_bitmap_index ;
				315	journal->j_list_bitmap_index = (i + 1) % JOURNAL_NUM_BITMAPS ;
				316	jb = journal->j_list_bitmap + i ;
				317	if (journal->j_list_bitmap[i].journal_list) {
				318	flush_commit_list(p_s_sb, journal->j_list_bitmap[i].journal_list, 1) ;
				319	if (!journal->j_list_bitmap[i].journal_list) {
				320	break ;
				321	}
				322	} else {
				323	break ;
				324	}
				325	}
				326	if (jb->journal_list) { /* double check to make sure if flushed correctly */
				327	return NULL ;
				328	}
				329	jb->journal_list = jl ;
				330	return jb ;
				331	}
				332
				333	/*
				334	** allocates a new chunk of X nodes, and links them all together as a list.
				335	** Uses the cnode->next and cnode->prev pointers
				336	** returns NULL on failure
				337	*/
				338	static struct reiserfs_journal_cnode *allocate_cnodes(int num_cnodes) {
				339	struct reiserfs_journal_cnode *head ;
				340	int i ;
				341	if (num_cnodes <= 0) {
				342	return NULL ;
				343	}
				344	head = vmalloc(num_cnodes * sizeof(struct reiserfs_journal_cnode)) ;
				345	if (!head) {
				346	return NULL ;
				347	}
				348	memset(head, 0, num_cnodes * sizeof(struct reiserfs_journal_cnode)) ;
				349	head[0].prev = NULL ;
				350	head[0].next = head + 1 ;
				351	for (i = 1 ; i < num_cnodes; i++) {
				352	head[i].prev = head + (i - 1) ;
				353	head[i].next = head + (i + 1) ; /* if last one, overwrite it after the if */
				354	}
				355	head[num_cnodes -1].next = NULL ;
				356	return head ;
				357	}
				358
				359	/*
				360	** pulls a cnode off the free list, or returns NULL on failure
				361	*/
				362	static struct reiserfs_journal_cnode get_cnode(struct super_block p_s_sb) {
				363	struct reiserfs_journal_cnode *cn ;
				364	struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
				365
				366	reiserfs_check_lock_depth(p_s_sb, "get_cnode") ;
				367
				368	if (journal->j_cnode_free <= 0) {
				369	return NULL ;
				370	}
				371	journal->j_cnode_used++ ;
				372	journal->j_cnode_free-- ;
				373	cn = journal->j_cnode_free_list ;
				374	if (!cn) {
				375	return cn ;
				376	}
				377	if (cn->next) {
				378	cn->next->prev = NULL ;
				379	}
				380	journal->j_cnode_free_list = cn->next ;
				381	memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ;
				382	return cn ;
				383	}
				384
				385	/*
				386	** returns a cnode to the free list
				387	*/
				388	static void free_cnode(struct super_block p_s_sb, struct reiserfs_journal_cnode cn) {
				389	struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
				390
				391	reiserfs_check_lock_depth(p_s_sb, "free_cnode") ;
				392
				393	journal->j_cnode_used-- ;
				394	journal->j_cnode_free++ ;
				395	/* memset(cn, 0, sizeof(struct reiserfs_journal_cnode)) ; */
				396	cn->next = journal->j_cnode_free_list ;
				397	if (journal->j_cnode_free_list) {
				398	journal->j_cnode_free_list->prev = cn ;
				399	}
				400	cn->prev = NULL ; /* not needed with the memset, but I might kill the memset, and forget to do this */
				401	journal->j_cnode_free_list = cn ;
				402	}
				403
				/* Drop the journal_prepared and journal_restore_dirty bits from bh. */
				static void clear_prepared_bits(struct buffer_head *bh)
				{
					clear_buffer_journal_prepared(bh);
					clear_buffer_journal_restore_dirty(bh);
				}
				408
				/*
				 * Utility function to force a panic if it is called without the big
				 * kernel lock held.  caller is the name printed in the panic message.
				 * The check is compiled in only under CONFIG_SMP; otherwise this is a
				 * no-op.
				 */
				void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
				{
				#ifdef CONFIG_SMP
					if (current->lock_depth < 0) {
						reiserfs_panic(sb, "%s called without kernel lock held", caller);
					}
				#else
					;
				#endif
				}
				421
				422	/* return a cnode with same dev, block number and size in table, or null if not found */
				423	static inline struct reiserfs_journal_cnode *
				424	get_journal_hash_dev(struct super_block *sb,
				425	struct reiserfs_journal_cnode **table,
				426	long bl)
				427	{
				428	struct reiserfs_journal_cnode *cn ;
				429	cn = journal_hash(table, sb, bl) ;
				430	while(cn) {
				431	if (cn->blocknr == bl && cn->sb == sb)
				432	return cn ;
				433	cn = cn->hnext ;
				434	}
				435	return (struct reiserfs_journal_cnode *)0 ;
				436	}
				437
				438	/*
				439	** this actually means 'can this block be reallocated yet?'. If you set search_all, a block can only be allocated
				440	** if it is not in the current transaction, was not freed by the current transaction, and has no chance of ever
				441	** being overwritten by a replay after crashing.
				442	**
				443	** If you don't set search_all, a block can only be allocated if it is not in the current transaction. Since deleting
				444	** a block removes it from the current transaction, this case should never happen. If you don't set search_all, make
				445	** sure you never write the block without logging it.
				446	**
				447	** next_zero_bit is a suggestion about the next block to try for find_forward.
				448	** when bl is rejected because it is set in a journal list bitmap, we search
				449	** for the next zero bit in the bitmap that rejected bl. Then, we return that
				450	** through next_zero_bit for find_forward to try.
				451	**
				452	** Just because we return something in next_zero_bit does not mean we won't
				453	** reject it on the next call to reiserfs_in_journal
				454	**
				455	*/
				456	int reiserfs_in_journal(struct super_block *p_s_sb,
				457	int bmap_nr, int bit_nr, int search_all,
				458	b_blocknr_t *next_zero_bit) {
				459	struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
				460	struct reiserfs_journal_cnode *cn ;
				461	struct reiserfs_list_bitmap *jb ;
				462	int i ;
				463	unsigned long bl;
				464
				465	next_zero_bit = 0 ; / always start this at zero. */
				466
				467	PROC_INFO_INC( p_s_sb, journal.in_journal );
				468	/* If we aren't doing a search_all, this is a metablock, and it will be logged before use.
				469	** if we crash before the transaction that freed it commits, this transaction won't
				470	** have committed either, and the block will never be written
				471	*/
				472	if (search_all) {
				473	for (i = 0 ; i < JOURNAL_NUM_BITMAPS ; i++) {
				474	PROC_INFO_INC( p_s_sb, journal.in_journal_bitmap );
				475	jb = journal->j_list_bitmap + i ;
				476	if (jb->journal_list && jb->bitmaps[bmap_nr] &&
				477	test_bit(bit_nr, (unsigned long *)jb->bitmaps[bmap_nr]->data)) {
				478	next_zero_bit = find_next_zero_bit((unsigned long )
				479	(jb->bitmaps[bmap_nr]->data),
				480	p_s_sb->s_blocksize << 3, bit_nr+1) ;
				481	return 1 ;
				482	}
				483	}
				484	}
				485
				486	bl = bmap_nr * (p_s_sb->s_blocksize << 3) + bit_nr;
				487	/* is it in any old transactions? */
				488	if (search_all && (cn = get_journal_hash_dev(p_s_sb, journal->j_list_hash_table, bl))) {
				489	return 1;
				490	}
				491
				492	/* is it in the current transaction. This should never happen */
				493	if ((cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, bl))) {
				494	BUG();
				495	return 1;
				496	}
				497
				498	PROC_INFO_INC( p_s_sb, journal.in_journal_reusable );
				499	/* safe for reuse */
				500	return 0 ;
				501	}
				502
				503	/* insert cn into table
				504	*/
				505	static inline void insert_journal_hash(struct reiserfs_journal_cnode *table, struct reiserfs_journal_cnode cn) {
				506	struct reiserfs_journal_cnode *cn_orig ;
				507
				508	cn_orig = journal_hash(table, cn->sb, cn->blocknr) ;
				509	cn->hnext = cn_orig ;
				510	cn->hprev = NULL ;
				511	if (cn_orig) {
				512	cn_orig->hprev = cn ;
				513	}
				514	journal_hash(table, cn->sb, cn->blocknr) = cn ;
				515	}
				516
				517	/* lock the current transaction */
				518	inline static void lock_journal(struct super_block *p_s_sb) {
				519	PROC_INFO_INC( p_s_sb, journal.lock_journal );
				520	down(&SB_JOURNAL(p_s_sb)->j_lock);
				521	}
				522
				523	/* unlock the current transaction */
				524	inline static void unlock_journal(struct super_block *p_s_sb) {
				525	up(&SB_JOURNAL(p_s_sb)->j_lock);
				526	}
				527
				528	static inline void get_journal_list(struct reiserfs_journal_list *jl)
				529	{
				530	jl->j_refcount++;
				531	}
				532
				533	static inline void put_journal_list(struct super_block *s,
				534	struct reiserfs_journal_list *jl)
				535	{
				536	if (jl->j_refcount < 1) {
				537	reiserfs_panic (s, "trans id %lu, refcount at %d", jl->j_trans_id,
				538	jl->j_refcount);
				539	}
				540	if (--jl->j_refcount == 0)
				541	reiserfs_kfree(jl, sizeof(struct reiserfs_journal_list), s);
				542	}
				543
				544	/*
				545	** this used to be much more involved, and I'm keeping it just in case things get ugly again.
				546	** it gets called by flush_commit_list, and cleans up any data stored about blocks freed during a
				547	** transaction.
				548	*/
				549	static void cleanup_freed_for_journal_list(struct super_block p_s_sb, struct reiserfs_journal_list jl) {
				550
				551	struct reiserfs_list_bitmap *jb = jl->j_list_bitmap ;
				552	if (jb) {
				553	cleanup_bitmap_list(p_s_sb, jb) ;
				554	}
				555	jl->j_list_bitmap->journal_list = NULL ;
				556	jl->j_list_bitmap = NULL ;
				557	}
				558
				559	static int journal_list_still_alive(struct super_block *s,
				560	unsigned long trans_id)
				561	{
				562	struct reiserfs_journal *journal = SB_JOURNAL (s);
				563	struct list_head *entry = &journal->j_journal_list;
				564	struct reiserfs_journal_list *jl;
				565
				566	if (!list_empty(entry)) {
				567	jl = JOURNAL_LIST_ENTRY(entry->next);
				568	if (jl->j_trans_id <= trans_id) {
				569	return 1;
				570	}
				571	}
				572	return 0;
				573	}
				574
				575	static void reiserfs_end_buffer_io_sync(struct buffer_head *bh, int uptodate) {
				576	char b[BDEVNAME_SIZE];
				577
				578	if (buffer_journaled(bh)) {
				579	reiserfs_warning(NULL, "clm-2084: pinned buffer %lu:%s sent to disk",
				580	bh->b_blocknr, bdevname(bh->b_bdev, b)) ;
				581	}
				582	if (uptodate)
				583	set_buffer_uptodate(bh) ;
				584	else
				585	clear_buffer_uptodate(bh) ;
				586	unlock_buffer(bh) ;
				587	put_bh(bh) ;
				588	}
				589
				/*
				 * I/O completion handler for ordered and barrier writes: record the I/O
				 * result in the uptodate bit, unlock the buffer and drop one reference.
				 */
				static void reiserfs_end_ordered_io(struct buffer_head *bh, int uptodate)
				{
					if (!uptodate)
						clear_buffer_uptodate(bh);
					else
						set_buffer_uptodate(bh);

					unlock_buffer(bh);
					put_bh(bh);
				}
				598
				599	static void submit_logged_buffer(struct buffer_head *bh) {
				600	get_bh(bh) ;
				601	bh->b_end_io = reiserfs_end_buffer_io_sync ;
				602	clear_buffer_journal_new (bh);
				603	clear_buffer_dirty(bh) ;
				604	if (!test_clear_buffer_journal_test (bh))
				605	BUG();
				606	if (!buffer_uptodate(bh))
				607	BUG();
				608	submit_bh(WRITE, bh) ;
				609	}
				610
				611	static void submit_ordered_buffer(struct buffer_head *bh) {
				612	get_bh(bh) ;
				613	bh->b_end_io = reiserfs_end_ordered_io;
				614	clear_buffer_dirty(bh) ;
				615	if (!buffer_uptodate(bh))
				616	BUG();
				617	submit_bh(WRITE, bh) ;
				618	}
				619
				620	static int submit_barrier_buffer(struct buffer_head *bh) {
				621	get_bh(bh) ;
				622	bh->b_end_io = reiserfs_end_ordered_io;
				623	clear_buffer_dirty(bh) ;
				624	if (!buffer_uptodate(bh))
				625	BUG();
				626	return submit_bh(WRITE_BARRIER, bh) ;
				627	}
				628
				/*
				 * If bh is flagged eopnotsupp, clear the flag, disable barriers for this
				 * filesystem and rewrite the buffer synchronously.  Otherwise do nothing.
				 */
				static void check_barrier_completion(struct super_block *s,
								     struct buffer_head *bh)
				{
					if (!buffer_eopnotsupp(bh))
						return;

					clear_buffer_eopnotsupp(bh);
					disable_barrier(s);

					/* mark the buffer valid and dirty again so sync_dirty_buffer writes it */
					set_buffer_uptodate(bh);
					set_buffer_dirty(bh);
					sync_dirty_buffer(bh);
				}
				639
				/*
				 * A small batch of buffer heads collected by add_to_chunk() and submitted
				 * together by write_chunk() / write_ordered_chunk().
				 */
				640	#define CHUNK_SIZE 32
				641	struct buffer_chunk {
				/* the collected buffers; only the first nr slots are in use */
				642	struct buffer_head *bh[CHUNK_SIZE];
				/* number of buffers currently held; reset to 0 once the chunk is written */
				643	int nr;
				644	};
				645
				646	static void write_chunk(struct buffer_chunk *chunk) {
				647	int i;
				648	for (i = 0; i < chunk->nr ; i++) {
				649	submit_logged_buffer(chunk->bh[i]) ;
				650	}
				651	chunk->nr = 0;
				652	}
				653
				654	static void write_ordered_chunk(struct buffer_chunk *chunk) {
				655	int i;
				656	for (i = 0; i < chunk->nr ; i++) {
				657	submit_ordered_buffer(chunk->bh[i]) ;
				658	}
				659	chunk->nr = 0;
				660	}
				661
				662	static int add_to_chunk(struct buffer_chunk chunk, struct buffer_head bh,
				663	spinlock_t *lock,
				664	void (fn)(struct buffer_chunk *))
				665	{
				666	int ret = 0;
				667	if (chunk->nr >= CHUNK_SIZE)
				668	BUG();
				669	chunk->bh[chunk->nr++] = bh;
				670	if (chunk->nr >= CHUNK_SIZE) {
				671	ret = 1;
				672	if (lock)
				673	spin_unlock(lock);
				674	fn(chunk);
				675	if (lock)
				676	spin_lock(lock);
				677	}
				678	return ret;
				679	}
				680
				681
				682	static atomic_t nr_reiserfs_jh = ATOMIC_INIT(0);
				683	static struct reiserfs_jh *alloc_jh(void) {
				684	struct reiserfs_jh *jh;
				685	while(1) {
				686	jh = kmalloc(sizeof(*jh), GFP_NOFS);
				687	if (jh) {
				688	atomic_inc(&nr_reiserfs_jh);
				689	return jh;
				690	}
				691	yield();
				692	}
				693	}
				694
				695	/*
				696	* we want to free the jh when the buffer has been written
				697	* and waited on
				698	*/
				699	void reiserfs_free_jh(struct buffer_head *bh) {
				700	struct reiserfs_jh *jh;
				701
				702	jh = bh->b_private;
				703	if (jh) {
				704	bh->b_private = NULL;
				705	jh->bh = NULL;
				706	list_del_init(&jh->list);
				707	kfree(jh);
				708	if (atomic_read(&nr_reiserfs_jh) <= 0)
				709	BUG();
				710	atomic_dec(&nr_reiserfs_jh);
				711	put_bh(bh);
				712	}
				713	}
				714
				715	static inline int __add_jh(struct reiserfs_journal j, struct buffer_head bh,
				716	int tail)
				717	{
				718	struct reiserfs_jh *jh;
				719
				720	if (bh->b_private) {
				721	spin_lock(&j->j_dirty_buffers_lock);
				722	if (!bh->b_private) {
				723	spin_unlock(&j->j_dirty_buffers_lock);
				724	goto no_jh;
				725	}
				726	jh = bh->b_private;
				727	list_del_init(&jh->list);
				728	} else {
				729	no_jh:
				730	get_bh(bh);
				731	jh = alloc_jh();
				732	spin_lock(&j->j_dirty_buffers_lock);
				733	/* buffer must be locked for __add_jh, should be able to have
				734	* two adds at the same time
				735	*/
				736	if (bh->b_private)
				737	BUG();
				738	jh->bh = bh;
				739	bh->b_private = jh;
				740	}
				741	jh->jl = j->j_current_jl;
				742	if (tail)
				743	list_add_tail(&jh->list, &jh->jl->j_tail_bh_list);
				744	else {
				745	list_add_tail(&jh->list, &jh->jl->j_bh_list);
				746	}
				747	spin_unlock(&j->j_dirty_buffers_lock);
				748	return 0;
				749	}
				750
				751	int reiserfs_add_tail_list(struct inode inode, struct buffer_head bh) {
				752	return __add_jh(SB_JOURNAL(inode->i_sb), bh, 1);
				753	}
				754	int reiserfs_add_ordered_list(struct inode inode, struct buffer_head bh) {
				755	return __add_jh(SB_JOURNAL(inode->i_sb), bh, 0);
				756	}
				757
				/* map a list_head embedded in a reiserfs_jh back to the reiserfs_jh */
				758	#define JH_ENTRY(l) list_entry((l), struct reiserfs_jh, list)
				/*
				 * Write out the buffers queued on `list` and wait for them.
				 *
				 * lock: spinlock protecting `list`; taken here on entry and released
				 *       before returning.  It is dropped around chunk submission and
				 *       around every wait on a buffer.
				 * j:    journal; its j_errno is the initial return value.
				 * jl:   not referenced in this function.
				 * list: list of reiserfs_jh entries to write; empty on return.
				 *
				 * Returns j->j_errno as sampled on entry, or -EIO if any buffer that went
				 * through the wait pass is not uptodate afterwards.
				 */
				759	static int write_ordered_buffers(spinlock_t *lock,
				760	struct reiserfs_journal *j,
				761	struct reiserfs_journal_list *jl,
				762	struct list_head *list)
				763	{
				764	struct buffer_head *bh;
				765	struct reiserfs_jh *jh;
				766	int ret = j->j_errno;
				767	struct buffer_chunk chunk;
				/* buffers that have been (or are being) written and still need waiting on */
				768	struct list_head tmp;
				769	INIT_LIST_HEAD(&tmp);
				770
				771	chunk.nr = 0;
				772	spin_lock(lock);
				/* pass 1: submit dirty buffers in chunks until `list` is drained */
				773	while(!list_empty(list)) {
				774	jh = JH_ENTRY(list->next);
				775	bh = jh->bh;
				/* hold a reference while we work on bh; dropped at loop_next */
				776	get_bh(bh);
				/* NOTE(review): non-zero presumably means the buffer was already locked by someone else -- confirm */
				777	if (test_set_buffer_locked(bh)) {
				/* locked but clean: move it to tmp so that pass 2 waits on it */
				778	if (!buffer_dirty(bh)) {
				779	list_del_init(&jh->list);
				780	list_add(&jh->list, &tmp);
				781	goto loop_next;
				782	}
				/*
				 * locked and dirty: push out what we have collected, wait for the
				 * buffer, and look at the list again (jh stays on `list`)
				 */
				783	spin_unlock(lock);
				784	if (chunk.nr)
				785	write_ordered_chunk(&chunk);
				786	wait_on_buffer(bh);
				787	cond_resched();
				788	spin_lock(lock);
				789	goto loop_next;
				790	}
				/* we now hold the buffer lock */
				791	if (buffer_dirty(bh)) {
				792	list_del_init(&jh->list);
				793	list_add(&jh->list, &tmp);
				/* may drop and retake `lock` if the chunk fills up and gets written */
				794	add_to_chunk(&chunk, bh, lock, write_ordered_chunk);
				795	} else {
				/* clean buffer: nothing to write, detach its jh and unlock it */
				796	reiserfs_free_jh(bh);
				797	unlock_buffer(bh);
				798	}
				799	loop_next:
				800	put_bh(bh);
				801	cond_resched_lock(lock);
				802	}
				/* submit whatever is left in the partially filled chunk */
				803	if (chunk.nr) {
				804	spin_unlock(lock);
				805	write_ordered_chunk(&chunk);
				806	spin_lock(lock);
				807	}
				/* pass 2: wait for every buffer parked on tmp and collect I/O errors */
				808	while(!list_empty(&tmp)) {
				809	jh = JH_ENTRY(tmp.prev);
				810	bh = jh->bh;
				811	get_bh(bh);
				/* unlinks jh from tmp and frees it, which is what advances this loop */
				812	reiserfs_free_jh(bh);
				813
				814	if (buffer_locked(bh)) {
				815	spin_unlock(lock);
				816	wait_on_buffer(bh);
				817	spin_lock(lock);
				818	}
				/* a buffer that is not uptodate after the wait is reported as -EIO */
				819	if (!buffer_uptodate(bh)) {
				820	ret = -EIO;
				821	}
				822	put_bh(bh);
				823	cond_resched_lock(lock);
				824	}
				825	spin_unlock(lock);
				826	return ret;
				827	}
				828
				829	static int flush_older_commits(struct super_block s, struct reiserfs_journal_list jl) {
				830	struct reiserfs_journal *journal = SB_JOURNAL (s);
				831	struct reiserfs_journal_list *other_jl;
				832	struct reiserfs_journal_list *first_jl;
				833	struct list_head *entry;
				834	unsigned long trans_id = jl->j_trans_id;
				835	unsigned long other_trans_id;
				836	unsigned long first_trans_id;
				837
				838	find_first:
				839	/*
				840	* first we walk backwards to find the oldest uncommitted transation
				841	*/
				842	first_jl = jl;
				843	entry = jl->j_list.prev;
				844	while(1) {
				845	other_jl = JOURNAL_LIST_ENTRY(entry);
				846	if (entry == &journal->j_journal_list \|\|
				847	atomic_read(&other_jl->j_older_commits_done))
				848	break;
				849
				850	first_jl = other_jl;
				851	entry = other_jl->j_list.prev;
				852	}
				853
				854	/* if we didn't find any older uncommitted transactions, return now */
				855	if (first_jl == jl) {
				856	return 0;
				857	}
				858
				859	first_trans_id = first_jl->j_trans_id;
				860
				861	entry = &first_jl->j_list;
				862	while(1) {
				863	other_jl = JOURNAL_LIST_ENTRY(entry);
				864	other_trans_id = other_jl->j_trans_id;
				865
				866	if (other_trans_id < trans_id) {
				867	if (atomic_read(&other_jl->j_commit_left) != 0) {
				868	flush_commit_list(s, other_jl, 0);
				869
				870	/* list we were called with is gone, return */
				871	if (!journal_list_still_alive(s, trans_id))
				872	return 1;
				873
				874	/* the one we just flushed is gone, this means all
				875	* older lists are also gone, so first_jl is no longer
				876	* valid either. Go back to the beginning.
				877	*/
				878	if (!journal_list_still_alive(s, other_trans_id)) {
				879	goto find_first;
				880	}
				881	}
				882	entry = entry->next;
				883	if (entry == &journal->j_journal_list)
				884	return 0;
				885	} else {
				886	return 0;
				887	}
				888	}
				889	return 0;
				890	}
				891	int reiserfs_async_progress_wait(struct super_block *s) {
				892	DEFINE_WAIT(wait);
				893	struct reiserfs_journal *j = SB_JOURNAL(s);
				894	if (atomic_read(&j->j_async_throttle))
				895	blk_congestion_wait(WRITE, HZ/10);
				896	return 0;
				897	}
				898
				899	/*
				900	** if this journal list still has commit blocks unflushed, send them to disk.
				901	**
				902	** log areas must be flushed in order (transaction 2 can't commit before transaction 1)
				903	** Before the commit block can be written, every other log block must be safely on disk
				904	**
				905	*/
				906	static int flush_commit_list(struct super_block s, struct reiserfs_journal_list jl, int flushall) {
				907	int i;
				908	int bn ;
				909	struct buffer_head *tbh = NULL ;
				910	unsigned long trans_id = jl->j_trans_id;
				911	struct reiserfs_journal *journal = SB_JOURNAL (s);
				912	int barrier = 0;
				913	int retval = 0;
				914
				915	reiserfs_check_lock_depth(s, "flush_commit_list") ;
				916
				917	if (atomic_read(&jl->j_older_commits_done)) {
				918	return 0 ;
				919	}
				920
				921	/* before we can put our commit blocks on disk, we have to make sure everyone older than
				922	** us is on disk too
				923	*/
				924	BUG_ON (jl->j_len <= 0);
				925	BUG_ON (trans_id == journal->j_trans_id);
				926
				927	get_journal_list(jl);
				928	if (flushall) {
				929	if (flush_older_commits(s, jl) == 1) {
				930	/* list disappeared during flush_older_commits. return */
				931	goto put_jl;
				932	}
				933	}
				934
				935	/* make sure nobody is trying to flush this one at the same time */
				936	down(&jl->j_commit_lock);
				937	if (!journal_list_still_alive(s, trans_id)) {
				938	up(&jl->j_commit_lock);
				939	goto put_jl;
				940	}
				941	BUG_ON (jl->j_trans_id == 0);
				942
				943	/* this commit is done, exit */
				944	if (atomic_read(&(jl->j_commit_left)) <= 0) {
				945	if (flushall) {
				946	atomic_set(&(jl->j_older_commits_done), 1) ;
				947	}
				948	up(&jl->j_commit_lock);
				949	goto put_jl;
				950	}
				951
				952	if (!list_empty(&jl->j_bh_list)) {
				953	unlock_kernel();
				954	write_ordered_buffers(&journal->j_dirty_buffers_lock,
				955	journal, jl, &jl->j_bh_list);
				956	lock_kernel();
				957	}
				958	BUG_ON (!list_empty(&jl->j_bh_list));
				959	/*
				960	* for the description block and all the log blocks, submit any buffers
				961	* that haven't already reached the disk
				962	*/
				963	atomic_inc(&journal->j_async_throttle);
				964	for (i = 0 ; i < (jl->j_len + 1) ; i++) {
				965	bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start+i) %
				966	SB_ONDISK_JOURNAL_SIZE(s);
				967	tbh = journal_find_get_block(s, bn) ;
				968	if (buffer_dirty(tbh)) /* redundant, ll_rw_block() checks */
				969	ll_rw_block(WRITE, 1, &tbh) ;
				970	put_bh(tbh) ;
				971	}
				972	atomic_dec(&journal->j_async_throttle);
				973
				974	/* wait on everything written so far before writing the commit
				975	* if we are in barrier mode, send the commit down now
				976	*/
				977	barrier = reiserfs_barrier_flush(s);
				978	if (barrier) {
				979	int ret;
				980	lock_buffer(jl->j_commit_bh);
				981	ret = submit_barrier_buffer(jl->j_commit_bh);
				982	if (ret == -EOPNOTSUPP) {
				983	set_buffer_uptodate(jl->j_commit_bh);
				984	disable_barrier(s);
				985	barrier = 0;
				986	}
				987	}
				988	for (i = 0 ; i < (jl->j_len + 1) ; i++) {
				989	bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) +
				990	(jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s) ;
				991	tbh = journal_find_get_block(s, bn) ;
				992	wait_on_buffer(tbh) ;
				993	// since we're using ll_rw_blk above, it might have skipped over
				994	// a locked buffer. Double check here
				995	//
				996	if (buffer_dirty(tbh)) /* redundant, sync_dirty_buffer() checks */
				997	sync_dirty_buffer(tbh);
				998	if (unlikely (!buffer_uptodate(tbh))) {
				999	#ifdef CONFIG_REISERFS_CHECK
				1000	reiserfs_warning(s, "journal-601, buffer write failed") ;
				1001	#endif
				1002	retval = -EIO;
				1003	}
				1004	put_bh(tbh) ; /* once for journal_find_get_block */
				1005	put_bh(tbh) ; /* once due to original getblk in do_journal_end */
				1006	atomic_dec(&(jl->j_commit_left)) ;
				1007	}
				1008
				1009	BUG_ON (atomic_read(&(jl->j_commit_left)) != 1);
				1010
				1011	if (!barrier) {
				1012	if (buffer_dirty(jl->j_commit_bh))
				1013	BUG();
				1014	mark_buffer_dirty(jl->j_commit_bh) ;
				1015	sync_dirty_buffer(jl->j_commit_bh) ;
				1016	} else
				1017	wait_on_buffer(jl->j_commit_bh);
				1018
				1019	check_barrier_completion(s, jl->j_commit_bh);
				1020
				1021	/* If there was a write error in the journal - we can't commit this
				1022	* transaction - it will be invalid and, if successful, will just end
				1023	* up propogating the write error out to the filesystem. */
				1024	if (unlikely (!buffer_uptodate(jl->j_commit_bh))) {
				1025	#ifdef CONFIG_REISERFS_CHECK
				1026	reiserfs_warning(s, "journal-615: buffer write failed") ;
				1027	#endif
				1028	retval = -EIO;
				1029	}
				1030	bforget(jl->j_commit_bh) ;
				1031	if (journal->j_last_commit_id != 0 &&
				1032	(jl->j_trans_id - journal->j_last_commit_id) != 1) {
				1033	reiserfs_warning(s, "clm-2200: last commit %lu, current %lu",
				1034	journal->j_last_commit_id,
				1035	jl->j_trans_id);
				1036	}
				1037	journal->j_last_commit_id = jl->j_trans_id;
				1038
				1039	/* now, every commit block is on the disk. It is safe to allow blocks freed during this transaction to be reallocated */
				1040	cleanup_freed_for_journal_list(s, jl) ;
				1041
				1042	retval = retval ? retval : journal->j_errno;
				1043
				1044	/* mark the metadata dirty */
				1045	if (!retval)
				1046	dirty_one_transaction(s, jl);
				1047	atomic_dec(&(jl->j_commit_left)) ;
				1048
				1049	if (flushall) {
				1050	atomic_set(&(jl->j_older_commits_done), 1) ;
				1051	}
				1052	up(&jl->j_commit_lock);
				1053	put_jl:
				1054	put_journal_list(s, jl);
				1055
				1056	if (retval)
				1057	reiserfs_abort (s, retval, "Journal write error in %s", __FUNCTION__);
				1058	return retval;
				1059	}
				1060
				1061	/*
				1062	** flush_journal_list frequently needs to find a newer transaction for a given block. This does that, or
				1063	** returns NULL if it can't find anything
				1064	*/
				1065	static struct reiserfs_journal_list find_newer_jl_for_cn(struct reiserfs_journal_cnode cn) {
				1066	struct super_block *sb = cn->sb;
				1067	b_blocknr_t blocknr = cn->blocknr ;
				1068
				1069	cn = cn->hprev ;
				1070	while(cn) {
				1071	if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist) {
				1072	return cn->jlist ;
				1073	}
				1074	cn = cn->hprev ;
				1075	}
				1076	return NULL ;
				1077	}
				1078
				/* forward declaration; the definition appears further down in this file */
				static void remove_journal_hash(struct super_block *, struct reiserfs_journal_cnode **,
								struct reiserfs_journal_list *, unsigned long, int);
				1081
				1082	/*
				1083	** once all the real blocks have been flushed, it is safe to remove them from the
				1084	** journal list for this transaction. Aside from freeing the cnode, this also allows the
				1085	** block to be reallocated for data blocks if it had been deleted.
				1086	*/
				1087	static void remove_all_from_journal_list(struct super_block p_s_sb, struct reiserfs_journal_list jl, int debug) {
				1088	struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
				1089	struct reiserfs_journal_cnode cn, last ;
				1090	cn = jl->j_realblock ;
				1091
				1092	/* which is better, to lock once around the whole loop, or
				1093	** to lock for each call to remove_journal_hash?
				1094	*/
				1095	while(cn) {
				1096	if (cn->blocknr != 0) {
				1097	if (debug) {
				1098	reiserfs_warning (p_s_sb, "block %u, bh is %d, state %ld", cn->blocknr,
				1099	cn->bh ? 1: 0, cn->state) ;
				1100	}
				1101	cn->state = 0 ;
				1102	remove_journal_hash(p_s_sb, journal->j_list_hash_table, jl, cn->blocknr, 1) ;
				1103	}
				1104	last = cn ;
				1105	cn = cn->next ;
				1106	free_cnode(p_s_sb, last) ;
				1107	}
				1108	jl->j_realblock = NULL ;
				1109	}
				1110
				1111	/*
				1112	** if this timestamp is greater than the timestamp we wrote last to the header block, write it to the header block.
				1113	** once this is done, I can safely say the log area for this transaction won't ever be replayed, and I can start
				1114	** releasing blocks in this transaction for reuse as data blocks.
				1115	** called by flush_journal_list, before it calls remove_all_from_journal_list
				1116	**
				1117	*/
				1118	static int _update_journal_header_block(struct super_block *p_s_sb, unsigned long offset, unsigned long trans_id) {
				1119	struct reiserfs_journal_header *jh ;
				1120	struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
				1121
				1122	if (reiserfs_is_journal_aborted (journal))
				1123	return -EIO;
				1124
				1125	if (trans_id >= journal->j_last_flush_trans_id) {
				1126	if (buffer_locked((journal->j_header_bh))) {
				1127	wait_on_buffer((journal->j_header_bh)) ;
				1128	if (unlikely (!buffer_uptodate(journal->j_header_bh))) {
				1129	#ifdef CONFIG_REISERFS_CHECK
				1130	reiserfs_warning (p_s_sb, "journal-699: buffer write failed") ;
				1131	#endif
				1132	return -EIO;
				1133	}
				1134	}
				1135	journal->j_last_flush_trans_id = trans_id ;
				1136	journal->j_first_unflushed_offset = offset ;
				1137	jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data) ;
				1138	jh->j_last_flush_trans_id = cpu_to_le32(trans_id) ;
				1139	jh->j_first_unflushed_offset = cpu_to_le32(offset) ;
				1140	jh->j_mount_id = cpu_to_le32(journal->j_mount_id) ;
				1141
				1142	if (reiserfs_barrier_flush(p_s_sb)) {
				1143	int ret;
				1144	lock_buffer(journal->j_header_bh);
				1145	ret = submit_barrier_buffer(journal->j_header_bh);
				1146	if (ret == -EOPNOTSUPP) {
				1147	set_buffer_uptodate(journal->j_header_bh);
				1148	disable_barrier(p_s_sb);
				1149	goto sync;
				1150	}
				1151	wait_on_buffer(journal->j_header_bh);
				1152	check_barrier_completion(p_s_sb, journal->j_header_bh);
				1153	} else {
				1154	sync:
				1155	set_buffer_dirty(journal->j_header_bh) ;
				1156	sync_dirty_buffer(journal->j_header_bh) ;
				1157	}
				1158	if (!buffer_uptodate(journal->j_header_bh)) {
				1159	reiserfs_warning (p_s_sb, "journal-837: IO error during journal replay");
				1160	return -EIO ;
				1161	}
				1162	}
				1163	return 0 ;
				1164	}
				1165
				/*
				 * Thin wrapper: forwards its arguments unchanged to
				 * _update_journal_header_block() and returns its result.
				 */
				static int update_journal_header_block(struct super_block *p_s_sb,
								       unsigned long offset,
								       unsigned long trans_id)
				{
					int rc = _update_journal_header_block(p_s_sb, offset, trans_id);

					return rc;
				}
				1171	/*
				1172	** flush any and all journal lists older than you are
				1173	** can only be called from flush_journal_list
				1174	*/
				1175	static int flush_older_journal_lists(struct super_block *p_s_sb,
				1176	struct reiserfs_journal_list *jl)
				1177	{
				1178	struct list_head *entry;
				1179	struct reiserfs_journal_list *other_jl ;
				1180	struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
				1181	unsigned long trans_id = jl->j_trans_id;
				1182
				1183	/* we know we are the only ones flushing things, no extra race
				1184	* protection is required.
				1185	*/
				1186	restart:
				1187	entry = journal->j_journal_list.next;
				1188	/* Did we wrap? */
				1189	if (entry == &journal->j_journal_list)
				1190	return 0;
				1191	other_jl = JOURNAL_LIST_ENTRY(entry);
				1192	if (other_jl->j_trans_id < trans_id) {
				1193	BUG_ON (other_jl->j_refcount <= 0);
				1194	/* do not flush all */
				1195	flush_journal_list(p_s_sb, other_jl, 0) ;
				1196
				1197	/* other_jl is now deleted from the list */
				1198	goto restart;
				1199	}
				1200	return 0 ;
				1201	}
				1202
				1203	static void del_from_work_list(struct super_block *s,
				1204	struct reiserfs_journal_list *jl) {
				1205	struct reiserfs_journal *journal = SB_JOURNAL (s);
				1206	if (!list_empty(&jl->j_working_list)) {
				1207	list_del_init(&jl->j_working_list);
				1208	journal->j_num_work_lists--;
				1209	}
				1210	}
				1211
				1212	/* flush a journal list, both commit and real blocks
				1213	**
				1214	** always set flushall to 1, unless you are calling from inside
				1215	** flush_journal_list
				1216	**
				1217	** IMPORTANT. This can only be called while there are no journal writers,
				1218	** and the journal is locked. That means it can only be called from
				1219	** do_journal_end, or by journal_release
				1220	*/
				1221	static int flush_journal_list(struct super_block *s,
				1222	struct reiserfs_journal_list *jl, int flushall) {
				1223	struct reiserfs_journal_list *pjl ;
				1224	struct reiserfs_journal_cnode cn, last ;
				1225	int count ;
				1226	int was_jwait = 0 ;
				1227	int was_dirty = 0 ;
				1228	struct buffer_head *saved_bh ;
				1229	unsigned long j_len_saved = jl->j_len ;
				1230	struct reiserfs_journal *journal = SB_JOURNAL (s);
				1231	int err = 0;
				1232
				1233	BUG_ON (j_len_saved <= 0);
				1234
				1235	if (atomic_read(&journal->j_wcount) != 0) {
				1236	reiserfs_warning(s, "clm-2048: flush_journal_list called with wcount %d",
				1237	atomic_read(&journal->j_wcount)) ;
				1238	}
				1239	BUG_ON (jl->j_trans_id == 0);
				1240
				1241	/* if flushall == 0, the lock is already held */
				1242	if (flushall) {
				1243	down(&journal->j_flush_sem);
				1244	} else if (!down_trylock(&journal->j_flush_sem)) {
				1245	BUG();
				1246	}
				1247
				1248	count = 0 ;
				1249	if (j_len_saved > journal->j_trans_max) {
				1250	reiserfs_panic(s, "journal-715: flush_journal_list, length is %lu, trans id %lu\n", j_len_saved, jl->j_trans_id);
				1251	return 0 ;
				1252	}
				1253
				1254	/* if all the work is already done, get out of here */
				1255	if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
				1256	atomic_read(&(jl->j_commit_left)) <= 0) {
				1257	goto flush_older_and_return ;
				1258	}
				1259
				1260	/* start by putting the commit list on disk. This will also flush
				1261	** the commit lists of any olders transactions
				1262	*/
				1263	flush_commit_list(s, jl, 1) ;
				1264
				1265	if (!(jl->j_state & LIST_DIRTY) && !reiserfs_is_journal_aborted (journal))
				1266	BUG();
				1267
				1268	/* are we done now? */
				1269	if (atomic_read(&(jl->j_nonzerolen)) <= 0 &&
				1270	atomic_read(&(jl->j_commit_left)) <= 0) {
				1271	goto flush_older_and_return ;
				1272	}
				1273
				1274	/* loop through each cnode, see if we need to write it,
				1275	** or wait on a more recent transaction, or just ignore it
				1276	*/
				1277	if (atomic_read(&(journal->j_wcount)) != 0) {
				1278	reiserfs_panic(s, "journal-844: panic journal list is flushing, wcount is not 0\n") ;
				1279	}
				1280	cn = jl->j_realblock ;
				1281	while(cn) {
				1282	was_jwait = 0 ;
				1283	was_dirty = 0 ;
				1284	saved_bh = NULL ;
				1285	/* blocknr of 0 is no longer in the hash, ignore it */
				1286	if (cn->blocknr == 0) {
				1287	goto free_cnode ;
				1288	}
				1289
				1290	/* This transaction failed commit. Don't write out to the disk */
				1291	if (!(jl->j_state & LIST_DIRTY))
				1292	goto free_cnode;
				1293
				1294	pjl = find_newer_jl_for_cn(cn) ;
				1295	/* the order is important here. We check pjl to make sure we
				1296	** don't clear BH_JDirty_wait if we aren't the one writing this
				1297	** block to disk
				1298	*/
				1299	if (!pjl && cn->bh) {
				1300	saved_bh = cn->bh ;
				1301
				1302	/* we do this to make sure nobody releases the buffer while
				1303	** we are working with it
				1304	*/
				1305	get_bh(saved_bh) ;
				1306
				1307	if (buffer_journal_dirty(saved_bh)) {
				1308	BUG_ON (!can_dirty (cn));
				1309	was_jwait = 1 ;
				1310	was_dirty = 1 ;
				1311	} else if (can_dirty(cn)) {
				1312	/* everything with !pjl && jwait should be writable */
				1313	BUG();
				1314	}
				1315	}
				1316
				1317	/* if someone has this block in a newer transaction, just make
				1318	** sure they are commited, and don't try writing it to disk
				1319	*/
				1320	if (pjl) {
				1321	if (atomic_read(&pjl->j_commit_left))
				1322	flush_commit_list(s, pjl, 1) ;
				1323	goto free_cnode ;
				1324	}
				1325
				1326	/* bh == NULL when the block got to disk on its own, OR,
				1327	** the block got freed in a future transaction
				1328	*/
				1329	if (saved_bh == NULL) {
				1330	goto free_cnode ;
				1331	}
				1332
				1333	/* this should never happen. kupdate_one_transaction has this list
				1334	** locked while it works, so we should never see a buffer here that
				1335	** is not marked JDirty_wait
				1336	*/
				1337	if ((!was_jwait) && !buffer_locked(saved_bh)) {
				1338	reiserfs_warning (s, "journal-813: BAD! buffer %llu %cdirty %cjwait, "
				1339	"not in a newer tranasction",
				1340	(unsigned long long)saved_bh->b_blocknr,
				1341	was_dirty ? ' ' : '!', was_jwait ? ' ' : '!') ;
				1342	}
				1343	if (was_dirty) {
				1344	/* we inc again because saved_bh gets decremented at free_cnode */
				1345	get_bh(saved_bh) ;
				1346	set_bit(BLOCK_NEEDS_FLUSH, &cn->state) ;
				1347	lock_buffer(saved_bh);
				1348	BUG_ON (cn->blocknr != saved_bh->b_blocknr);
				1349	if (buffer_dirty(saved_bh))
				1350	submit_logged_buffer(saved_bh) ;
				1351	else
				1352	unlock_buffer(saved_bh);
				1353	count++ ;
				1354	} else {
				1355	reiserfs_warning (s, "clm-2082: Unable to flush buffer %llu in %s",
				1356	(unsigned long long)saved_bh->b_blocknr, __FUNCTION__);
				1357	}
				1358	free_cnode:
				1359	last = cn ;
				1360	cn = cn->next ;
				1361	if (saved_bh) {
				1362	/* we incremented this to keep others from taking the buffer head away */
				1363	put_bh(saved_bh) ;
				1364	if (atomic_read(&(saved_bh->b_count)) < 0) {
				1365	reiserfs_warning (s, "journal-945: saved_bh->b_count < 0");
				1366	}
				1367	}
				1368	}
				1369	if (count > 0) {
				1370	cn = jl->j_realblock ;
				1371	while(cn) {
				1372	if (test_bit(BLOCK_NEEDS_FLUSH, &cn->state)) {
				1373	if (!cn->bh) {
				1374	reiserfs_panic(s, "journal-1011: cn->bh is NULL\n") ;
				1375	}
				1376	wait_on_buffer(cn->bh) ;
				1377	if (!cn->bh) {
				1378	reiserfs_panic(s, "journal-1012: cn->bh is NULL\n") ;
				1379	}
				1380	if (unlikely (!buffer_uptodate(cn->bh))) {
				1381	#ifdef CONFIG_REISERFS_CHECK
				1382	reiserfs_warning(s, "journal-949: buffer write failed\n") ;
				1383	#endif
				1384	err = -EIO;
				1385	}
				1386	/* note, we must clear the JDirty_wait bit after the up to date
				1387	** check, otherwise we race against our flushpage routine
				1388	*/
				1389	BUG_ON (!test_clear_buffer_journal_dirty (cn->bh));
				1390
				1391	/* undo the inc from journal_mark_dirty */
				1392	put_bh(cn->bh) ;
				1393	brelse(cn->bh) ;
				1394	}
				1395	cn = cn->next ;
				1396	}
				1397	}
				1398
				1399	if (err)
				1400	reiserfs_abort (s, -EIO, "Write error while pushing transaction to disk in %s", __FUNCTION__);
				1401	flush_older_and_return:
				1402
				1403
				1404	/* before we can update the journal header block, we _must_ flush all
				1405	** real blocks from all older transactions to disk. This is because
				1406	** once the header block is updated, this transaction will not be
				1407	** replayed after a crash
				1408	*/
				1409	if (flushall) {
				1410	flush_older_journal_lists(s, jl);
				1411	}
				1412
				1413	err = journal->j_errno;
				1414	/* before we can remove everything from the hash tables for this
				1415	** transaction, we must make sure it can never be replayed
				1416	**
				1417	** since we are only called from do_journal_end, we know for sure there
				1418	** are no allocations going on while we are flushing journal lists. So,
				1419	** we only need to update the journal header block for the last list
				1420	** being flushed
				1421	*/
				1422	if (!err && flushall) {
				1423	err = update_journal_header_block(s, (jl->j_start + jl->j_len + 2) % SB_ONDISK_JOURNAL_SIZE(s), jl->j_trans_id) ;
				1424	if (err)
				1425	reiserfs_abort (s, -EIO, "Write error while updating journal header in %s", __FUNCTION__);
				1426	}
				1427	remove_all_from_journal_list(s, jl, 0) ;
				1428	list_del_init(&jl->j_list);
				1429	journal->j_num_lists--;
				1430	del_from_work_list(s, jl);
				1431
				1432	if (journal->j_last_flush_id != 0 &&
				1433	(jl->j_trans_id - journal->j_last_flush_id) != 1) {
				1434	reiserfs_warning(s, "clm-2201: last flush %lu, current %lu",
				1435	journal->j_last_flush_id,
				1436	jl->j_trans_id);
				1437	}
				1438	journal->j_last_flush_id = jl->j_trans_id;
				1439
				1440	/* not strictly required since we are freeing the list, but it should
				1441	* help find code using dead lists later on
				1442	*/
				1443	jl->j_len = 0 ;
				1444	atomic_set(&(jl->j_nonzerolen), 0) ;
				1445	jl->j_start = 0 ;
				1446	jl->j_realblock = NULL ;
				1447	jl->j_commit_bh = NULL ;
				1448	jl->j_trans_id = 0 ;
				1449	jl->j_state = 0;
				1450	put_journal_list(s, jl);
				1451	if (flushall)
				1452	up(&journal->j_flush_sem);
				1453	return err ;
				1454	}
				1455
				1456	static int write_one_transaction(struct super_block *s,
				1457	struct reiserfs_journal_list *jl,
				1458	struct buffer_chunk *chunk)
				1459	{
				1460	struct reiserfs_journal_cnode *cn;
				1461	int ret = 0 ;
				1462
				1463	jl->j_state \|= LIST_TOUCHED;
				1464	del_from_work_list(s, jl);
				1465	if (jl->j_len == 0 \|\| atomic_read(&jl->j_nonzerolen) == 0) {
				1466	return 0;
				1467	}
				1468
				1469	cn = jl->j_realblock ;
				1470	while(cn) {
				1471	/* if the blocknr == 0, this has been cleared from the hash,
				1472	** skip it
				1473	*/
				1474	if (cn->blocknr == 0) {
				1475	goto next ;
				1476	}
				1477	if (cn->bh && can_dirty(cn) && buffer_dirty(cn->bh)) {
				1478	struct buffer_head *tmp_bh;
				1479	/* we can race against journal_mark_freed when we try
				1480	* to lock_buffer(cn->bh), so we have to inc the buffer
				1481	* count, and recheck things after locking
				1482	*/
				1483	tmp_bh = cn->bh;
				1484	get_bh(tmp_bh);
				1485	lock_buffer(tmp_bh);
				1486	if (cn->bh && can_dirty(cn) && buffer_dirty(tmp_bh)) {
				1487	if (!buffer_journal_dirty(tmp_bh) \|\|
				1488	buffer_journal_prepared(tmp_bh))
				1489	BUG();
				1490	add_to_chunk(chunk, tmp_bh, NULL, write_chunk);
				1491	ret++;
				1492	} else {
				1493	/* note, cn->bh might be null now */
				1494	unlock_buffer(tmp_bh);
				1495	}
				1496	put_bh(tmp_bh);
				1497	}
				1498	next:
				1499	cn = cn->next ;
				1500	cond_resched();
				1501	}
				1502	return ret ;
				1503	}
				1504
				1505	/* used by flush_commit_list */
				1506	static int dirty_one_transaction(struct super_block *s,
				1507	struct reiserfs_journal_list *jl)
				1508	{
				1509	struct reiserfs_journal_cnode *cn;
				1510	struct reiserfs_journal_list *pjl;
				1511	int ret = 0 ;
				1512
				1513	jl->j_state \|= LIST_DIRTY;
				1514	cn = jl->j_realblock ;
				1515	while(cn) {
				1516	/* look for a more recent transaction that logged this
				1517	** buffer. Only the most recent transaction with a buffer in
				1518	** it is allowed to send that buffer to disk
				1519	*/
				1520	pjl = find_newer_jl_for_cn(cn) ;
				1521	if (!pjl && cn->blocknr && cn->bh && buffer_journal_dirty(cn->bh))
				1522	{
				1523	BUG_ON (!can_dirty(cn));
				1524	/* if the buffer is prepared, it will either be logged
				1525	* or restored. If restored, we need to make sure
				1526	* it actually gets marked dirty
				1527	*/
				1528	clear_buffer_journal_new (cn->bh);
				1529	if (buffer_journal_prepared (cn->bh)) {
				1530	set_buffer_journal_restore_dirty (cn->bh);
				1531	} else {
				1532	set_buffer_journal_test (cn->bh);
				1533	mark_buffer_dirty(cn->bh);
				1534	}
				1535	}
				1536	cn = cn->next ;
				1537	}
				1538	return ret ;
				1539	}
				1540
				1541	static int kupdate_transactions(struct super_block *s,
				1542	struct reiserfs_journal_list *jl,
				1543	struct reiserfs_journal_list **next_jl,
				1544	unsigned long *next_trans_id,
				1545	int num_blocks,
				1546	int num_trans) {
				1547	int ret = 0;
				1548	int written = 0 ;
				1549	int transactions_flushed = 0;
				1550	unsigned long orig_trans_id = jl->j_trans_id;
				1551	struct buffer_chunk chunk;
				1552	struct list_head *entry;
				1553	struct reiserfs_journal *journal = SB_JOURNAL (s);
				1554	chunk.nr = 0;
				1555
				1556	down(&journal->j_flush_sem);
				1557	if (!journal_list_still_alive(s, orig_trans_id)) {
				1558	goto done;
				1559	}
				1560
				1561	/* we've got j_flush_sem held, nobody is going to delete any
				1562	* of these lists out from underneath us
				1563	*/
				1564	while((num_trans && transactions_flushed < num_trans) \|\|
				1565	(!num_trans && written < num_blocks)) {
				1566
				1567	if (jl->j_len == 0 \|\| (jl->j_state & LIST_TOUCHED) \|\|
				1568	atomic_read(&jl->j_commit_left) \|\| !(jl->j_state & LIST_DIRTY))
				1569	{
				1570	del_from_work_list(s, jl);
				1571	break;
				1572	}
				1573	ret = write_one_transaction(s, jl, &chunk);
				1574
				1575	if (ret < 0)
				1576	goto done;
				1577	transactions_flushed++;
				1578	written += ret;
				1579	entry = jl->j_list.next;
				1580
				1581	/* did we wrap? */
				1582	if (entry == &journal->j_journal_list) {
				1583	break;
				1584	}
				1585	jl = JOURNAL_LIST_ENTRY(entry);
				1586
				1587	/* don't bother with older transactions */
				1588	if (jl->j_trans_id <= orig_trans_id)
				1589	break;
				1590	}
				1591	if (chunk.nr) {
				1592	write_chunk(&chunk);
				1593	}
				1594
				1595	done:
				1596	up(&journal->j_flush_sem);
				1597	return ret;
				1598	}
				1599
				1600	/* for o_sync and fsync heavy applications, they tend to use
				1601	** all the journa list slots with tiny transactions. These
				1602	** trigger lots and lots of calls to update the header block, which
				1603	** adds seeks and slows things down.
				1604	**
				1605	** This function tries to clear out a large chunk of the journal lists
				1606	** at once, which makes everything faster since only the newest journal
				1607	** list updates the header block
				1608	*/
				1609	static int flush_used_journal_lists(struct super_block *s,
				1610	struct reiserfs_journal_list *jl) {
				1611	unsigned long len = 0;
				1612	unsigned long cur_len;
				1613	int ret;
				1614	int i;
				1615	int limit = 256;
				1616	struct reiserfs_journal_list *tjl;
				1617	struct reiserfs_journal_list *flush_jl;
				1618	unsigned long trans_id;
				1619	struct reiserfs_journal *journal = SB_JOURNAL (s);
				1620
				1621	flush_jl = tjl = jl;
				1622
				1623	/* in data logging mode, try harder to flush a lot of blocks */
				1624	if (reiserfs_data_log(s))
				1625	limit = 1024;
				1626	/* flush for 256 transactions or limit blocks, whichever comes first */
				1627	for(i = 0 ; i < 256 && len < limit ; i++) {
				1628	if (atomic_read(&tjl->j_commit_left) \|\|
				1629	tjl->j_trans_id < jl->j_trans_id) {
				1630	break;
				1631	}
				1632	cur_len = atomic_read(&tjl->j_nonzerolen);
				1633	if (cur_len > 0) {
				1634	tjl->j_state &= ~LIST_TOUCHED;
				1635	}
				1636	len += cur_len;
				1637	flush_jl = tjl;
				1638	if (tjl->j_list.next == &journal->j_journal_list)
				1639	break;
				1640	tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next);
				1641	}
				1642	/* try to find a group of blocks we can flush across all the
				1643	** transactions, but only bother if we've actually spanned
				1644	** across multiple lists
				1645	*/
				1646	if (flush_jl != jl) {
				1647	ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i);
				1648	}
				1649	flush_journal_list(s, flush_jl, 1);
				1650	return 0;
				1651	}
				1652
				1653	/*
				1654	** removes any nodes in table with name block and dev as bh.
				1655	** only touchs the hnext and hprev pointers.
				1656	*/
				1657	void remove_journal_hash(struct super_block *sb,
				1658	struct reiserfs_journal_cnode **table,
				1659	struct reiserfs_journal_list *jl,
				1660	unsigned long block, int remove_freed)
				1661	{
				1662	struct reiserfs_journal_cnode *cur ;
				1663	struct reiserfs_journal_cnode **head ;
				1664
				1665	head= &(journal_hash(table, sb, block)) ;
				1666	if (!head) {
				1667	return ;
				1668	}
				1669	cur = *head ;
				1670	while(cur) {
				1671	if (cur->blocknr == block && cur->sb == sb && (jl == NULL \|\| jl == cur->jlist) &&
				1672	(!test_bit(BLOCK_FREED, &cur->state) \|\| remove_freed)) {
				1673	if (cur->hnext) {
				1674	cur->hnext->hprev = cur->hprev ;
				1675	}
				1676	if (cur->hprev) {
				1677	cur->hprev->hnext = cur->hnext ;
				1678	} else {
				1679	*head = cur->hnext ;
				1680	}
				1681	cur->blocknr = 0 ;
				1682	cur->sb = NULL ;
				1683	cur->state = 0 ;
				1684	if (cur->bh && cur->jlist) /* anybody who clears the cur->bh will also dec the nonzerolen */
				1685	atomic_dec(&(cur->jlist->j_nonzerolen)) ;
				1686	cur->bh = NULL ;
				1687	cur->jlist = NULL ;
				1688	}
				1689	cur = cur->hnext ;
				1690	}
				1691	}
				1692
				1693	static void free_journal_ram(struct super_block *p_s_sb) {
				1694	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
				1695	reiserfs_kfree(journal->j_current_jl,
				1696	sizeof(struct reiserfs_journal_list), p_s_sb);
				1697	journal->j_num_lists--;
				1698
				1699	vfree(journal->j_cnode_free_orig) ;
				1700	free_list_bitmaps(p_s_sb, journal->j_list_bitmap) ;
				1701	free_bitmap_nodes(p_s_sb) ; /* must be after free_list_bitmaps */
				1702	if (journal->j_header_bh) {
				1703	brelse(journal->j_header_bh) ;
				1704	}
				1705	/* j_header_bh is on the journal dev, make sure not to release the journal
				1706	* dev until we brelse j_header_bh
				1707	*/
				1708	release_journal_dev(p_s_sb, journal);
				1709	vfree(journal) ;
				1710	}
				1711
				1712	/*
				1713	** call on unmount. Only set error to 1 if you haven't made your way out
				1714	** of read_super() yet. Any other caller must keep error at 0.
				1715	*/
				1716	static int do_journal_release(struct reiserfs_transaction_handle th, struct super_block p_s_sb, int error) {
				1717	struct reiserfs_transaction_handle myth ;
				1718	int flushed = 0;
				1719	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
				1720
				1721	/* we only want to flush out transactions if we were called with error == 0
				1722	*/
				1723	if (!error && !(p_s_sb->s_flags & MS_RDONLY)) {
				1724	/* end the current trans */
				1725	BUG_ON (!th->t_trans_id);
				1726	do_journal_end(th, p_s_sb,10, FLUSH_ALL) ;
				1727
				1728	/* make sure something gets logged to force our way into the flush code */
				1729	if (!journal_join(&myth, p_s_sb, 1)) {
				1730	reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ;
				1731	journal_mark_dirty(&myth, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ;
				1732	do_journal_end(&myth, p_s_sb,1, FLUSH_ALL) ;
				1733	flushed = 1;
				1734	}
				1735	}
				1736
				1737	/* this also catches errors during the do_journal_end above */
				1738	if (!error && reiserfs_is_journal_aborted(journal)) {
				1739	memset(&myth, 0, sizeof(myth));
				1740	if (!journal_join_abort(&myth, p_s_sb, 1)) {
				1741	reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ;
				1742	journal_mark_dirty(&myth, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ;
				1743	do_journal_end(&myth, p_s_sb, 1, FLUSH_ALL) ;
				1744	}
				1745	}
				1746
				1747	reiserfs_mounted_fs_count-- ;
				1748	/* wait for all commits to finish */
				1749	cancel_delayed_work(&SB_JOURNAL(p_s_sb)->j_work);
				1750	flush_workqueue(commit_wq);
				1751	if (!reiserfs_mounted_fs_count) {
				1752	destroy_workqueue(commit_wq);
				1753	commit_wq = NULL;
				1754	}
				1755
				1756	free_journal_ram(p_s_sb) ;
				1757
				1758	return 0 ;
				1759	}
				1760
				/*
				 * Call on unmount.  Flush all journal transactions and release all
				 * allocated ram, by calling do_journal_release() with error == 0.
				 */
				int journal_release(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb)
				{
					return do_journal_release(th, p_s_sb, 0);
				}
				/*
				 * Only call from an error condition inside reiserfs_read_super!
				 * Calls do_journal_release() with error == 1, which skips the
				 * transaction flushing.
				 */
				int journal_release_error(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb)
				{
					return do_journal_release(th, p_s_sb, 1);
				}
				1773
				1774	/* compares description block with commit block. returns 1 if they differ, 0 if they are the same */
				1775	static int journal_compare_desc_commit(struct super_block p_s_sb, struct reiserfs_journal_desc desc,
				1776	struct reiserfs_journal_commit *commit) {
				1777	if (get_commit_trans_id (commit) != get_desc_trans_id (desc) \|\|
				1778	get_commit_trans_len (commit) != get_desc_trans_len (desc) \|\|
				1779	get_commit_trans_len (commit) > SB_JOURNAL(p_s_sb)->j_trans_max \|\|
				1780	get_commit_trans_len (commit) <= 0
				1781	) {
				1782	return 1 ;
				1783	}
				1784	return 0 ;
				1785	}
				1786	/* returns 0 if it did not find a description block
				1787	** returns -1 if it found a corrupt commit block
				1788	** returns 1 if both desc and commit were valid
				1789	*/
				1790	static int journal_transaction_is_valid(struct super_block p_s_sb, struct buffer_head d_bh, unsigned long oldest_invalid_trans_id, unsigned long newest_mount_id) {
				1791	struct reiserfs_journal_desc *desc ;
				1792	struct reiserfs_journal_commit *commit ;
				1793	struct buffer_head *c_bh ;
				1794	unsigned long offset ;
				1795
				1796	if (!d_bh)
				1797	return 0 ;
				1798
				1799	desc = (struct reiserfs_journal_desc *)d_bh->b_data ;
				1800	if (get_desc_trans_len(desc) > 0 && !memcmp(get_journal_desc_magic (d_bh), JOURNAL_DESC_MAGIC, 8)) {
				1801	if (oldest_invalid_trans_id && oldest_invalid_trans_id && get_desc_trans_id(desc) > oldest_invalid_trans_id) {
				1802	reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-986: transaction "
				1803	"is valid returning because trans_id %d is greater than "
				1804	"oldest_invalid %lu", get_desc_trans_id(desc),
				1805	*oldest_invalid_trans_id);
				1806	return 0 ;
				1807	}
				1808	if (newest_mount_id && *newest_mount_id > get_desc_mount_id (desc)) {
				1809	reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1087: transaction "
				1810	"is valid returning because mount_id %d is less than "
				1811	"newest_mount_id %lu", get_desc_mount_id (desc),
				1812	*newest_mount_id) ;
				1813	return -1 ;
				1814	}
				1815	if ( get_desc_trans_len(desc) > SB_JOURNAL(p_s_sb)->j_trans_max ) {
				1816	reiserfs_warning(p_s_sb, "journal-2018: Bad transaction length %d encountered, ignoring transaction", get_desc_trans_len(desc));
				1817	return -1 ;
				1818	}
				1819	offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) ;
				1820
				1821	/* ok, we have a journal description block, lets see if the transaction was valid */
				1822	c_bh = journal_bread(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
				1823	((offset + get_desc_trans_len(desc) + 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))) ;
				1824	if (!c_bh)
				1825	return 0 ;
				1826	commit = (struct reiserfs_journal_commit *)c_bh->b_data ;
				1827	if (journal_compare_desc_commit(p_s_sb, desc, commit)) {
				1828	reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE,
				1829	"journal_transaction_is_valid, commit offset %ld had bad "
				1830	"time %d or length %d",
				1831	c_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
				1832	get_commit_trans_id (commit),
				1833	get_commit_trans_len(commit));
				1834	brelse(c_bh) ;
				1835	if (oldest_invalid_trans_id) {
				1836	*oldest_invalid_trans_id = get_desc_trans_id(desc) ;
				1837	reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1004: "
				1838	"transaction_is_valid setting oldest invalid trans_id "
				1839	"to %d", get_desc_trans_id(desc)) ;
				1840	}
				1841	return -1;
				1842	}
				1843	brelse(c_bh) ;
				1844	reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1006: found valid "
				1845	"transaction start offset %llu, len %d id %d",
				1846	d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
				1847	get_desc_trans_len(desc), get_desc_trans_id(desc)) ;
				1848	return 1 ;
				1849	} else {
				1850	return 0 ;
				1851	}
				1852	}
				1853
				1854	static void brelse_array(struct buffer_head **heads, int num) {
				1855	int i ;
				1856	for (i = 0 ; i < num ; i++) {
				1857	brelse(heads[i]) ;
				1858	}
				1859	}
				1860
				1861	/*
				1862	** given the start, and values for the oldest acceptable transactions,
				1863	** this either reads in a replays a transaction, or returns because the transaction
				1864	** is invalid, or too old.
				1865	*/
				1866	static int journal_read_transaction(struct super_block *p_s_sb, unsigned long cur_dblock, unsigned long oldest_start,
				1867	unsigned long oldest_trans_id, unsigned long newest_mount_id) {
				1868	struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
				1869	struct reiserfs_journal_desc *desc ;
				1870	struct reiserfs_journal_commit *commit ;
				1871	unsigned long trans_id = 0 ;
				1872	struct buffer_head *c_bh ;
				1873	struct buffer_head *d_bh ;
				1874	struct buffer_head **log_blocks = NULL ;
				1875	struct buffer_head **real_blocks = NULL ;
				1876	unsigned long trans_offset ;
				1877	int i;
				1878	int trans_half;
				1879
				1880	d_bh = journal_bread(p_s_sb, cur_dblock) ;
				1881	if (!d_bh)
				1882	return 1 ;
				1883	desc = (struct reiserfs_journal_desc *)d_bh->b_data ;
				1884	trans_offset = d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) ;
				1885	reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1037: "
				1886	"journal_read_transaction, offset %llu, len %d mount_id %d",
				1887	d_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
				1888	get_desc_trans_len(desc), get_desc_mount_id(desc)) ;
				1889	if (get_desc_trans_id(desc) < oldest_trans_id) {
				1890	reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1039: "
				1891	"journal_read_trans skipping because %lu is too old",
				1892	cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb)) ;
				1893	brelse(d_bh) ;
				1894	return 1 ;
				1895	}
				1896	if (get_desc_mount_id(desc) != newest_mount_id) {
				1897	reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1146: "
				1898	"journal_read_trans skipping because %d is != "
				1899	"newest_mount_id %lu", get_desc_mount_id(desc),
				1900	newest_mount_id) ;
				1901	brelse(d_bh) ;
				1902	return 1 ;
				1903	}
				1904	c_bh = journal_bread(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
				1905	((trans_offset + get_desc_trans_len(desc) + 1) %
				1906	SB_ONDISK_JOURNAL_SIZE(p_s_sb))) ;
				1907	if (!c_bh) {
				1908	brelse(d_bh) ;
				1909	return 1 ;
				1910	}
				1911	commit = (struct reiserfs_journal_commit *)c_bh->b_data ;
				1912	if (journal_compare_desc_commit(p_s_sb, desc, commit)) {
				1913	reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal_read_transaction, "
				1914	"commit offset %llu had bad time %d or length %d",
				1915	c_bh->b_blocknr - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
				1916	get_commit_trans_id(commit), get_commit_trans_len(commit));
				1917	brelse(c_bh) ;
				1918	brelse(d_bh) ;
				1919	return 1;
				1920	}
				1921	trans_id = get_desc_trans_id(desc) ;
				1922	/* now we know we've got a good transaction, and it was inside the valid time ranges */
				1923	log_blocks = reiserfs_kmalloc(get_desc_trans_len(desc) * sizeof(struct buffer_head *), GFP_NOFS, p_s_sb) ;
				1924	real_blocks = reiserfs_kmalloc(get_desc_trans_len(desc) * sizeof(struct buffer_head *), GFP_NOFS, p_s_sb) ;
				1925	if (!log_blocks \|\| !real_blocks) {
				1926	brelse(c_bh) ;
				1927	brelse(d_bh) ;
				1928	reiserfs_kfree(log_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ;
				1929	reiserfs_kfree(real_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ;
				1930	reiserfs_warning(p_s_sb, "journal-1169: kmalloc failed, unable to mount FS") ;
				1931	return -1 ;
				1932	}
				1933	/* get all the buffer heads */
				1934	trans_half = journal_trans_half (p_s_sb->s_blocksize) ;
				1935	for(i = 0 ; i < get_desc_trans_len(desc) ; i++) {
				1936	log_blocks[i] = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + (trans_offset + 1 + i) % SB_ONDISK_JOURNAL_SIZE(p_s_sb));
				1937	if (i < trans_half) {
				1938	real_blocks[i] = sb_getblk(p_s_sb, le32_to_cpu(desc->j_realblock[i])) ;
				1939	} else {
				1940	real_blocks[i] = sb_getblk(p_s_sb, le32_to_cpu(commit->j_realblock[i - trans_half])) ;
				1941	}
				1942	if ( real_blocks[i]->b_blocknr > SB_BLOCK_COUNT(p_s_sb) ) {
				1943	reiserfs_warning(p_s_sb, "journal-1207: REPLAY FAILURE fsck required! Block to replay is outside of filesystem");
				1944	goto abort_replay;
				1945	}
				1946	/* make sure we don't try to replay onto log or reserved area */
				1947	if (is_block_in_log_or_reserved_area(p_s_sb, real_blocks[i]->b_blocknr)) {
				1948	reiserfs_warning(p_s_sb, "journal-1204: REPLAY FAILURE fsck required! Trying to replay onto a log block") ;
				1949	abort_replay:
				1950	brelse_array(log_blocks, i) ;
				1951	brelse_array(real_blocks, i) ;
				1952	brelse(c_bh) ;
				1953	brelse(d_bh) ;
				1954	reiserfs_kfree(log_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ;
				1955	reiserfs_kfree(real_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ;
				1956	return -1 ;
				1957	}
				1958	}
				1959	/* read in the log blocks, memcpy to the corresponding real block */
				1960	ll_rw_block(READ, get_desc_trans_len(desc), log_blocks) ;
				1961	for (i = 0 ; i < get_desc_trans_len(desc) ; i++) {
				1962	wait_on_buffer(log_blocks[i]) ;
				1963	if (!buffer_uptodate(log_blocks[i])) {
				1964	reiserfs_warning(p_s_sb, "journal-1212: REPLAY FAILURE fsck required! buffer write failed") ;
				1965	brelse_array(log_blocks + i, get_desc_trans_len(desc) - i) ;
				1966	brelse_array(real_blocks, get_desc_trans_len(desc)) ;
				1967	brelse(c_bh) ;
				1968	brelse(d_bh) ;
				1969	reiserfs_kfree(log_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ;
				1970	reiserfs_kfree(real_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ;
				1971	return -1 ;
				1972	}
				1973	memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data, real_blocks[i]->b_size) ;
				1974	set_buffer_uptodate(real_blocks[i]) ;
				1975	brelse(log_blocks[i]) ;
				1976	}
				1977	/* flush out the real blocks */
				1978	for (i = 0 ; i < get_desc_trans_len(desc) ; i++) {
				1979	set_buffer_dirty(real_blocks[i]) ;
				1980	ll_rw_block(WRITE, 1, real_blocks + i) ;
				1981	}
				1982	for (i = 0 ; i < get_desc_trans_len(desc) ; i++) {
				1983	wait_on_buffer(real_blocks[i]) ;
				1984	if (!buffer_uptodate(real_blocks[i])) {
				1985	reiserfs_warning(p_s_sb, "journal-1226: REPLAY FAILURE, fsck required! buffer write failed") ;
				1986	brelse_array(real_blocks + i, get_desc_trans_len(desc) - i) ;
				1987	brelse(c_bh) ;
				1988	brelse(d_bh) ;
				1989	reiserfs_kfree(log_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ;
				1990	reiserfs_kfree(real_blocks, get_desc_trans_len(desc) * sizeof(struct buffer_head *), p_s_sb) ;
				1991	return -1 ;
				1992	}
				1993	brelse(real_blocks[i]) ;
				1994	}
				1995	cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + ((trans_offset + get_desc_trans_len(desc) + 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb)) ;
				1996	reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1095: setting journal "
				1997	"start to offset %ld",
				1998	cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb)) ;
				1999
				2000	/* init starting values for the first transaction, in case this is the last transaction to be replayed. */
				2001	journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) ;
				2002	journal->j_last_flush_trans_id = trans_id ;
				2003	journal->j_trans_id = trans_id + 1;
				2004	brelse(c_bh) ;
				2005	brelse(d_bh) ;
				2006	reiserfs_kfree(log_blocks, le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *), p_s_sb) ;
				2007	reiserfs_kfree(real_blocks, le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *), p_s_sb) ;
				2008	return 0 ;
				2009	}
				2010
				2011	/* This function reads blocks starting from block and to max_block of bufsize
				2012	size (but no more than BUFNR blocks at a time). This proved to improve
				2013	mounting speed on self-rebuilding raid5 arrays at least.
				2014	Right now it is only used from journal code. But later we might use it
				2015	from other places.
				2016	Note: Do not use journal_getblk/sb_getblk functions here! */
				2017	static struct buffer_head * reiserfs_breada (struct block_device *dev, int block, int bufsize,
				2018	unsigned int max_block)
				2019	{
				2020	struct buffer_head * bhlist[BUFNR];
				2021	unsigned int blocks = BUFNR;
				2022	struct buffer_head * bh;
				2023	int i, j;
				2024
				2025	bh = __getblk (dev, block, bufsize );
				2026	if (buffer_uptodate (bh))
				2027	return (bh);
				2028
				2029	if (block + BUFNR > max_block) {
				2030	blocks = max_block - block;
				2031	}
				2032	bhlist[0] = bh;
				2033	j = 1;
				2034	for (i = 1; i < blocks; i++) {
				2035	bh = __getblk (dev, block + i, bufsize);
				2036	if (buffer_uptodate (bh)) {
				2037	brelse (bh);
				2038	break;
				2039	}
				2040	else bhlist[j++] = bh;
				2041	}
				2042	ll_rw_block (READ, j, bhlist);
				2043	for(i = 1; i < j; i++)
				2044	brelse (bhlist[i]);
				2045	bh = bhlist[0];
				2046	wait_on_buffer (bh);
				2047	if (buffer_uptodate (bh))
				2048	return bh;
				2049	brelse (bh);
				2050	return NULL;
				2051	}
				2052
				2053	/*
				2054	** read and replay the log
				2055	** on a clean unmount, the journal header's next unflushed pointer will be to an invalid
				2056	** transaction. This tests that before finding all the transactions in the log, which makes normal mount times fast.
				2057	**
				2058	** After a crash, this starts with the next unflushed transaction, and replays until it finds one too old, or invalid.
				2059	**
				2060	** On exit, it sets things up so the first transaction will work correctly.
				2061	*/
				2062	static int journal_read(struct super_block *p_s_sb) {
				2063	struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
				2064	struct reiserfs_journal_desc *desc ;
				2065	unsigned long oldest_trans_id = 0;
				2066	unsigned long oldest_invalid_trans_id = 0 ;
				2067	time_t start ;
				2068	unsigned long oldest_start = 0;
				2069	unsigned long cur_dblock = 0 ;
				2070	unsigned long newest_mount_id = 9 ;
				2071	struct buffer_head *d_bh ;
				2072	struct reiserfs_journal_header *jh ;
				2073	int valid_journal_header = 0 ;
				2074	int replay_count = 0 ;
				2075	int continue_replay = 1 ;
				2076	int ret ;
				2077	char b[BDEVNAME_SIZE];
				2078
				2079	cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) ;
				2080	reiserfs_info (p_s_sb, "checking transaction log (%s)\n",
				2081	bdevname(journal->j_dev_bd, b));
				2082	start = get_seconds();
				2083
				2084	/* step 1, read in the journal header block. Check the transaction it says
				2085	** is the first unflushed, and if that transaction is not valid,
				2086	** replay is done
				2087	*/
				2088	journal->j_header_bh = journal_bread(p_s_sb,
				2089	SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
				2090	SB_ONDISK_JOURNAL_SIZE(p_s_sb));
				2091	if (!journal->j_header_bh) {
				2092	return 1 ;
				2093	}
				2094	jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data) ;
				2095	if (le32_to_cpu(jh->j_first_unflushed_offset) >= 0 &&
				2096	le32_to_cpu(jh->j_first_unflushed_offset) < SB_ONDISK_JOURNAL_SIZE(p_s_sb) &&
				2097	le32_to_cpu(jh->j_last_flush_trans_id) > 0) {
				2098	oldest_start = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
				2099	le32_to_cpu(jh->j_first_unflushed_offset) ;
				2100	oldest_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1;
				2101	newest_mount_id = le32_to_cpu(jh->j_mount_id);
				2102	reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1153: found in "
				2103	"header: first_unflushed_offset %d, last_flushed_trans_id "
				2104	"%lu", le32_to_cpu(jh->j_first_unflushed_offset),
				2105	le32_to_cpu(jh->j_last_flush_trans_id)) ;
				2106	valid_journal_header = 1 ;
				2107
				2108	/* now, we try to read the first unflushed offset. If it is not valid,
				2109	** there is nothing more we can do, and it makes no sense to read
				2110	** through the whole log.
				2111	*/
				2112	d_bh = journal_bread(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + le32_to_cpu(jh->j_first_unflushed_offset)) ;
				2113	ret = journal_transaction_is_valid(p_s_sb, d_bh, NULL, NULL) ;
				2114	if (!ret) {
				2115	continue_replay = 0 ;
				2116	}
				2117	brelse(d_bh) ;
				2118	goto start_log_replay;
				2119	}
				2120
				2121	if (continue_replay && bdev_read_only(p_s_sb->s_bdev)) {
				2122	reiserfs_warning (p_s_sb,
				2123	"clm-2076: device is readonly, unable to replay log") ;
				2124	return -1 ;
				2125	}
				2126
				2127	/* ok, there are transactions that need to be replayed. start with the first log block, find
				2128	** all the valid transactions, and pick out the oldest.
				2129	*/
				2130	while(continue_replay && cur_dblock < (SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + SB_ONDISK_JOURNAL_SIZE(p_s_sb))) {
				2131	/* Note that it is required for blocksize of primary fs device and journal
				2132	device to be the same */
				2133	d_bh = reiserfs_breada(journal->j_dev_bd, cur_dblock, p_s_sb->s_blocksize,
				2134	SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + SB_ONDISK_JOURNAL_SIZE(p_s_sb)) ;
				2135	ret = journal_transaction_is_valid(p_s_sb, d_bh, &oldest_invalid_trans_id, &newest_mount_id) ;
				2136	if (ret == 1) {
				2137	desc = (struct reiserfs_journal_desc *)d_bh->b_data ;
				2138	if (oldest_start == 0) { /* init all oldest_ values */
				2139	oldest_trans_id = get_desc_trans_id(desc) ;
				2140	oldest_start = d_bh->b_blocknr ;
				2141	newest_mount_id = get_desc_mount_id(desc) ;
				2142	reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1179: Setting "
				2143	"oldest_start to offset %llu, trans_id %lu",
				2144	oldest_start - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
				2145	oldest_trans_id) ;
				2146	} else if (oldest_trans_id > get_desc_trans_id(desc)) {
				2147	/* one we just read was older */
				2148	oldest_trans_id = get_desc_trans_id(desc) ;
				2149	oldest_start = d_bh->b_blocknr ;
				2150	reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1180: Resetting "
				2151	"oldest_start to offset %lu, trans_id %lu",
				2152	oldest_start - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
				2153	oldest_trans_id) ;
				2154	}
				2155	if (newest_mount_id < get_desc_mount_id(desc)) {
				2156	newest_mount_id = get_desc_mount_id(desc) ;
				2157	reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1299: Setting "
				2158	"newest_mount_id to %d", get_desc_mount_id(desc));
				2159	}
				2160	cur_dblock += get_desc_trans_len(desc) + 2 ;
				2161	} else {
				2162	cur_dblock++ ;
				2163	}
				2164	brelse(d_bh) ;
				2165	}
				2166
				2167	start_log_replay:
				2168	cur_dblock = oldest_start ;
				2169	if (oldest_trans_id) {
				2170	reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1206: Starting replay "
				2171	"from offset %llu, trans_id %lu",
				2172	cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
				2173	oldest_trans_id) ;
				2174
				2175	}
				2176	replay_count = 0 ;
				2177	while(continue_replay && oldest_trans_id > 0) {
				2178	ret = journal_read_transaction(p_s_sb, cur_dblock, oldest_start, oldest_trans_id, newest_mount_id) ;
				2179	if (ret < 0) {
				2180	return ret ;
				2181	} else if (ret != 0) {
				2182	break ;
				2183	}
				2184	cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + journal->j_start ;
				2185	replay_count++ ;
				2186	if (cur_dblock == oldest_start)
				2187	break;
				2188	}
				2189
				2190	if (oldest_trans_id == 0) {
				2191	reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1225: No valid "
				2192	"transactions found") ;
				2193	}
				2194	/* j_start does not get set correctly if we don't replay any transactions.
				2195	** if we had a valid journal_header, set j_start to the first unflushed transaction value,
				2196	** copy the trans_id from the header
				2197	*/
				2198	if (valid_journal_header && replay_count == 0) {
				2199	journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset) ;
				2200	journal->j_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) + 1;
				2201	journal->j_last_flush_trans_id = le32_to_cpu(jh->j_last_flush_trans_id) ;
				2202	journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1;
				2203	} else {
				2204	journal->j_mount_id = newest_mount_id + 1 ;
				2205	}
				2206	reiserfs_debug(p_s_sb, REISERFS_DEBUG_CODE, "journal-1299: Setting "
				2207	"newest_mount_id to %lu", journal->j_mount_id) ;
				2208	journal->j_first_unflushed_offset = journal->j_start ;
				2209	if (replay_count > 0) {
				2210	reiserfs_info (p_s_sb, "replayed %d transactions in %lu seconds\n",
				2211	replay_count, get_seconds() - start) ;
				2212	}
				2213	if (!bdev_read_only(p_s_sb->s_bdev) &&
				2214	_update_journal_header_block(p_s_sb, journal->j_start,
				2215	journal->j_last_flush_trans_id))
				2216	{
				2217	/* replay failed, caller must call free_journal_ram and abort
				2218	** the mount
				2219	*/
				2220	return -1 ;
				2221	}
				2222	return 0 ;
				2223	}
				2224
				2225	static struct reiserfs_journal_list alloc_journal_list(struct super_block s)
				2226	{
				2227	struct reiserfs_journal_list *jl;
				2228	retry:
				2229	jl = reiserfs_kmalloc(sizeof(struct reiserfs_journal_list), GFP_NOFS, s);
				2230	if (!jl) {
				2231	yield();
				2232	goto retry;
				2233	}
				2234	memset(jl, 0, sizeof(*jl));
				2235	INIT_LIST_HEAD(&jl->j_list);
				2236	INIT_LIST_HEAD(&jl->j_working_list);
				2237	INIT_LIST_HEAD(&jl->j_tail_bh_list);
				2238	INIT_LIST_HEAD(&jl->j_bh_list);
				2239	sema_init(&jl->j_commit_lock, 1);
				2240	SB_JOURNAL(s)->j_num_lists++;
				2241	get_journal_list(jl);
				2242	return jl;
				2243	}
				2244
				2245	static void journal_list_init(struct super_block *p_s_sb) {
				2246	SB_JOURNAL(p_s_sb)->j_current_jl = alloc_journal_list(p_s_sb);
				2247	}
				2248
				2249	static int release_journal_dev( struct super_block *super,
				2250	struct reiserfs_journal *journal )
				2251	{
				2252	int result;
				2253
				2254	result = 0;
				2255
				2256	if( journal -> j_dev_file != NULL ) {
				2257	result = filp_close( journal -> j_dev_file, NULL );
				2258	journal -> j_dev_file = NULL;
				2259	journal -> j_dev_bd = NULL;
				2260	} else if( journal -> j_dev_bd != NULL ) {
				2261	result = blkdev_put( journal -> j_dev_bd );
				2262	journal -> j_dev_bd = NULL;
				2263	}
				2264
				2265	if( result != 0 ) {
				2266	reiserfs_warning(super, "sh-457: release_journal_dev: Cannot release journal device: %i", result );
				2267	}
				2268	return result;
				2269	}
				2270
				2271	static int journal_init_dev( struct super_block *super,
				2272	struct reiserfs_journal *journal,
				2273	const char *jdev_name )
				2274	{
				2275	int result;
				2276	dev_t jdev;
				2277	int blkdev_mode = FMODE_READ \| FMODE_WRITE;
				2278	char b[BDEVNAME_SIZE];
				2279
				2280	result = 0;
				2281
				2282	journal -> j_dev_bd = NULL;
				2283	journal -> j_dev_file = NULL;
				2284	jdev = SB_ONDISK_JOURNAL_DEVICE( super ) ?
				2285	new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev;
				2286
				2287	if (bdev_read_only(super->s_bdev))
				2288	blkdev_mode = FMODE_READ;
				2289
				2290	/* there is no "jdev" option and journal is on separate device */
				2291	if( ( !jdev_name \|\| !jdev_name[ 0 ] ) ) {
				2292	journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode);
				2293	if (IS_ERR(journal->j_dev_bd)) {
				2294	result = PTR_ERR(journal->j_dev_bd);
				2295	journal->j_dev_bd = NULL;
				2296	reiserfs_warning (super, "sh-458: journal_init_dev: "
				2297	"cannot init journal device '%s': %i",
				2298	__bdevname(jdev, b), result );
				2299	return result;
				2300	} else if (jdev != super->s_dev)
				2301	set_blocksize(journal->j_dev_bd, super->s_blocksize);
				2302	return 0;
				2303	}
				2304
				2305	journal -> j_dev_file = filp_open( jdev_name, 0, 0 );
				2306	if( !IS_ERR( journal -> j_dev_file ) ) {
				2307	struct inode *jdev_inode = journal->j_dev_file->f_mapping->host;
				2308	if( !S_ISBLK( jdev_inode -> i_mode ) ) {
				2309	reiserfs_warning (super, "journal_init_dev: '%s' is "
				2310	"not a block device", jdev_name );
				2311	result = -ENOTBLK;
				2312	} else {
				2313	/* ok */
				2314	journal->j_dev_bd = I_BDEV(jdev_inode);
				2315	set_blocksize(journal->j_dev_bd, super->s_blocksize);
				2316	}
				2317	} else {
				2318	result = PTR_ERR( journal -> j_dev_file );
				2319	journal -> j_dev_file = NULL;
				2320	reiserfs_warning (super,
				2321	"journal_init_dev: Cannot open '%s': %i",
				2322	jdev_name, result );
				2323	}
				2324	if( result != 0 ) {
				2325	release_journal_dev( super, journal );
				2326	}
				2327	reiserfs_info(super, "journal_init_dev: journal device: %s\n",
				2328	bdevname(journal->j_dev_bd, b));
				2329	return result;
				2330	}
				2331
				2332	/*
				2333	** must be called once on fs mount. calls journal_read for you
				2334	*/
				2335	int journal_init(struct super_block p_s_sb, const char j_dev_name, int old_format, unsigned int commit_max_age) {
				2336	int num_cnodes = SB_ONDISK_JOURNAL_SIZE(p_s_sb) * 2 ;
				2337	struct buffer_head *bhjh;
				2338	struct reiserfs_super_block * rs;
				2339	struct reiserfs_journal_header *jh;
				2340	struct reiserfs_journal *journal;
				2341	struct reiserfs_journal_list *jl;
				2342	char b[BDEVNAME_SIZE];
				2343
				2344	journal = SB_JOURNAL(p_s_sb) = vmalloc(sizeof (struct reiserfs_journal)) ;
				2345	if (!journal) {
				2346	reiserfs_warning (p_s_sb, "journal-1256: unable to get memory for journal structure") ;
				2347	return 1 ;
				2348	}
				2349	memset(journal, 0, sizeof(struct reiserfs_journal)) ;
				2350	INIT_LIST_HEAD(&journal->j_bitmap_nodes) ;
				2351	INIT_LIST_HEAD (&journal->j_prealloc_list);
				2352	INIT_LIST_HEAD(&journal->j_working_list);
				2353	INIT_LIST_HEAD(&journal->j_journal_list);
				2354	journal->j_persistent_trans = 0;
				2355	if (reiserfs_allocate_list_bitmaps(p_s_sb,
				2356	journal->j_list_bitmap,
				2357	SB_BMAP_NR(p_s_sb)))
				2358	goto free_and_return ;
				2359	allocate_bitmap_nodes(p_s_sb) ;
				2360
				2361	/* reserved for journal area support */
				2362	SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) = (old_format ?
				2363	REISERFS_OLD_DISK_OFFSET_IN_BYTES / p_s_sb->s_blocksize +
				2364	SB_BMAP_NR(p_s_sb) + 1 :
				2365	REISERFS_DISK_OFFSET_IN_BYTES / p_s_sb->s_blocksize + 2);
				2366
				2367	/* Sanity check to see is the standard journal fitting withing first bitmap
				2368	(actual for small blocksizes) */
				2369	if ( !SB_ONDISK_JOURNAL_DEVICE( p_s_sb ) &&
				2370	(SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) + SB_ONDISK_JOURNAL_SIZE(p_s_sb) > p_s_sb->s_blocksize * 8) ) {
				2371	reiserfs_warning (p_s_sb, "journal-1393: journal does not fit for area "
				2372	"addressed by first of bitmap blocks. It starts at "
				2373	"%u and its size is %u. Block size %ld",
				2374	SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb),
				2375	SB_ONDISK_JOURNAL_SIZE(p_s_sb), p_s_sb->s_blocksize);
				2376	goto free_and_return;
				2377	}
				2378
				2379	if( journal_init_dev( p_s_sb, journal, j_dev_name ) != 0 ) {
				2380	reiserfs_warning (p_s_sb, "sh-462: unable to initialize jornal device");
				2381	goto free_and_return;
				2382	}
				2383
				2384	rs = SB_DISK_SUPER_BLOCK(p_s_sb);
				2385
				2386	/* read journal header */
				2387	bhjh = journal_bread(p_s_sb,
				2388	SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + SB_ONDISK_JOURNAL_SIZE(p_s_sb));
				2389	if (!bhjh) {
				2390	reiserfs_warning (p_s_sb, "sh-459: unable to read journal header");
				2391	goto free_and_return;
				2392	}
				2393	jh = (struct reiserfs_journal_header *)(bhjh->b_data);
				2394
				2395	/* make sure that journal matches to the super block */
				2396	if (is_reiserfs_jr(rs) && (jh->jh_journal.jp_journal_magic != sb_jp_journal_magic(rs))) {
				2397	reiserfs_warning (p_s_sb, "sh-460: journal header magic %x "
				2398	"(device %s) does not match to magic found in super "
				2399	"block %x",
				2400	jh->jh_journal.jp_journal_magic,
				2401	bdevname( journal->j_dev_bd, b),
				2402	sb_jp_journal_magic(rs));
				2403	brelse (bhjh);
				2404	goto free_and_return;
				2405	}
				2406
				2407	journal->j_trans_max = le32_to_cpu (jh->jh_journal.jp_journal_trans_max);
				2408	journal->j_max_batch = le32_to_cpu (jh->jh_journal.jp_journal_max_batch);
				2409	journal->j_max_commit_age = le32_to_cpu (jh->jh_journal.jp_journal_max_commit_age);
				2410	journal->j_max_trans_age = JOURNAL_MAX_TRANS_AGE;
				2411
				2412	if (journal->j_trans_max) {
				2413	/* make sure these parameters are available, assign it if they are not */
				2414	__u32 initial = journal->j_trans_max;
				2415	__u32 ratio = 1;
				2416
				2417	if (p_s_sb->s_blocksize < 4096)
				2418	ratio = 4096 / p_s_sb->s_blocksize;
				2419
				2420	if (SB_ONDISK_JOURNAL_SIZE(p_s_sb)/journal->j_trans_max < JOURNAL_MIN_RATIO)
				2421	journal->j_trans_max = SB_ONDISK_JOURNAL_SIZE(p_s_sb) / JOURNAL_MIN_RATIO;
				2422	if (journal->j_trans_max > JOURNAL_TRANS_MAX_DEFAULT / ratio)
				2423	journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT / ratio;
				2424	if (journal->j_trans_max < JOURNAL_TRANS_MIN_DEFAULT / ratio)
				2425	journal->j_trans_max = JOURNAL_TRANS_MIN_DEFAULT / ratio;
				2426
				2427	if (journal->j_trans_max != initial)
				2428	reiserfs_warning (p_s_sb, "sh-461: journal_init: wrong transaction max size (%u). Changed to %u",
				2429	initial, journal->j_trans_max);
				2430
				2431	journal->j_max_batch = journal->j_trans_max*
				2432	JOURNAL_MAX_BATCH_DEFAULT/JOURNAL_TRANS_MAX_DEFAULT;
				2433	}
				2434
				2435	if (!journal->j_trans_max) {
				2436	/*we have the file system was created by old version of mkreiserfs
				2437	so this field contains zero value */
				2438	journal->j_trans_max = JOURNAL_TRANS_MAX_DEFAULT ;
				2439	journal->j_max_batch = JOURNAL_MAX_BATCH_DEFAULT ;
				2440	journal->j_max_commit_age = JOURNAL_MAX_COMMIT_AGE ;
				2441
				2442	/* for blocksize >= 4096 - max transaction size is 1024. For block size < 4096
				2443	trans max size is decreased proportionally */
				2444	if (p_s_sb->s_blocksize < 4096) {
				2445	journal->j_trans_max /= (4096 / p_s_sb->s_blocksize) ;
				2446	journal->j_max_batch = (journal->j_trans_max) * 9 / 10 ;
				2447	}
				2448	}
				2449
				2450	journal->j_default_max_commit_age = journal->j_max_commit_age;
				2451
				2452	if (commit_max_age != 0) {
				2453	journal->j_max_commit_age = commit_max_age;
				2454	journal->j_max_trans_age = commit_max_age;
				2455	}
				2456
				2457	reiserfs_info (p_s_sb, "journal params: device %s, size %u, "
				2458	"journal first block %u, max trans len %u, max batch %u, "
				2459	"max commit age %u, max trans age %u\n",
				2460	bdevname( journal->j_dev_bd, b),
				2461	SB_ONDISK_JOURNAL_SIZE(p_s_sb),
				2462	SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb),
				2463	journal->j_trans_max,
				2464	journal->j_max_batch,
				2465	journal->j_max_commit_age,
				2466	journal->j_max_trans_age);
				2467
				2468	brelse (bhjh);
				2469
				2470	journal->j_list_bitmap_index = 0 ;
				2471	journal_list_init(p_s_sb) ;
				2472
				2473	memset(journal->j_list_hash_table, 0, JOURNAL_HASH_SIZE * sizeof(struct reiserfs_journal_cnode *)) ;
				2474
				2475	INIT_LIST_HEAD(&journal->j_dirty_buffers) ;
				2476	spin_lock_init(&journal->j_dirty_buffers_lock) ;
				2477
				2478	journal->j_start = 0 ;
				2479	journal->j_len = 0 ;
				2480	journal->j_len_alloc = 0 ;
				2481	atomic_set(&(journal->j_wcount), 0) ;
				2482	atomic_set(&(journal->j_async_throttle), 0) ;
				2483	journal->j_bcount = 0 ;
				2484	journal->j_trans_start_time = 0 ;
				2485	journal->j_last = NULL ;
				2486	journal->j_first = NULL ;
				2487	init_waitqueue_head(&(journal->j_join_wait)) ;
				2488	sema_init(&journal->j_lock, 1);
				2489	sema_init(&journal->j_flush_sem, 1);
				2490
				2491	journal->j_trans_id = 10 ;
				2492	journal->j_mount_id = 10 ;
				2493	journal->j_state = 0 ;
				2494	atomic_set(&(journal->j_jlock), 0) ;
				2495	journal->j_cnode_free_list = allocate_cnodes(num_cnodes) ;
				2496	journal->j_cnode_free_orig = journal->j_cnode_free_list ;
				2497	journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0 ;
				2498	journal->j_cnode_used = 0 ;
				2499	journal->j_must_wait = 0 ;
				2500
				2501	init_journal_hash(p_s_sb) ;
				2502	jl = journal->j_current_jl;
				2503	jl->j_list_bitmap = get_list_bitmap(p_s_sb, jl);
				2504	if (!jl->j_list_bitmap) {
				2505	reiserfs_warning(p_s_sb, "journal-2005, get_list_bitmap failed for journal list 0") ;
				2506	goto free_and_return;
				2507	}
				2508	if (journal_read(p_s_sb) < 0) {
				2509	reiserfs_warning(p_s_sb, "Replay Failure, unable to mount") ;
				2510	goto free_and_return;
				2511	}
				2512
				2513	reiserfs_mounted_fs_count++ ;
				2514	if (reiserfs_mounted_fs_count <= 1)
				2515	commit_wq = create_workqueue("reiserfs");
				2516
				2517	INIT_WORK(&journal->j_work, flush_async_commits, p_s_sb);
				2518	return 0 ;
				2519	free_and_return:
				2520	free_journal_ram(p_s_sb);
				2521	return 1;
				2522	}
				2523
				2524	/*
				2525	** test for a polite end of the current transaction. Used by file_write, and should
				2526	** be used by delete to make sure they don't write more than can fit inside a single
				2527	** transaction
				2528	*/
				2529	int journal_transaction_should_end(struct reiserfs_transaction_handle *th, int new_alloc) {
				2530	struct reiserfs_journal *journal = SB_JOURNAL (th->t_super);
				2531	time_t now = get_seconds() ;
				2532	/* cannot restart while nested */
				2533	BUG_ON (!th->t_trans_id);
				2534	if (th->t_refcount > 1)
				2535	return 0 ;
				2536	if ( journal->j_must_wait > 0 \|\|
				2537	(journal->j_len_alloc + new_alloc) >= journal->j_max_batch \|\|
				2538	atomic_read(&(journal->j_jlock)) \|\|
				2539	(now - journal->j_trans_start_time) > journal->j_max_trans_age \|\|
				2540	journal->j_cnode_free < (journal->j_trans_max * 3)) {
				2541	return 1 ;
				2542	}
				2543	return 0 ;
				2544	}
				2545
				2546	/* this must be called inside a transaction, and requires the
				2547	** kernel_lock to be held
				2548	*/
				2549	void reiserfs_block_writes(struct reiserfs_transaction_handle *th) {
				2550	struct reiserfs_journal *journal = SB_JOURNAL (th->t_super);
				2551	BUG_ON (!th->t_trans_id);
				2552	journal->j_must_wait = 1 ;
				2553	set_bit(J_WRITERS_BLOCKED, &journal->j_state) ;
				2554	return ;
				2555	}
				2556
				2557	/* this must be called without a transaction started, and does not
				2558	** require BKL
				2559	*/
				2560	void reiserfs_allow_writes(struct super_block *s) {
				2561	struct reiserfs_journal *journal = SB_JOURNAL (s);
				2562	clear_bit(J_WRITERS_BLOCKED, &journal->j_state) ;
				2563	wake_up(&journal->j_join_wait) ;
				2564	}
				2565
				2566	/* this must be called without a transaction started, and does not
				2567	** require BKL
				2568	*/
				2569	void reiserfs_wait_on_write_block(struct super_block *s) {
				2570	struct reiserfs_journal *journal = SB_JOURNAL (s);
				2571	wait_event(journal->j_join_wait,
				2572	!test_bit(J_WRITERS_BLOCKED, &journal->j_state)) ;
				2573	}
				2574
				2575	static void queue_log_writer(struct super_block *s) {
				2576	wait_queue_t wait;
				2577	struct reiserfs_journal *journal = SB_JOURNAL (s);
				2578	set_bit(J_WRITERS_QUEUED, &journal->j_state);
				2579
				2580	/*
				2581	* we don't want to use wait_event here because
				2582	* we only want to wait once.
				2583	*/
				2584	init_waitqueue_entry(&wait, current);
				2585	add_wait_queue(&journal->j_join_wait, &wait);
				2586	set_current_state(TASK_UNINTERRUPTIBLE);
				2587	if (test_bit(J_WRITERS_QUEUED, &journal->j_state))
				2588	schedule();
				2589	current->state = TASK_RUNNING;
				2590	remove_wait_queue(&journal->j_join_wait, &wait);
				2591	}
				2592
				2593	static void wake_queued_writers(struct super_block *s) {
				2594	struct reiserfs_journal *journal = SB_JOURNAL (s);
				2595	if (test_and_clear_bit(J_WRITERS_QUEUED, &journal->j_state))
				2596	wake_up(&journal->j_join_wait);
				2597	}
				2598
				2599	static void let_transaction_grow(struct super_block *sb,
				2600	unsigned long trans_id)
				2601	{
				2602	struct reiserfs_journal *journal = SB_JOURNAL (sb);
				2603	unsigned long bcount = journal->j_bcount;
				2604	while(1) {
				2605	set_current_state(TASK_UNINTERRUPTIBLE);
				2606	schedule_timeout(1);
				2607	journal->j_current_jl->j_state \|= LIST_COMMIT_PENDING;
				2608	while ((atomic_read(&journal->j_wcount) > 0 \|\|
				2609	atomic_read(&journal->j_jlock)) &&
				2610	journal->j_trans_id == trans_id) {
				2611	queue_log_writer(sb);
				2612	}
				2613	if (journal->j_trans_id != trans_id)
				2614	break;
				2615	if (bcount == journal->j_bcount)
				2616	break;
				2617	bcount = journal->j_bcount;
				2618	}
				2619	}
				2620
				2621	/* join == true if you must join an existing transaction.
				2622	** join == false if you can deal with waiting for others to finish
				2623	**
				2624	** this will block until the transaction is joinable. send the number of blocks you
				2625	** expect to use in nblocks.
				2626	*/
				2627	static int do_journal_begin_r(struct reiserfs_transaction_handle th, struct super_block p_s_sb,unsigned long nblocks,int join) {
				2628	time_t now = get_seconds() ;
				2629	int old_trans_id ;
				2630	struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
				2631	struct reiserfs_transaction_handle myth;
				2632	int sched_count = 0;
				2633	int retval;
				2634
				2635	reiserfs_check_lock_depth(p_s_sb, "journal_begin") ;
				2636
				2637	PROC_INFO_INC( p_s_sb, journal.journal_being );
				2638	/* set here for journal_join */
				2639	th->t_refcount = 1;
				2640	th->t_super = p_s_sb ;
				2641
				2642	relock:
				2643	lock_journal(p_s_sb) ;
				2644	if (join != JBEGIN_ABORT && reiserfs_is_journal_aborted (journal)) {
				2645	unlock_journal (p_s_sb);
				2646	retval = journal->j_errno;
				2647	goto out_fail;
				2648	}
				2649	journal->j_bcount++;
				2650
				2651	if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) {
				2652	unlock_journal(p_s_sb) ;
				2653	reiserfs_wait_on_write_block(p_s_sb) ;
				2654	PROC_INFO_INC( p_s_sb, journal.journal_relock_writers );
				2655	goto relock ;
				2656	}
				2657	now = get_seconds();
				2658
				2659	/* if there is no room in the journal OR
				2660	** if this transaction is too old, and we weren't called joinable, wait for it to finish before beginning
				2661	** we don't sleep if there aren't other writers
				2662	*/
				2663
				2664	if ( (!join && journal->j_must_wait > 0) \|\|
				2665	( !join && (journal->j_len_alloc + nblocks + 2) >= journal->j_max_batch) \|\|
				2666	(!join && atomic_read(&journal->j_wcount) > 0 && journal->j_trans_start_time > 0 &&
				2667	(now - journal->j_trans_start_time) > journal->j_max_trans_age) \|\|
				2668	(!join && atomic_read(&journal->j_jlock)) \|\|
				2669	(!join && journal->j_cnode_free < (journal->j_trans_max * 3))) {
				2670
				2671	old_trans_id = journal->j_trans_id;
				2672	unlock_journal(p_s_sb) ; /* allow others to finish this transaction */
				2673
				2674	if (!join && (journal->j_len_alloc + nblocks + 2) >=
				2675	journal->j_max_batch &&
				2676	((journal->j_len + nblocks + 2) * 100) < (journal->j_len_alloc * 75))
				2677	{
				2678	if (atomic_read(&journal->j_wcount) > 10) {
				2679	sched_count++;
				2680	queue_log_writer(p_s_sb);
				2681	goto relock;
				2682	}
				2683	}
				2684	/* don't mess with joining the transaction if all we have to do is
				2685	* wait for someone else to do a commit
				2686	*/
				2687	if (atomic_read(&journal->j_jlock)) {
				2688	while (journal->j_trans_id == old_trans_id &&
				2689	atomic_read(&journal->j_jlock)) {
				2690	queue_log_writer(p_s_sb);
				2691	}
				2692	goto relock;
				2693	}
				2694	retval = journal_join(&myth, p_s_sb, 1) ;
				2695	if (retval)
				2696	goto out_fail;
				2697
				2698	/* someone might have ended the transaction while we joined */
				2699	if (old_trans_id != journal->j_trans_id) {
				2700	retval = do_journal_end(&myth, p_s_sb, 1, 0) ;
				2701	} else {
				2702	retval = do_journal_end(&myth, p_s_sb, 1, COMMIT_NOW) ;
				2703	}
				2704
				2705	if (retval)
				2706	goto out_fail;
				2707
				2708	PROC_INFO_INC( p_s_sb, journal.journal_relock_wcount );
				2709	goto relock ;
				2710	}
				2711	/* we are the first writer, set trans_id */
				2712	if (journal->j_trans_start_time == 0) {
				2713	journal->j_trans_start_time = get_seconds();
				2714	}
				2715	atomic_inc(&(journal->j_wcount)) ;
				2716	journal->j_len_alloc += nblocks ;
				2717	th->t_blocks_logged = 0 ;
				2718	th->t_blocks_allocated = nblocks ;
				2719	th->t_trans_id = journal->j_trans_id ;
				2720	unlock_journal(p_s_sb) ;
				2721	INIT_LIST_HEAD (&th->t_list);
				2722	return 0 ;
				2723
				2724	out_fail:
				2725	memset (th, 0, sizeof (*th));
				2726	/* Re-set th->t_super, so we can properly keep track of how many
				2727	* persistent transactions there are. We need to do this so if this
				2728	* call is part of a failed restart_transaction, we can free it later */
				2729	th->t_super = p_s_sb;
				2730	return retval;
				2731	}
				2732
				2733	struct reiserfs_transaction_handle *
				2734	reiserfs_persistent_transaction(struct super_block *s, int nblocks) {
				2735	int ret ;
				2736	struct reiserfs_transaction_handle *th ;
				2737
				2738	/* if we're nesting into an existing transaction. It will be
				2739	** persistent on its own
				2740	*/
				2741	if (reiserfs_transaction_running(s)) {
				2742	th = current->journal_info ;
				2743	th->t_refcount++ ;
				2744	if (th->t_refcount < 2) {
				2745	BUG() ;
				2746	}
				2747	return th ;
				2748	}
				2749	th = reiserfs_kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS, s) ;
				2750	if (!th)
				2751	return NULL;
				2752	ret = journal_begin(th, s, nblocks) ;
				2753	if (ret) {
				2754	reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle), s) ;
				2755	return NULL;
				2756	}
				2757
				2758	SB_JOURNAL(s)->j_persistent_trans++;
				2759	return th ;
				2760	}
				2761
				2762	int
				2763	reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th) {
				2764	struct super_block *s = th->t_super;
				2765	int ret = 0;
				2766	if (th->t_trans_id)
				2767	ret = journal_end(th, th->t_super, th->t_blocks_allocated);
				2768	else
				2769	ret = -EIO;
				2770	if (th->t_refcount == 0) {
				2771	SB_JOURNAL(s)->j_persistent_trans--;
				2772	reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle), s) ;
				2773	}
				2774	return ret;
				2775	}
				2776
				2777	static int journal_join(struct reiserfs_transaction_handle th, struct super_block p_s_sb, unsigned long nblocks) {
				2778	struct reiserfs_transaction_handle *cur_th = current->journal_info;
				2779
				2780	/* this keeps do_journal_end from NULLing out the current->journal_info
				2781	** pointer
				2782	*/
				2783	th->t_handle_save = cur_th ;
				2784	if (cur_th && cur_th->t_refcount > 1) {
				2785	BUG() ;
				2786	}
				2787	return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_JOIN) ;
				2788	}
				2789
				2790	int journal_join_abort(struct reiserfs_transaction_handle th, struct super_block p_s_sb, unsigned long nblocks) {
				2791	struct reiserfs_transaction_handle *cur_th = current->journal_info;
				2792
				2793	/* this keeps do_journal_end from NULLing out the current->journal_info
				2794	** pointer
				2795	*/
				2796	th->t_handle_save = cur_th ;
				2797	if (cur_th && cur_th->t_refcount > 1) {
				2798	BUG() ;
				2799	}
				2800	return do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_ABORT) ;
				2801	}
				2802
				2803	int journal_begin(struct reiserfs_transaction_handle th, struct super_block p_s_sb, unsigned long nblocks) {
				2804	struct reiserfs_transaction_handle *cur_th = current->journal_info ;
				2805	int ret ;
				2806
				2807	th->t_handle_save = NULL ;
				2808	if (cur_th) {
				2809	/* we are nesting into the current transaction */
				2810	if (cur_th->t_super == p_s_sb) {
				2811	BUG_ON (!cur_th->t_refcount);
				2812	cur_th->t_refcount++ ;
				2813	memcpy(th, cur_th, sizeof(*th));
				2814	if (th->t_refcount <= 1)
				2815	reiserfs_warning (p_s_sb, "BAD: refcount <= 1, but journal_info != 0");
				2816	return 0;
				2817	} else {
				2818	/* we've ended up with a handle from a different filesystem.
				2819	** save it and restore on journal_end. This should never
				2820	** really happen...
				2821	*/
				2822	reiserfs_warning(p_s_sb, "clm-2100: nesting info a different FS") ;
				2823	th->t_handle_save = current->journal_info ;
				2824	current->journal_info = th;
				2825	}
				2826	} else {
				2827	current->journal_info = th;
				2828	}
				2829	ret = do_journal_begin_r(th, p_s_sb, nblocks, JBEGIN_REG) ;
				2830	if (current->journal_info != th)
				2831	BUG() ;
				2832
				2833	/* I guess this boils down to being the reciprocal of clm-2100 above.
				2834	* If do_journal_begin_r fails, we need to put it back, since journal_end
				2835	* won't be called to do it. */
				2836	if (ret)
				2837	current->journal_info = th->t_handle_save;
				2838	else
				2839	BUG_ON (!th->t_refcount);
				2840
				2841	return ret ;
				2842	}
				2843
				2844	/*
				2845	** puts bh into the current transaction. If it was already there, reorders removes the
				2846	** old pointers from the hash, and puts new ones in (to make sure replay happen in the right order).
				2847	**
				2848	** if it was dirty, cleans and files onto the clean list. I can't let it be dirty again until the
				2849	** transaction is committed.
				2850	**
				2851	** if j_len, is bigger than j_len_alloc, it pushes j_len_alloc to 10 + j_len.
				2852	*/
				2853	int journal_mark_dirty(struct reiserfs_transaction_handle th, struct super_block p_s_sb, struct buffer_head *bh) {
				2854	struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
				2855	struct reiserfs_journal_cnode *cn = NULL;
				2856	int count_already_incd = 0 ;
				2857	int prepared = 0 ;
				2858	BUG_ON (!th->t_trans_id);
				2859
				2860	PROC_INFO_INC( p_s_sb, journal.mark_dirty );
				2861	if (th->t_trans_id != journal->j_trans_id) {
				2862	reiserfs_panic(th->t_super, "journal-1577: handle trans id %ld != current trans id %ld\n",
				2863	th->t_trans_id, journal->j_trans_id);
				2864	}
				2865
				2866	p_s_sb->s_dirt = 1;
				2867
				2868	prepared = test_clear_buffer_journal_prepared (bh);
				2869	clear_buffer_journal_restore_dirty (bh);
				2870	/* already in this transaction, we are done */
				2871	if (buffer_journaled(bh)) {
				2872	PROC_INFO_INC( p_s_sb, journal.mark_dirty_already );
				2873	return 0 ;
				2874	}
				2875
				2876	/* this must be turned into a panic instead of a warning. We can't allow
				2877	** a dirty or journal_dirty or locked buffer to be logged, as some changes
				2878	** could get to disk too early. NOT GOOD.
				2879	*/
				2880	if (!prepared \|\| buffer_dirty(bh)) {
				2881	reiserfs_warning (p_s_sb, "journal-1777: buffer %llu bad state "
				2882	"%cPREPARED %cLOCKED %cDIRTY %cJDIRTY_WAIT",
				2883	(unsigned long long)bh->b_blocknr, prepared ? ' ' : '!',
				2884	buffer_locked(bh) ? ' ' : '!',
				2885	buffer_dirty(bh) ? ' ' : '!',
				2886	buffer_journal_dirty(bh) ? ' ' : '!') ;
				2887	}
				2888
				2889	if (atomic_read(&(journal->j_wcount)) <= 0) {
				2890	reiserfs_warning (p_s_sb, "journal-1409: journal_mark_dirty returning because j_wcount was %d", atomic_read(&(journal->j_wcount))) ;
				2891	return 1 ;
				2892	}
				2893	/* this error means I've screwed up, and we've overflowed the transaction.
				2894	** Nothing can be done here, except make the FS readonly or panic.
				2895	*/
				2896	if (journal->j_len >= journal->j_trans_max) {
				2897	reiserfs_panic(th->t_super, "journal-1413: journal_mark_dirty: j_len (%lu) is too big\n", journal->j_len) ;
				2898	}
				2899
				2900	if (buffer_journal_dirty(bh)) {
				2901	count_already_incd = 1 ;
				2902	PROC_INFO_INC( p_s_sb, journal.mark_dirty_notjournal );
				2903	clear_buffer_journal_dirty (bh);
				2904	}
				2905
				2906	if (journal->j_len > journal->j_len_alloc) {
				2907	journal->j_len_alloc = journal->j_len + JOURNAL_PER_BALANCE_CNT ;
				2908	}
				2909
				2910	set_buffer_journaled (bh);
				2911
				2912	/* now put this guy on the end */
				2913	if (!cn) {
				2914	cn = get_cnode(p_s_sb) ;
				2915	if (!cn) {
				2916	reiserfs_panic(p_s_sb, "get_cnode failed!\n");
				2917	}
				2918
				2919	if (th->t_blocks_logged == th->t_blocks_allocated) {
				2920	th->t_blocks_allocated += JOURNAL_PER_BALANCE_CNT ;
				2921	journal->j_len_alloc += JOURNAL_PER_BALANCE_CNT ;
				2922	}
				2923	th->t_blocks_logged++ ;
				2924	journal->j_len++ ;
				2925
				2926	cn->bh = bh ;
				2927	cn->blocknr = bh->b_blocknr ;
				2928	cn->sb = p_s_sb;
				2929	cn->jlist = NULL ;
				2930	insert_journal_hash(journal->j_hash_table, cn) ;
				2931	if (!count_already_incd) {
				2932	get_bh(bh) ;
				2933	}
				2934	}
				2935	cn->next = NULL ;
				2936	cn->prev = journal->j_last ;
				2937	cn->bh = bh ;
				2938	if (journal->j_last) {
				2939	journal->j_last->next = cn ;
				2940	journal->j_last = cn ;
				2941	} else {
				2942	journal->j_first = cn ;
				2943	journal->j_last = cn ;
				2944	}
				2945	return 0 ;
				2946	}
				2947
				2948	int journal_end(struct reiserfs_transaction_handle th, struct super_block p_s_sb, unsigned long nblocks) {
				2949	if (!current->journal_info && th->t_refcount > 1)
				2950	reiserfs_warning (p_s_sb, "REISER-NESTING: th NULL, refcount %d",
				2951	th->t_refcount);
				2952
				2953	if (!th->t_trans_id) {
				2954	WARN_ON (1);
				2955	return -EIO;
				2956	}
				2957
				2958	th->t_refcount--;
				2959	if (th->t_refcount > 0) {
				2960	struct reiserfs_transaction_handle *cur_th = current->journal_info ;
				2961
				2962	/* we aren't allowed to close a nested transaction on a different
				2963	** filesystem from the one in the task struct
				2964	*/
				2965	if (cur_th->t_super != th->t_super)
				2966	BUG() ;
				2967
				2968	if (th != cur_th) {
				2969	memcpy(current->journal_info, th, sizeof(*th));
				2970	th->t_trans_id = 0;
				2971	}
				2972	return 0;
				2973	} else {
				2974	return do_journal_end(th, p_s_sb, nblocks, 0) ;
				2975	}
				2976	}
				2977
				2978	/* removes from the current transaction, relsing and descrementing any counters.
				2979	** also files the removed buffer directly onto the clean list
				2980	**
				2981	** called by journal_mark_freed when a block has been deleted
				2982	**
				2983	** returns 1 if it cleaned and relsed the buffer. 0 otherwise
				2984	*/
				2985	static int remove_from_transaction(struct super_block *p_s_sb, b_blocknr_t blocknr, int already_cleaned) {
				2986	struct buffer_head *bh ;
				2987	struct reiserfs_journal_cnode *cn ;
				2988	struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
				2989	int ret = 0;
				2990
				2991	cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr) ;
				2992	if (!cn \|\| !cn->bh) {
				2993	return ret ;
				2994	}
				2995	bh = cn->bh ;
				2996	if (cn->prev) {
				2997	cn->prev->next = cn->next ;
				2998	}
				2999	if (cn->next) {
				3000	cn->next->prev = cn->prev ;
				3001	}
				3002	if (cn == journal->j_first) {
				3003	journal->j_first = cn->next ;
				3004	}
				3005	if (cn == journal->j_last) {
				3006	journal->j_last = cn->prev ;
				3007	}
				3008	if (bh)
				3009	remove_journal_hash(p_s_sb, journal->j_hash_table, NULL, bh->b_blocknr, 0) ;
				3010	clear_buffer_journaled (bh); /* don't log this one */
				3011
				3012	if (!already_cleaned) {
				3013	clear_buffer_journal_dirty (bh);
				3014	clear_buffer_dirty(bh);
				3015	clear_buffer_journal_test (bh);
				3016	put_bh(bh) ;
				3017	if (atomic_read(&(bh->b_count)) < 0) {
				3018	reiserfs_warning (p_s_sb, "journal-1752: remove from trans, b_count < 0");
				3019	}
				3020	ret = 1 ;
				3021	}
				3022	journal->j_len-- ;
				3023	journal->j_len_alloc-- ;
				3024	free_cnode(p_s_sb, cn) ;
				3025	return ret ;
				3026	}
				3027
				3028	/*
				3029	** for any cnode in a journal list, it can only be dirtied of all the
				3030	** transactions that include it are commited to disk.
				3031	** this checks through each transaction, and returns 1 if you are allowed to dirty,
				3032	** and 0 if you aren't
				3033	**
				3034	** it is called by dirty_journal_list, which is called after flush_commit_list has gotten all the log
				3035	** blocks for a given transaction on disk
				3036	**
				3037	*/
				3038	static int can_dirty(struct reiserfs_journal_cnode *cn) {
				3039	struct super_block *sb = cn->sb;
				3040	b_blocknr_t blocknr = cn->blocknr ;
				3041	struct reiserfs_journal_cnode *cur = cn->hprev ;
				3042	int can_dirty = 1 ;
				3043
				3044	/* first test hprev. These are all newer than cn, so any node here
				3045	** with the same block number and dev means this node can't be sent
				3046	** to disk right now.
				3047	*/
				3048	while(cur && can_dirty) {
				3049	if (cur->jlist && cur->bh && cur->blocknr && cur->sb == sb &&
				3050	cur->blocknr == blocknr) {
				3051	can_dirty = 0 ;
				3052	}
				3053	cur = cur->hprev ;
				3054	}
				3055	/* then test hnext. These are all older than cn. As long as they
				3056	** are committed to the log, it is safe to write cn to disk
				3057	*/
				3058	cur = cn->hnext ;
				3059	while(cur && can_dirty) {
				3060	if (cur->jlist && cur->jlist->j_len > 0 &&
				3061	atomic_read(&(cur->jlist->j_commit_left)) > 0 && cur->bh &&
				3062	cur->blocknr && cur->sb == sb && cur->blocknr == blocknr) {
				3063	can_dirty = 0 ;
				3064	}
				3065	cur = cur->hnext ;
				3066	}
				3067	return can_dirty ;
				3068	}
				3069
				3070	/* syncs the commit blocks, but does not force the real buffers to disk
				3071	** will wait until the current transaction is done/commited before returning
				3072	*/
				3073	int journal_end_sync(struct reiserfs_transaction_handle th, struct super_block p_s_sb, unsigned long nblocks) {
				3074	struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
				3075
				3076	BUG_ON (!th->t_trans_id);
				3077	/* you can sync while nested, very, very bad */
				3078	if (th->t_refcount > 1) {
				3079	BUG() ;
				3080	}
				3081	if (journal->j_len == 0) {
				3082	reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ;
				3083	journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ;
				3084	}
				3085	return do_journal_end(th, p_s_sb, nblocks, COMMIT_NOW \| WAIT) ;
				3086	}
				3087
				3088	/*
				3089	** writeback the pending async commits to disk
				3090	*/
				3091	static void flush_async_commits(void *p) {
				3092	struct super_block *p_s_sb = p;
				3093	struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
				3094	struct reiserfs_journal_list *jl;
				3095	struct list_head *entry;
				3096
				3097	lock_kernel();
				3098	if (!list_empty(&journal->j_journal_list)) {
				3099	/* last entry is the youngest, commit it and you get everything */
				3100	entry = journal->j_journal_list.prev;
				3101	jl = JOURNAL_LIST_ENTRY(entry);
				3102	flush_commit_list(p_s_sb, jl, 1);
				3103	}
				3104	unlock_kernel();
				3105	/*
				3106	* this is a little racey, but there's no harm in missing
				3107	* the filemap_fdata_write
				3108	*/
				3109	if (!atomic_read(&journal->j_async_throttle) && !reiserfs_is_journal_aborted (journal)) {
				3110	atomic_inc(&journal->j_async_throttle);
				3111	filemap_fdatawrite(p_s_sb->s_bdev->bd_inode->i_mapping);
				3112	atomic_dec(&journal->j_async_throttle);
				3113	}
				3114	}
				3115
				3116	/*
				3117	** flushes any old transactions to disk
				3118	** ends the current transaction if it is too old
				3119	*/
				3120	int reiserfs_flush_old_commits(struct super_block *p_s_sb) {
				3121	time_t now ;
				3122	struct reiserfs_transaction_handle th ;
				3123	struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
				3124
				3125	now = get_seconds();
				3126	/* safety check so we don't flush while we are replaying the log during
				3127	* mount
				3128	*/
				3129	if (list_empty(&journal->j_journal_list)) {
				3130	return 0 ;
				3131	}
				3132
				3133	/* check the current transaction. If there are no writers, and it is
				3134	* too old, finish it, and force the commit blocks to disk
				3135	*/
				3136	if (atomic_read(&journal->j_wcount) <= 0 &&
				3137	journal->j_trans_start_time > 0 &&
				3138	journal->j_len > 0 &&
				3139	(now - journal->j_trans_start_time) > journal->j_max_trans_age)
				3140	{
				3141	if (!journal_join(&th, p_s_sb, 1)) {
				3142	reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ;
				3143	journal_mark_dirty(&th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ;
				3144
				3145	/* we're only being called from kreiserfsd, it makes no sense to do
				3146	** an async commit so that kreiserfsd can do it later
				3147	*/
				3148	do_journal_end(&th, p_s_sb,1, COMMIT_NOW \| WAIT) ;
				3149	}
				3150	}
				3151	return p_s_sb->s_dirt;
				3152	}
				3153
				3154	/*
				3155	** returns 0 if do_journal_end should return right away, returns 1 if do_journal_end should finish the commit
				3156	**
				3157	** if the current transaction is too old, but still has writers, this will wait on j_join_wait until all
				3158	** the writers are done. By the time it wakes up, the transaction it was called has already ended, so it just
				3159	** flushes the commit list and returns 0.
				3160	**
				3161	** Won't batch when flush or commit_now is set. Also won't batch when others are waiting on j_join_wait.
				3162	**
				3163	** Note, we can't allow the journal_end to proceed while there are still writers in the log.
				3164	*/
				3165	static int check_journal_end(struct reiserfs_transaction_handle th, struct super_block p_s_sb,
				3166	unsigned long nblocks, int flags) {
				3167
				3168	time_t now ;
				3169	int flush = flags & FLUSH_ALL ;
				3170	int commit_now = flags & COMMIT_NOW ;
				3171	int wait_on_commit = flags & WAIT ;
				3172	struct reiserfs_journal_list *jl;
				3173	struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
				3174
				3175	BUG_ON (!th->t_trans_id);
				3176
				3177	if (th->t_trans_id != journal->j_trans_id) {
				3178	reiserfs_panic(th->t_super, "journal-1577: handle trans id %ld != current trans id %ld\n",
				3179	th->t_trans_id, journal->j_trans_id);
				3180	}
				3181
				3182	journal->j_len_alloc -= (th->t_blocks_allocated - th->t_blocks_logged) ;
				3183	if (atomic_read(&(journal->j_wcount)) > 0) { /* <= 0 is allowed. unmounting might not call begin */
				3184	atomic_dec(&(journal->j_wcount)) ;
				3185	}
				3186
				3187	/* BUG, deal with case where j_len is 0, but people previously freed blocks need to be released
				3188	** will be dealt with by next transaction that actually writes something, but should be taken
				3189	** care of in this trans
				3190	*/
				3191	if (journal->j_len == 0) {
				3192	BUG();
				3193	}
				3194	/* if wcount > 0, and we are called to with flush or commit_now,
				3195	** we wait on j_join_wait. We will wake up when the last writer has
				3196	** finished the transaction, and started it on its way to the disk.
				3197	** Then, we flush the commit or journal list, and just return 0
				3198	** because the rest of journal end was already done for this transaction.
				3199	*/
				3200	if (atomic_read(&(journal->j_wcount)) > 0) {
				3201	if (flush \|\| commit_now) {
				3202	unsigned trans_id ;
				3203
				3204	jl = journal->j_current_jl;
				3205	trans_id = jl->j_trans_id;
				3206	if (wait_on_commit)
				3207	jl->j_state \|= LIST_COMMIT_PENDING;
				3208	atomic_set(&(journal->j_jlock), 1) ;
				3209	if (flush) {
				3210	journal->j_next_full_flush = 1 ;
				3211	}
				3212	unlock_journal(p_s_sb) ;
				3213
				3214	/* sleep while the current transaction is still j_jlocked */
				3215	while(journal->j_trans_id == trans_id) {
				3216	if (atomic_read(&journal->j_jlock)) {
				3217	queue_log_writer(p_s_sb);
				3218	} else {
				3219	lock_journal(p_s_sb);
				3220	if (journal->j_trans_id == trans_id) {
				3221	atomic_set(&(journal->j_jlock), 1) ;
				3222	}
				3223	unlock_journal(p_s_sb);
				3224	}
				3225	}
				3226	if (journal->j_trans_id == trans_id) {
				3227	BUG();
				3228	}
				3229	if (commit_now && journal_list_still_alive(p_s_sb, trans_id) &&
				3230	wait_on_commit)
				3231	{
				3232	flush_commit_list(p_s_sb, jl, 1) ;
				3233	}
				3234	return 0 ;
				3235	}
				3236	unlock_journal(p_s_sb) ;
				3237	return 0 ;
				3238	}
				3239
				3240	/* deal with old transactions where we are the last writers */
				3241	now = get_seconds();
				3242	if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) {
				3243	commit_now = 1 ;
				3244	journal->j_next_async_flush = 1 ;
				3245	}
				3246	/* don't batch when someone is waiting on j_join_wait */
				3247	/* don't batch when syncing the commit or flushing the whole trans */
				3248	if (!(journal->j_must_wait > 0) && !(atomic_read(&(journal->j_jlock))) && !flush && !commit_now &&
				3249	(journal->j_len < journal->j_max_batch) &&
				3250	journal->j_len_alloc < journal->j_max_batch && journal->j_cnode_free > (journal->j_trans_max * 3)) {
				3251	journal->j_bcount++ ;
				3252	unlock_journal(p_s_sb) ;
				3253	return 0 ;
				3254	}
				3255
				3256	if (journal->j_start > SB_ONDISK_JOURNAL_SIZE(p_s_sb)) {
				3257	reiserfs_panic(p_s_sb, "journal-003: journal_end: j_start (%ld) is too high\n", journal->j_start) ;
				3258	}
				3259	return 1 ;
				3260	}
				3261
				3262	/*
				3263	** Does all the work that makes deleting blocks safe.
				3264	** when deleting a block mark BH_JNew, just remove it from the current transaction, clean it's buffer_head and move on.
				3265	**
				3266	** otherwise:
				3267	** set a bit for the block in the journal bitmap. That will prevent it from being allocated for unformatted nodes
				3268	** before this transaction has finished.
				3269	**
				3270	** mark any cnodes for this block as BLOCK_FREED, and clear their bh pointers. That will prevent any old transactions with
				3271	** this block from trying to flush to the real location. Since we aren't removing the cnode from the journal_list_hash,
				3272	** the block can't be reallocated yet.
				3273	**
				3274	** Then remove it from the current transaction, decrementing any counters and filing it on the clean list.
				3275	*/
				3276	int journal_mark_freed(struct reiserfs_transaction_handle th, struct super_block p_s_sb, b_blocknr_t blocknr) {
				3277	struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
				3278	struct reiserfs_journal_cnode *cn = NULL ;
				3279	struct buffer_head *bh = NULL ;
				3280	struct reiserfs_list_bitmap *jb = NULL ;
				3281	int cleaned = 0 ;
				3282	BUG_ON (!th->t_trans_id);
				3283
				3284	cn = get_journal_hash_dev(p_s_sb, journal->j_hash_table, blocknr);
				3285	if (cn && cn->bh) {
				3286	bh = cn->bh ;
				3287	get_bh(bh) ;
				3288	}
				3289	/* if it is journal new, we just remove it from this transaction */
				3290	if (bh && buffer_journal_new(bh)) {
				3291	clear_buffer_journal_new (bh);
				3292	clear_prepared_bits(bh) ;
				3293	reiserfs_clean_and_file_buffer(bh) ;
				3294	cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned) ;
				3295	} else {
				3296	/* set the bit for this block in the journal bitmap for this transaction */
				3297	jb = journal->j_current_jl->j_list_bitmap;
				3298	if (!jb) {
				3299	reiserfs_panic(p_s_sb, "journal-1702: journal_mark_freed, journal_list_bitmap is NULL\n") ;
				3300	}
				3301	set_bit_in_list_bitmap(p_s_sb, blocknr, jb) ;
				3302
				3303	/* Note, the entire while loop is not allowed to schedule. */
				3304
				3305	if (bh) {
				3306	clear_prepared_bits(bh) ;
				3307	reiserfs_clean_and_file_buffer(bh) ;
				3308	}
				3309	cleaned = remove_from_transaction(p_s_sb, blocknr, cleaned) ;
				3310
				3311	/* find all older transactions with this block, make sure they don't try to write it out */
				3312	cn = get_journal_hash_dev(p_s_sb,journal->j_list_hash_table, blocknr) ;
				3313	while (cn) {
				3314	if (p_s_sb == cn->sb && blocknr == cn->blocknr) {
				3315	set_bit(BLOCK_FREED, &cn->state) ;
				3316	if (cn->bh) {
				3317	if (!cleaned) {
				3318	/* remove_from_transaction will brelse the buffer if it was
				3319	** in the current trans
				3320	*/
				3321	clear_buffer_journal_dirty (cn->bh);
				3322	clear_buffer_dirty(cn->bh);
				3323	clear_buffer_journal_test(cn->bh);
				3324	cleaned = 1 ;
				3325	put_bh(cn->bh) ;
				3326	if (atomic_read(&(cn->bh->b_count)) < 0) {
				3327	reiserfs_warning (p_s_sb, "journal-2138: cn->bh->b_count < 0");
				3328	}
				3329	}
				3330	if (cn->jlist) { /* since we are clearing the bh, we MUST dec nonzerolen */
				3331	atomic_dec(&(cn->jlist->j_nonzerolen)) ;
				3332	}
				3333	cn->bh = NULL ;
				3334	}
				3335	}
				3336	cn = cn->hnext ;
				3337	}
				3338	}
				3339
				3340	if (bh) {
				3341	put_bh(bh) ; /* get_hash grabs the buffer */
				3342	if (atomic_read(&(bh->b_count)) < 0) {
				3343	reiserfs_warning (p_s_sb, "journal-2165: bh->b_count < 0");
				3344	}
				3345	}
				3346	return 0 ;
				3347	}
				3348
				3349	void reiserfs_update_inode_transaction(struct inode *inode) {
				3350	struct reiserfs_journal *journal = SB_JOURNAL (inode->i_sb);
				3351	REISERFS_I(inode)->i_jl = journal->j_current_jl;
				3352	REISERFS_I(inode)->i_trans_id = journal->j_trans_id ;
				3353	}
				3354
				3355	/*
				3356	* returns -1 on error, 0 if no commits/barriers were done and 1
				3357	* if a transaction was actually committed and the barrier was done
				3358	*/
				3359	static int __commit_trans_jl(struct inode *inode, unsigned long id,
				3360	struct reiserfs_journal_list *jl)
				3361	{
				3362	struct reiserfs_transaction_handle th ;
				3363	struct super_block *sb = inode->i_sb ;
				3364	struct reiserfs_journal *journal = SB_JOURNAL (sb);
				3365	int ret = 0;
				3366
				3367	/* is it from the current transaction, or from an unknown transaction? */
				3368	if (id == journal->j_trans_id) {
				3369	jl = journal->j_current_jl;
				3370	/* try to let other writers come in and grow this transaction */
				3371	let_transaction_grow(sb, id);
				3372	if (journal->j_trans_id != id) {
				3373	goto flush_commit_only;
				3374	}
				3375
				3376	ret = journal_begin(&th, sb, 1) ;
				3377	if (ret)
				3378	return ret;
				3379
				3380	/* someone might have ended this transaction while we joined */
				3381	if (journal->j_trans_id != id) {
				3382	reiserfs_prepare_for_journal(sb, SB_BUFFER_WITH_SB(sb), 1) ;
				3383	journal_mark_dirty(&th, sb, SB_BUFFER_WITH_SB(sb)) ;
				3384	ret = journal_end(&th, sb, 1) ;
				3385	goto flush_commit_only;
				3386	}
				3387
				3388	ret = journal_end_sync(&th, sb, 1) ;
				3389	if (!ret)
				3390	ret = 1;
				3391
				3392	} else {
				3393	/* this gets tricky, we have to make sure the journal list in
				3394	* the inode still exists. We know the list is still around
				3395	* if we've got a larger transaction id than the oldest list
				3396	*/
				3397	flush_commit_only:
				3398	if (journal_list_still_alive(inode->i_sb, id)) {
				3399	/*
				3400	* we only set ret to 1 when we know for sure
				3401	* the barrier hasn't been started yet on the commit
				3402	* block.
				3403	*/
				3404	if (atomic_read(&jl->j_commit_left) > 1)
				3405	ret = 1;
				3406	flush_commit_list(sb, jl, 1) ;
				3407	if (journal->j_errno)
				3408	ret = journal->j_errno;
				3409	}
				3410	}
				3411	/* otherwise the list is gone, and long since committed */
				3412	return ret;
				3413	}
				3414
				3415	int reiserfs_commit_for_inode(struct inode *inode) {
				3416	unsigned long id = REISERFS_I(inode)->i_trans_id;
				3417	struct reiserfs_journal_list *jl = REISERFS_I(inode)->i_jl;
				3418
				3419	/* for the whole inode, assume unset id means it was
				3420	* changed in the current transaction. More conservative
				3421	*/
				3422	if (!id \|\| !jl) {
				3423	reiserfs_update_inode_transaction(inode) ;
				3424	id = REISERFS_I(inode)->i_trans_id;
				3425	/* jl will be updated in __commit_trans_jl */
				3426	}
				3427
				3428	return __commit_trans_jl(inode, id, jl);
				3429	}
				3430
				3431	void reiserfs_restore_prepared_buffer(struct super_block *p_s_sb,
				3432	struct buffer_head *bh) {
				3433	struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
				3434	PROC_INFO_INC( p_s_sb, journal.restore_prepared );
				3435	if (!bh) {
				3436	return ;
				3437	}
				3438	if (test_clear_buffer_journal_restore_dirty (bh) &&
				3439	buffer_journal_dirty(bh)) {
				3440	struct reiserfs_journal_cnode *cn;
				3441	cn = get_journal_hash_dev(p_s_sb,
				3442	journal->j_list_hash_table,
				3443	bh->b_blocknr);
				3444	if (cn && can_dirty(cn)) {
				3445	set_buffer_journal_test (bh);
				3446	mark_buffer_dirty(bh);
				3447	}
				3448	}
				3449	clear_buffer_journal_prepared (bh);
				3450	}
				3451
				3452	extern struct tree_balance *cur_tb ;
				3453	/*
				3454	** before we can change a metadata block, we have to make sure it won't
				3455	** be written to disk while we are altering it. So, we must:
				3456	** clean it
				3457	** wait on it.
				3458	**
				3459	*/
				3460	int reiserfs_prepare_for_journal(struct super_block *p_s_sb,
				3461	struct buffer_head *bh, int wait) {
				3462	PROC_INFO_INC( p_s_sb, journal.prepare );
				3463
				3464	if (test_set_buffer_locked(bh)) {
				3465	if (!wait)
				3466	return 0;
				3467	lock_buffer(bh);
				3468	}
				3469	set_buffer_journal_prepared (bh);
				3470	if (test_clear_buffer_dirty(bh) && buffer_journal_dirty(bh)) {
				3471	clear_buffer_journal_test (bh);
				3472	set_buffer_journal_restore_dirty (bh);
				3473	}
				3474	unlock_buffer(bh);
				3475	return 1;
				3476	}
				3477
				3478	static void flush_old_journal_lists(struct super_block *s) {
				3479	struct reiserfs_journal *journal = SB_JOURNAL (s);
				3480	struct reiserfs_journal_list *jl;
				3481	struct list_head *entry;
				3482	time_t now = get_seconds();
				3483
				3484	while(!list_empty(&journal->j_journal_list)) {
				3485	entry = journal->j_journal_list.next;
				3486	jl = JOURNAL_LIST_ENTRY(entry);
				3487	/* this check should always be run, to send old lists to disk */
				3488	if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4))) {
				3489	flush_used_journal_lists(s, jl);
				3490	} else {
				3491	break;
				3492	}
				3493	}
				3494	}
				3495
				3496	/*
				3497	** long and ugly. If flush, will not return until all commit
				3498	** blocks and all real buffers in the trans are on disk.
				3499	** If no_async, won't return until all commit blocks are on disk.
				3500	**
				3501	** keep reading, there are comments as you go along
				3502	**
				3503	** If the journal is aborted, we just clean up. Things like flushing
				3504	** journal lists, etc just won't happen.
				3505	*/
				3506	static int do_journal_end(struct reiserfs_transaction_handle th, struct super_block p_s_sb, unsigned long nblocks,
				3507	int flags) {
				3508	struct reiserfs_journal *journal = SB_JOURNAL (p_s_sb);
				3509	struct reiserfs_journal_cnode cn, next, *jl_cn;
				3510	struct reiserfs_journal_cnode *last_cn = NULL;
				3511	struct reiserfs_journal_desc *desc ;
				3512	struct reiserfs_journal_commit *commit ;
				3513	struct buffer_head c_bh ; / commit bh */
				3514	struct buffer_head d_bh ; / desc bh */
				3515	int cur_write_start = 0 ; /* start index of current log write */
				3516	int old_start ;
				3517	int i ;
				3518	int flush = flags & FLUSH_ALL ;
				3519	int wait_on_commit = flags & WAIT ;
				3520	struct reiserfs_journal_list jl, temp_jl;
				3521	struct list_head entry, safe;
				3522	unsigned long jindex;
				3523	unsigned long commit_trans_id;
				3524	int trans_half;
				3525
				3526	BUG_ON (th->t_refcount > 1);
				3527	BUG_ON (!th->t_trans_id);
				3528
				3529	current->journal_info = th->t_handle_save;
				3530	reiserfs_check_lock_depth(p_s_sb, "journal end");
				3531	if (journal->j_len == 0) {
				3532	reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ;
				3533	journal_mark_dirty(th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ;
				3534	}
				3535
				3536	lock_journal(p_s_sb) ;
				3537	if (journal->j_next_full_flush) {
				3538	flags \|= FLUSH_ALL ;
				3539	flush = 1 ;
				3540	}
				3541	if (journal->j_next_async_flush) {
				3542	flags \|= COMMIT_NOW \| WAIT;
				3543	wait_on_commit = 1;
				3544	}
				3545
				3546	/* check_journal_end locks the journal, and unlocks if it does not return 1
				3547	** it tells us if we should continue with the journal_end, or just return
				3548	*/
				3549	if (!check_journal_end(th, p_s_sb, nblocks, flags)) {
				3550	p_s_sb->s_dirt = 1;
				3551	wake_queued_writers(p_s_sb);
				3552	reiserfs_async_progress_wait(p_s_sb);
				3553	goto out ;
				3554	}
				3555
				3556	/* check_journal_end might set these, check again */
				3557	if (journal->j_next_full_flush) {
				3558	flush = 1 ;
				3559	}
				3560
				3561	/*
				3562	** j must wait means we have to flush the log blocks, and the real blocks for
				3563	** this transaction
				3564	*/
				3565	if (journal->j_must_wait > 0) {
				3566	flush = 1 ;
				3567	}
				3568
				3569	#ifdef REISERFS_PREALLOCATE
				3570	/* quota ops might need to nest, setup the journal_info pointer for them */
				3571	current->journal_info = th ;
				3572	reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into
				3573	* the transaction */
				3574	current->journal_info = th->t_handle_save ;
				3575	#endif
				3576
				3577	/* setup description block */
				3578	d_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) + journal->j_start) ;
				3579	set_buffer_uptodate(d_bh);
				3580	desc = (struct reiserfs_journal_desc *)(d_bh)->b_data ;
				3581	memset(d_bh->b_data, 0, d_bh->b_size) ;
				3582	memcpy(get_journal_desc_magic (d_bh), JOURNAL_DESC_MAGIC, 8) ;
				3583	set_desc_trans_id(desc, journal->j_trans_id) ;
				3584
				3585	/* setup commit block. Don't write (keep it clean too) this one until after everyone else is written */
				3586	c_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
				3587	((journal->j_start + journal->j_len + 1) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))) ;
				3588	commit = (struct reiserfs_journal_commit *)c_bh->b_data ;
				3589	memset(c_bh->b_data, 0, c_bh->b_size) ;
				3590	set_commit_trans_id(commit, journal->j_trans_id) ;
				3591	set_buffer_uptodate(c_bh) ;
				3592
				3593	/* init this journal list */
				3594	jl = journal->j_current_jl;
				3595
				3596	/* we lock the commit before doing anything because
				3597	* we want to make sure nobody tries to run flush_commit_list until
				3598	* the new transaction is fully setup, and we've already flushed the
				3599	* ordered bh list
				3600	*/
				3601	down(&jl->j_commit_lock);
				3602
				3603	/* save the transaction id in case we need to commit it later */
				3604	commit_trans_id = jl->j_trans_id;
				3605
				3606	atomic_set(&jl->j_older_commits_done, 0) ;
				3607	jl->j_trans_id = journal->j_trans_id ;
				3608	jl->j_timestamp = journal->j_trans_start_time ;
				3609	jl->j_commit_bh = c_bh ;
				3610	jl->j_start = journal->j_start ;
				3611	jl->j_len = journal->j_len ;
				3612	atomic_set(&jl->j_nonzerolen, journal->j_len) ;
				3613	atomic_set(&jl->j_commit_left, journal->j_len + 2);
				3614	jl->j_realblock = NULL ;
				3615
				3616	/* The ENTIRE FOR LOOP MUST not cause schedule to occur.
				3617	** for each real block, add it to the journal list hash,
				3618	** copy into real block index array in the commit or desc block
				3619	*/
				3620	trans_half = journal_trans_half(p_s_sb->s_blocksize);
				3621	for (i = 0, cn = journal->j_first ; cn ; cn = cn->next, i++) {
				3622	if (buffer_journaled (cn->bh)) {
				3623	jl_cn = get_cnode(p_s_sb) ;
				3624	if (!jl_cn) {
				3625	reiserfs_panic(p_s_sb, "journal-1676, get_cnode returned NULL\n") ;
				3626	}
				3627	if (i == 0) {
				3628	jl->j_realblock = jl_cn ;
				3629	}
				3630	jl_cn->prev = last_cn ;
				3631	jl_cn->next = NULL ;
				3632	if (last_cn) {
				3633	last_cn->next = jl_cn ;
				3634	}
				3635	last_cn = jl_cn ;
				3636	/* make sure the block we are trying to log is not a block
				3637	of journal or reserved area */
				3638
				3639	if (is_block_in_log_or_reserved_area(p_s_sb, cn->bh->b_blocknr)) {
				3640	reiserfs_panic(p_s_sb, "journal-2332: Trying to log block %lu, which is a log block\n", cn->bh->b_blocknr) ;
				3641	}
				3642	jl_cn->blocknr = cn->bh->b_blocknr ;
				3643	jl_cn->state = 0 ;
				3644	jl_cn->sb = p_s_sb;
				3645	jl_cn->bh = cn->bh ;
				3646	jl_cn->jlist = jl;
				3647	insert_journal_hash(journal->j_list_hash_table, jl_cn) ;
				3648	if (i < trans_half) {
				3649	desc->j_realblock[i] = cpu_to_le32(cn->bh->b_blocknr) ;
				3650	} else {
				3651	commit->j_realblock[i - trans_half] = cpu_to_le32(cn->bh->b_blocknr) ;
				3652	}
				3653	} else {
				3654	i-- ;
				3655	}
				3656	}
				3657	set_desc_trans_len(desc, journal->j_len) ;
				3658	set_desc_mount_id(desc, journal->j_mount_id) ;
				3659	set_desc_trans_id(desc, journal->j_trans_id) ;
				3660	set_commit_trans_len(commit, journal->j_len);
				3661
				3662	/* special check in case all buffers in the journal were marked for not logging */
				3663	if (journal->j_len == 0) {
				3664	BUG();
				3665	}
				3666
				3667	/* we're about to dirty all the log blocks, mark the description block
				3668	* dirty now too. Don't mark the commit block dirty until all the
				3669	* others are on disk
				3670	*/
				3671	mark_buffer_dirty(d_bh);
				3672
				3673	/* first data block is j_start + 1, so add one to cur_write_start wherever you use it */
				3674	cur_write_start = journal->j_start ;
				3675	cn = journal->j_first ;
				3676	jindex = 1 ; /* start at one so we don't get the desc again */
				3677	while(cn) {
				3678	clear_buffer_journal_new (cn->bh);
				3679	/* copy all the real blocks into log area. dirty log blocks */
				3680	if (buffer_journaled (cn->bh)) {
				3681	struct buffer_head *tmp_bh ;
				3682	char *addr;
				3683	struct page *page;
				3684	tmp_bh = journal_getblk(p_s_sb, SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb) +
				3685	((cur_write_start + jindex) % SB_ONDISK_JOURNAL_SIZE(p_s_sb))) ;
				3686	set_buffer_uptodate(tmp_bh);
				3687	page = cn->bh->b_page;
				3688	addr = kmap(page);
				3689	memcpy(tmp_bh->b_data, addr + offset_in_page(cn->bh->b_data),
				3690	cn->bh->b_size);
				3691	kunmap(page);
				3692	mark_buffer_dirty(tmp_bh);
				3693	jindex++ ;
				3694	set_buffer_journal_dirty (cn->bh);
				3695	clear_buffer_journaled (cn->bh);
				3696	} else {
				3697	/* JDirty cleared sometime during transaction. don't log this one */
				3698	reiserfs_warning(p_s_sb, "journal-2048: do_journal_end: BAD, buffer in journal hash, but not JDirty!") ;
				3699	brelse(cn->bh) ;
				3700	}
				3701	next = cn->next ;
				3702	free_cnode(p_s_sb, cn) ;
				3703	cn = next ;
				3704	cond_resched();
				3705	}
				3706
				3707	/* we are done with both the c_bh and d_bh, but
				3708	** c_bh must be written after all other commit blocks,
				3709	** so we dirty/relse c_bh in flush_commit_list, with commit_left <= 1.
				3710	*/
				3711
				3712	journal->j_current_jl = alloc_journal_list(p_s_sb);
				3713
				3714	/* now it is safe to insert this transaction on the main list */
				3715	list_add_tail(&jl->j_list, &journal->j_journal_list);
				3716	list_add_tail(&jl->j_working_list, &journal->j_working_list);
				3717	journal->j_num_work_lists++;
				3718
				3719	/* reset journal values for the next transaction */
				3720	old_start = journal->j_start ;
				3721	journal->j_start = (journal->j_start + journal->j_len + 2) % SB_ONDISK_JOURNAL_SIZE(p_s_sb);
				3722	atomic_set(&(journal->j_wcount), 0) ;
				3723	journal->j_bcount = 0 ;
				3724	journal->j_last = NULL ;
				3725	journal->j_first = NULL ;
				3726	journal->j_len = 0 ;
				3727	journal->j_trans_start_time = 0 ;
				3728	journal->j_trans_id++ ;
				3729	journal->j_current_jl->j_trans_id = journal->j_trans_id;
				3730	journal->j_must_wait = 0 ;
				3731	journal->j_len_alloc = 0 ;
				3732	journal->j_next_full_flush = 0 ;
				3733	journal->j_next_async_flush = 0 ;
				3734	init_journal_hash(p_s_sb) ;
				3735
				3736	// make sure reiserfs_add_jh sees the new current_jl before we
				3737	// write out the tails
				3738	smp_mb();
				3739
				3740	/* tail conversion targets have to hit the disk before we end the
				3741	* transaction. Otherwise a later transaction might repack the tail
				3742	* before this transaction commits, leaving the data block unflushed and
				3743	* clean, if we crash before the later transaction commits, the data block
				3744	* is lost.
				3745	*/
				3746	if (!list_empty(&jl->j_tail_bh_list)) {
				3747	unlock_kernel();
				3748	write_ordered_buffers(&journal->j_dirty_buffers_lock,
				3749	journal, jl, &jl->j_tail_bh_list);
				3750	lock_kernel();
				3751	}
				3752	if (!list_empty(&jl->j_tail_bh_list))
				3753	BUG();
				3754	up(&jl->j_commit_lock);
				3755
				3756	/* honor the flush wishes from the caller, simple commits can
				3757	** be done outside the journal lock, they are done below
				3758	**
				3759	** if we don't flush the commit list right now, we put it into
				3760	** the work queue so the people waiting on the async progress work
				3761	** queue don't wait for this proc to flush journal lists and such.
				3762	*/
				3763	if (flush) {
				3764	flush_commit_list(p_s_sb, jl, 1) ;
				3765	flush_journal_list(p_s_sb, jl, 1) ;
				3766	} else if (!(jl->j_state & LIST_COMMIT_PENDING))
				3767	queue_delayed_work(commit_wq, &journal->j_work, HZ/10);
				3768
				3769
				3770	/* if the next transaction has any chance of wrapping, flush
				3771	** transactions that might get overwritten. If any journal lists are very
				3772	** old flush them as well.
				3773	*/
				3774	first_jl:
				3775	list_for_each_safe(entry, safe, &journal->j_journal_list) {
				3776	temp_jl = JOURNAL_LIST_ENTRY(entry);
				3777	if (journal->j_start <= temp_jl->j_start) {
				3778	if ((journal->j_start + journal->j_trans_max + 1) >=
				3779	temp_jl->j_start)
				3780	{
				3781	flush_used_journal_lists(p_s_sb, temp_jl);
				3782	goto first_jl;
				3783	} else if ((journal->j_start +
				3784	journal->j_trans_max + 1) <
				3785	SB_ONDISK_JOURNAL_SIZE(p_s_sb))
				3786	{
				3787	/* if we don't cross into the next transaction and we don't
				3788	* wrap, there is no way we can overlap any later transactions
				3789	* break now
				3790	*/
				3791	break;
				3792	}
				3793	} else if ((journal->j_start +
				3794	journal->j_trans_max + 1) >
				3795	SB_ONDISK_JOURNAL_SIZE(p_s_sb))
				3796	{
				3797	if (((journal->j_start + journal->j_trans_max + 1) %
				3798	SB_ONDISK_JOURNAL_SIZE(p_s_sb)) >= temp_jl->j_start)
				3799	{
				3800	flush_used_journal_lists(p_s_sb, temp_jl);
				3801	goto first_jl;
				3802	} else {
				3803	/* we don't overlap anything from out start to the end of the
				3804	* log, and our wrapped portion doesn't overlap anything at
				3805	* the start of the log. We can break
				3806	*/
				3807	break;
				3808	}
				3809	}
				3810	}
				3811	flush_old_journal_lists(p_s_sb);
				3812
				3813	journal->j_current_jl->j_list_bitmap = get_list_bitmap(p_s_sb, journal->j_current_jl) ;
				3814
				3815	if (!(journal->j_current_jl->j_list_bitmap)) {
				3816	reiserfs_panic(p_s_sb, "journal-1996: do_journal_end, could not get a list bitmap\n") ;
				3817	}
				3818
				3819	atomic_set(&(journal->j_jlock), 0) ;
				3820	unlock_journal(p_s_sb) ;
				3821	/* wake up any body waiting to join. */
				3822	clear_bit(J_WRITERS_QUEUED, &journal->j_state);
				3823	wake_up(&(journal->j_join_wait)) ;
				3824
				3825	if (!flush && wait_on_commit &&
				3826	journal_list_still_alive(p_s_sb, commit_trans_id)) {
				3827	flush_commit_list(p_s_sb, jl, 1) ;
				3828	}
				3829	out:
				3830	reiserfs_check_lock_depth(p_s_sb, "journal end2");
				3831
				3832	memset (th, 0, sizeof (*th));
				3833	/* Re-set th->t_super, so we can properly keep track of how many
				3834	* persistent transactions there are. We need to do this so if this
				3835	* call is part of a failed restart_transaction, we can free it later */
				3836	th->t_super = p_s_sb;
				3837
				3838	return journal->j_errno;
				3839	}
				3840
				3841	static void
				3842	__reiserfs_journal_abort_hard (struct super_block *sb)
				3843	{
				3844	struct reiserfs_journal *journal = SB_JOURNAL (sb);
				3845	if (test_bit (J_ABORTED, &journal->j_state))
				3846	return;
				3847
				3848	printk (KERN_CRIT "REISERFS: Aborting journal for filesystem on %s\n",
				3849	reiserfs_bdevname (sb));
				3850
				3851	sb->s_flags \|= MS_RDONLY;
				3852	set_bit (J_ABORTED, &journal->j_state);
				3853
				3854	#ifdef CONFIG_REISERFS_CHECK
				3855	dump_stack();
				3856	#endif
				3857	}
				3858
				3859	static void
				3860	__reiserfs_journal_abort_soft (struct super_block *sb, int errno)
				3861	{
				3862	struct reiserfs_journal *journal = SB_JOURNAL (sb);
				3863	if (test_bit (J_ABORTED, &journal->j_state))
				3864	return;
				3865
				3866	if (!journal->j_errno)
				3867	journal->j_errno = errno;
				3868
				3869	__reiserfs_journal_abort_hard (sb);
				3870	}
				3871
				/*
				 * Abort the journal of 'sb', recording 'errno' as the journal error.
				 * Thin wrapper around __reiserfs_journal_abort_soft().
				 */
				void
				reiserfs_journal_abort(struct super_block *sb, int errno)
				{
					/* a void function must not return an expression, so just call */
					__reiserfs_journal_abort_soft(sb, errno);
				}