reiserfs: locking, handle nested locks properly

The reiserfs write lock replaced the BKL and uses similar semantics.

Frederic's locking code makes a distinction between when the lock is nested
and when it's being acquired/released, but I don't think that's the right
distinction to make.

The right distinction is between the lock being released at end-of-use and
the lock being released for a schedule. The unlock should return the depth
and the lock should restore it, rather than the other way around as it is now.

This patch implements that and adds a number of places where the lock
should be dropped.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>

commit: 278f6679f454bf185a07d9a4ca355b153482d17a [log] [tgz]
author: Jeff Mahoney <jeffm@suse.com> Thu Aug 08 17:34:46 2013 -0400
committer: Jeff Mahoney <jeffm@suse.de> Thu Aug 08 17:34:46 2013 -0400
tree: ffead073e67cfdc1ddfc3949ebc93c06dcaaab8f
parent: 4c05141df57f4ffc1a9a28f1925434924179bfe4 [diff]
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index a98b774..881f8ea 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c

@@ -1340,10 +1340,11 @@
 		                 "reading failed", __func__, block);
 	else {
 		if (buffer_locked(bh)) {
+			int depth;
 			PROC_INFO_INC(sb, scan_bitmap.wait);
-			reiserfs_write_unlock(sb);
+			depth = reiserfs_write_unlock_nested(sb);
 			__wait_on_buffer(bh);
-			reiserfs_write_lock(sb);
+			reiserfs_write_lock_nested(sb, depth);
 		}
 		BUG_ON(!buffer_uptodate(bh));
 		BUG_ON(atomic_read(&bh->b_count) == 0);

diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 03e4ca5..1fd2051 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c

@@ -71,6 +71,7 @@
 	char small_buf[32];	/* avoid kmalloc if we can */
 	struct reiserfs_dir_entry de;
 	int ret = 0;
+	int depth;
 
 	reiserfs_write_lock(inode->i_sb);
 
@@ -181,17 +182,17 @@
 				 * Since filldir might sleep, we can release
 				 * the write lock here for other waiters
 				 */
-				reiserfs_write_unlock(inode->i_sb);
+				depth = reiserfs_write_unlock_nested(inode->i_sb);
 				if (!dir_emit
 				    (ctx, local_buf, d_reclen, d_ino,
 				     DT_UNKNOWN)) {
-					reiserfs_write_lock(inode->i_sb);
+					reiserfs_write_lock_nested(inode->i_sb, depth);
 					if (local_buf != small_buf) {
 						kfree(local_buf);
 					}
 					goto end;
 				}
-				reiserfs_write_lock(inode->i_sb);
+				reiserfs_write_lock_nested(inode->i_sb, depth);
 				if (local_buf != small_buf) {
 					kfree(local_buf);
 				}

diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index 430e065..dc4d415 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c

@@ -1022,9 +1022,9 @@
 	if (buffer_locked(*pcom_father)) {
 
 		/* Release the write lock while the buffer is busy */
-		reiserfs_write_unlock(tb->tb_sb);
+		int depth = reiserfs_write_unlock_nested(tb->tb_sb);
 		__wait_on_buffer(*pcom_father);
-		reiserfs_write_lock(tb->tb_sb);
+		reiserfs_write_lock_nested(tb->tb_sb, depth);
 		if (FILESYSTEM_CHANGED_TB(tb)) {
 			brelse(*pcom_father);
 			return REPEAT_SEARCH;
@@ -1929,9 +1929,9 @@
 		return REPEAT_SEARCH;
 
 	if (buffer_locked(bh)) {
-		reiserfs_write_unlock(tb->tb_sb);
+		int depth = reiserfs_write_unlock_nested(tb->tb_sb);
 		__wait_on_buffer(bh);
-		reiserfs_write_lock(tb->tb_sb);
+		reiserfs_write_lock_nested(tb->tb_sb, depth);
 		if (FILESYSTEM_CHANGED_TB(tb))
 			return REPEAT_SEARCH;
 	}
@@ -1952,6 +1952,7 @@
 	unsigned long son_number;
 	struct super_block *sb = tb->tb_sb;
 	struct buffer_head *bh;
+	int depth;
 
 	PROC_INFO_INC(sb, get_neighbors[h]);
 
@@ -1969,9 +1970,9 @@
 		     tb->FL[h]) ? tb->lkey[h] : B_NR_ITEMS(tb->
 								       FL[h]);
 		son_number = B_N_CHILD_NUM(tb->FL[h], child_position);
-		reiserfs_write_unlock(sb);
+		depth = reiserfs_write_unlock_nested(tb->tb_sb);
 		bh = sb_bread(sb, son_number);
-		reiserfs_write_lock(sb);
+		reiserfs_write_lock_nested(tb->tb_sb, depth);
 		if (!bh)
 			return IO_ERROR;
 		if (FILESYSTEM_CHANGED_TB(tb)) {
@@ -2009,9 +2010,9 @@
 		child_position =
 		    (bh == tb->FR[h]) ? tb->rkey[h] + 1 : 0;
 		son_number = B_N_CHILD_NUM(tb->FR[h], child_position);
-		reiserfs_write_unlock(sb);
+		depth = reiserfs_write_unlock_nested(tb->tb_sb);
 		bh = sb_bread(sb, son_number);
-		reiserfs_write_lock(sb);
+		reiserfs_write_lock_nested(tb->tb_sb, depth);
 		if (!bh)
 			return IO_ERROR;
 		if (FILESYSTEM_CHANGED_TB(tb)) {
@@ -2272,6 +2273,7 @@
 		}
 
 		if (locked) {
+			int depth;
 #ifdef CONFIG_REISERFS_CHECK
 			repeat_counter++;
 			if ((repeat_counter % 10000) == 0) {
@@ -2286,9 +2288,9 @@
 				    REPEAT_SEARCH : CARRY_ON;
 			}
 #endif
-			reiserfs_write_unlock(tb->tb_sb);
+			depth = reiserfs_write_unlock_nested(tb->tb_sb);
 			__wait_on_buffer(locked);
-			reiserfs_write_lock(tb->tb_sb);
+			reiserfs_write_lock_nested(tb->tb_sb, depth);
 			if (FILESYSTEM_CHANGED_TB(tb))
 				return REPEAT_SEARCH;
 		}
@@ -2359,9 +2361,9 @@
 
 	/* if it possible in indirect_to_direct conversion */
 	if (buffer_locked(tbS0)) {
-		reiserfs_write_unlock(tb->tb_sb);
+		int depth = reiserfs_write_unlock_nested(tb->tb_sb);
 		__wait_on_buffer(tbS0);
-		reiserfs_write_lock(tb->tb_sb);
+		reiserfs_write_lock_nested(tb->tb_sb, depth);
 		if (FILESYSTEM_CHANGED_TB(tb))
 			return REPEAT_SEARCH;
 	}

diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index bf1331a..4a3a57c 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c

@@ -41,11 +41,10 @@
 
 	/* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
 	if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) {	/* also handles bad_inode case */
-		int depth;
 
 		reiserfs_delete_xattrs(inode);
 
-		depth = reiserfs_write_lock_once(inode->i_sb);
+		reiserfs_write_lock(inode->i_sb);
 
 		if (journal_begin(&th, inode->i_sb, jbegin_count))
 			goto out;
@@ -74,7 +73,7 @@
 		remove_save_link(inode, 0 /* not truncate */ );	/* we can't do anything
 								 * about an error here */
 out:
-		reiserfs_write_unlock_once(inode->i_sb, depth);
+		reiserfs_write_unlock(inode->i_sb);
 	} else {
 		/* no object items are in the tree */
 		;
@@ -611,7 +610,6 @@
 	__le32 *item;
 	int done;
 	int fs_gen;
-	int lock_depth;
 	struct reiserfs_transaction_handle *th = NULL;
 	/* space reserved in transaction batch:
 	   . 3 balancings in direct->indirect conversion
@@ -627,11 +625,11 @@
 	loff_t new_offset =
 	    (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1;
 
-	lock_depth = reiserfs_write_lock_once(inode->i_sb);
+	reiserfs_write_lock(inode->i_sb);
 	version = get_inode_item_key_version(inode);
 
 	if (!file_capable(inode, block)) {
-		reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+		reiserfs_write_unlock(inode->i_sb);
 		return -EFBIG;
 	}
 
@@ -643,7 +641,7 @@
 		/* find number of block-th logical block of the file */
 		ret = _get_block_create_0(inode, block, bh_result,
 					  create | GET_BLOCK_READ_DIRECT);
-		reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+		reiserfs_write_unlock(inode->i_sb);
 		return ret;
 	}
 	/*
@@ -761,7 +759,7 @@
 		if (!dangle && th)
 			retval = reiserfs_end_persistent_transaction(th);
 
-		reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+		reiserfs_write_unlock(inode->i_sb);
 
 		/* the item was found, so new blocks were not added to the file
 		 ** there is no need to make sure the inode is updated with this
@@ -1012,11 +1010,7 @@
 		 * long time.  reschedule if needed and also release the write
 		 * lock for others.
 		 */
-		if (need_resched()) {
-			reiserfs_write_unlock_once(inode->i_sb, lock_depth);
-			schedule();
-			lock_depth = reiserfs_write_lock_once(inode->i_sb);
-		}
+		reiserfs_cond_resched(inode->i_sb);
 
 		retval = search_for_position_by_key(inode->i_sb, &key, &path);
 		if (retval == IO_ERROR) {
@@ -1051,7 +1045,7 @@
 			retval = err;
 	}
 
-	reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+	reiserfs_write_unlock(inode->i_sb);
 	reiserfs_check_path(&path);
 	return retval;
 }
@@ -1510,14 +1504,15 @@
 {
 	struct inode *inode;
 	struct reiserfs_iget_args args;
+	int depth;
 
 	args.objectid = key->on_disk_key.k_objectid;
 	args.dirid = key->on_disk_key.k_dir_id;
-	reiserfs_write_unlock(s);
+	depth = reiserfs_write_unlock_nested(s);
 	inode = iget5_locked(s, key->on_disk_key.k_objectid,
 			     reiserfs_find_actor, reiserfs_init_locked_inode,
 			     (void *)(&args));
-	reiserfs_write_lock(s);
+	reiserfs_write_lock_nested(s, depth);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 
@@ -1781,6 +1776,7 @@
 	struct stat_data sd;
 	int retval;
 	int err;
+	int depth;
 
 	BUG_ON(!th->t_trans_id);
 
@@ -1813,10 +1809,10 @@
 	memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
 	args.dirid = le32_to_cpu(ih.ih_key.k_dir_id);
 
-	reiserfs_write_unlock(inode->i_sb);
+	depth = reiserfs_write_unlock_nested(inode->i_sb);
 	err = insert_inode_locked4(inode, args.objectid,
 			     reiserfs_find_actor, &args);
-	reiserfs_write_lock(inode->i_sb);
+	reiserfs_write_lock_nested(inode->i_sb, depth);
 	if (err) {
 		err = -EINVAL;
 		goto out_bad_inode;
@@ -2108,9 +2104,8 @@
 	int error;
 	struct buffer_head *bh = NULL;
 	int err2;
-	int lock_depth;
 
-	lock_depth = reiserfs_write_lock_once(inode->i_sb);
+	reiserfs_write_lock(inode->i_sb);
 
 	if (inode->i_size > 0) {
 		error = grab_tail_page(inode, &page, &bh);
@@ -2179,7 +2174,7 @@
 		page_cache_release(page);
 	}
 
-	reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+	reiserfs_write_unlock(inode->i_sb);
 
 	return 0;
       out:
@@ -2188,7 +2183,7 @@
 		page_cache_release(page);
 	}
 
-	reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+	reiserfs_write_unlock(inode->i_sb);
 
 	return error;
 }
@@ -2653,10 +2648,11 @@
 	struct inode *inode = page->mapping->host;
 	int ret;
 	int old_ref = 0;
+	int depth;
 
-	reiserfs_write_unlock(inode->i_sb);
+	depth = reiserfs_write_unlock_nested(inode->i_sb);
 	reiserfs_wait_on_write_block(inode->i_sb);
-	reiserfs_write_lock(inode->i_sb);
+	reiserfs_write_lock_nested(inode->i_sb, depth);
 
 	fix_tail_page_for_writing(page);
 	if (reiserfs_transaction_running(inode->i_sb)) {
@@ -2713,7 +2709,6 @@
 	int update_sd = 0;
 	struct reiserfs_transaction_handle *th;
 	unsigned start;
-	int lock_depth = 0;
 	bool locked = false;
 
 	if ((unsigned long)fsdata & AOP_FLAG_CONT_EXPAND)
@@ -2742,7 +2737,7 @@
 	 */
 	if (pos + copied > inode->i_size) {
 		struct reiserfs_transaction_handle myth;
-		lock_depth = reiserfs_write_lock_once(inode->i_sb);
+		reiserfs_write_lock(inode->i_sb);
 		locked = true;
 		/* If the file have grown beyond the border where it
 		   can have a tail, unmark it as needing a tail
@@ -2773,7 +2768,7 @@
 	}
 	if (th) {
 		if (!locked) {
-			lock_depth = reiserfs_write_lock_once(inode->i_sb);
+			reiserfs_write_lock(inode->i_sb);
 			locked = true;
 		}
 		if (!update_sd)
@@ -2785,7 +2780,7 @@
 
       out:
 	if (locked)
-		reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+		reiserfs_write_unlock(inode->i_sb);
 	unlock_page(page);
 	page_cache_release(page);
 
@@ -2795,7 +2790,7 @@
 	return ret == 0 ? copied : ret;
 
       journal_error:
-	reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+	reiserfs_write_unlock(inode->i_sb);
 	locked = false;
 	if (th) {
 		if (!update_sd)
@@ -2813,10 +2808,11 @@
 	int ret = 0;
 	int update_sd = 0;
 	struct reiserfs_transaction_handle *th = NULL;
+	int depth;
 
-	reiserfs_write_unlock(inode->i_sb);
+	depth = reiserfs_write_unlock_nested(inode->i_sb);
 	reiserfs_wait_on_write_block(inode->i_sb);
-	reiserfs_write_lock(inode->i_sb);
+	reiserfs_write_lock_nested(inode->i_sb, depth);
 
 	if (reiserfs_transaction_running(inode->i_sb)) {
 		th = current->journal_info;
@@ -3115,7 +3111,6 @@
 {
 	struct inode *inode = dentry->d_inode;
 	unsigned int ia_valid;
-	int depth;
 	int error;
 
 	error = inode_change_ok(inode, attr);
@@ -3127,14 +3122,14 @@
 
 	if (is_quota_modification(inode, attr))
 		dquot_initialize(inode);
-	depth = reiserfs_write_lock_once(inode->i_sb);
+	reiserfs_write_lock(inode->i_sb);
 	if (attr->ia_valid & ATTR_SIZE) {
 		/* version 2 items will be caught by the s_maxbytes check
 		 ** done for us in vmtruncate
 		 */
 		if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 &&
 		    attr->ia_size > MAX_NON_LFS) {
-			reiserfs_write_unlock_once(inode->i_sb, depth);
+			reiserfs_write_unlock(inode->i_sb);
 			error = -EFBIG;
 			goto out;
 		}
@@ -3157,7 +3152,7 @@
 					error = err;
 			}
 			if (error) {
-				reiserfs_write_unlock_once(inode->i_sb, depth);
+				reiserfs_write_unlock(inode->i_sb);
 				goto out;
 			}
 			/*
@@ -3167,7 +3162,7 @@
 			attr->ia_valid |= (ATTR_MTIME | ATTR_CTIME);
 		}
 	}
-	reiserfs_write_unlock_once(inode->i_sb, depth);
+	reiserfs_write_unlock(inode->i_sb);
 
 	if ((((attr->ia_valid & ATTR_UID) && (from_kuid(&init_user_ns, attr->ia_uid) & ~0xffff)) ||
 	     ((attr->ia_valid & ATTR_GID) && (from_kgid(&init_user_ns, attr->ia_gid) & ~0xffff))) &&
@@ -3192,16 +3187,16 @@
 			return error;
 
 		/* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */
-		depth = reiserfs_write_lock_once(inode->i_sb);
+		reiserfs_write_lock(inode->i_sb);
 		error = journal_begin(&th, inode->i_sb, jbegin_count);
-		reiserfs_write_unlock_once(inode->i_sb, depth);
+		reiserfs_write_unlock(inode->i_sb);
 		if (error)
 			goto out;
 		error = dquot_transfer(inode, attr);
-		depth = reiserfs_write_lock_once(inode->i_sb);
+		reiserfs_write_lock(inode->i_sb);
 		if (error) {
 			journal_end(&th, inode->i_sb, jbegin_count);
-			reiserfs_write_unlock_once(inode->i_sb, depth);
+			reiserfs_write_unlock(inode->i_sb);
 			goto out;
 		}
 
@@ -3213,7 +3208,7 @@
 			inode->i_gid = attr->ia_gid;
 		mark_inode_dirty(inode);
 		error = journal_end(&th, inode->i_sb, jbegin_count);
-		reiserfs_write_unlock_once(inode->i_sb, depth);
+		reiserfs_write_unlock(inode->i_sb);
 		if (error)
 			goto out;
 	}

diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 15cb5fe..946ccbf 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c

@@ -167,7 +167,6 @@
 int reiserfs_unpack(struct inode *inode, struct file *filp)
 {
 	int retval = 0;
-	int depth;
 	int index;
 	struct page *page;
 	struct address_space *mapping;
@@ -183,11 +182,11 @@
 		return 0;
 	}
 
-	depth = reiserfs_write_lock_once(inode->i_sb);
-
 	/* we need to make sure nobody is changing the file size beneath us */
 	reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb);
 
+	reiserfs_write_lock(inode->i_sb);
+
 	write_from = inode->i_size & (blocksize - 1);
 	/* if we are on a block boundary, we are already unpacked.  */
 	if (write_from == 0) {
@@ -221,6 +220,6 @@
 
       out:
 	mutex_unlock(&inode->i_mutex);
-	reiserfs_write_unlock_once(inode->i_sb, depth);
+	reiserfs_write_unlock(inode->i_sb);
 	return retval;
 }

diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 742fdd4..73feacc4 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c

@@ -947,9 +947,11 @@
 	struct reiserfs_journal *j = SB_JOURNAL(s);
 
 	if (atomic_read(&j->j_async_throttle)) {
-		reiserfs_write_unlock(s);
+		int depth;
+
+		depth = reiserfs_write_unlock_nested(s);
 		congestion_wait(BLK_RW_ASYNC, HZ / 10);
-		reiserfs_write_lock(s);
+		reiserfs_write_lock_nested(s, depth);
 	}
 
 	return 0;
@@ -972,6 +974,7 @@
 	struct reiserfs_journal *journal = SB_JOURNAL(s);
 	int retval = 0;
 	int write_len;
+	int depth;
 
 	reiserfs_check_lock_depth(s, "flush_commit_list");
 
@@ -1018,12 +1021,12 @@
 		 * We might sleep in numerous places inside
 		 * write_ordered_buffers. Relax the write lock.
 		 */
-		reiserfs_write_unlock(s);
+		depth = reiserfs_write_unlock_nested(s);
 		ret = write_ordered_buffers(&journal->j_dirty_buffers_lock,
 					    journal, jl, &jl->j_bh_list);
 		if (ret < 0 && retval == 0)
 			retval = ret;
-		reiserfs_write_lock(s);
+		reiserfs_write_lock_nested(s, depth);
 	}
 	BUG_ON(!list_empty(&jl->j_bh_list));
 	/*
@@ -1043,9 +1046,9 @@
 		tbh = journal_find_get_block(s, bn);
 		if (tbh) {
 			if (buffer_dirty(tbh)) {
-		            reiserfs_write_unlock(s);
+		            depth = reiserfs_write_unlock_nested(s);
 			    ll_rw_block(WRITE, 1, &tbh);
-			    reiserfs_write_lock(s);
+			    reiserfs_write_lock_nested(s, depth);
 			}
 			put_bh(tbh) ;
 		}
@@ -1057,17 +1060,17 @@
 		    (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s);
 		tbh = journal_find_get_block(s, bn);
 
-		reiserfs_write_unlock(s);
-		wait_on_buffer(tbh);
-		reiserfs_write_lock(s);
+		depth = reiserfs_write_unlock_nested(s);
+		__wait_on_buffer(tbh);
+		reiserfs_write_lock_nested(s, depth);
 		// since we're using ll_rw_blk above, it might have skipped over
 		// a locked buffer.  Double check here
 		//
 		/* redundant, sync_dirty_buffer() checks */
 		if (buffer_dirty(tbh)) {
-			reiserfs_write_unlock(s);
+			depth = reiserfs_write_unlock_nested(s);
 			sync_dirty_buffer(tbh);
-			reiserfs_write_lock(s);
+			reiserfs_write_lock_nested(s, depth);
 		}
 		if (unlikely(!buffer_uptodate(tbh))) {
 #ifdef CONFIG_REISERFS_CHECK
@@ -1091,12 +1094,12 @@
 		if (buffer_dirty(jl->j_commit_bh))
 			BUG();
 		mark_buffer_dirty(jl->j_commit_bh) ;
-		reiserfs_write_unlock(s);
+		depth = reiserfs_write_unlock_nested(s);
 		if (reiserfs_barrier_flush(s))
 			__sync_dirty_buffer(jl->j_commit_bh, WRITE_FLUSH_FUA);
 		else
 			sync_dirty_buffer(jl->j_commit_bh);
-		reiserfs_write_lock(s);
+		reiserfs_write_lock_nested(s, depth);
 	}
 
 	/* If there was a write error in the journal - we can't commit this
@@ -1228,15 +1231,16 @@
 {
 	struct reiserfs_journal_header *jh;
 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
+	int depth;
 
 	if (reiserfs_is_journal_aborted(journal))
 		return -EIO;
 
 	if (trans_id >= journal->j_last_flush_trans_id) {
 		if (buffer_locked((journal->j_header_bh))) {
-			reiserfs_write_unlock(sb);
-			wait_on_buffer((journal->j_header_bh));
-			reiserfs_write_lock(sb);
+			depth = reiserfs_write_unlock_nested(sb);
+			__wait_on_buffer(journal->j_header_bh);
+			reiserfs_write_lock_nested(sb, depth);
 			if (unlikely(!buffer_uptodate(journal->j_header_bh))) {
 #ifdef CONFIG_REISERFS_CHECK
 				reiserfs_warning(sb, "journal-699",
@@ -1254,14 +1258,14 @@
 		jh->j_mount_id = cpu_to_le32(journal->j_mount_id);
 
 		set_buffer_dirty(journal->j_header_bh);
-		reiserfs_write_unlock(sb);
+		depth = reiserfs_write_unlock_nested(sb);
 
 		if (reiserfs_barrier_flush(sb))
 			__sync_dirty_buffer(journal->j_header_bh, WRITE_FLUSH_FUA);
 		else
 			sync_dirty_buffer(journal->j_header_bh);
 
-		reiserfs_write_lock(sb);
+		reiserfs_write_lock_nested(sb, depth);
 		if (!buffer_uptodate(journal->j_header_bh)) {
 			reiserfs_warning(sb, "journal-837",
 					 "IO error during journal replay");
@@ -1341,6 +1345,7 @@
 	unsigned long j_len_saved = jl->j_len;
 	struct reiserfs_journal *journal = SB_JOURNAL(s);
 	int err = 0;
+	int depth;
 
 	BUG_ON(j_len_saved <= 0);
 
@@ -1495,9 +1500,9 @@
 						       "cn->bh is NULL");
 				}
 
-				reiserfs_write_unlock(s);
-				wait_on_buffer(cn->bh);
-				reiserfs_write_lock(s);
+				depth = reiserfs_write_unlock_nested(s);
+				__wait_on_buffer(cn->bh);
+				reiserfs_write_lock_nested(s, depth);
 
 				if (!cn->bh) {
 					reiserfs_panic(s, "journal-1012",
@@ -1974,6 +1979,7 @@
 /* returns 0 if it did not find a description block
 ** returns -1 if it found a corrupt commit block
 ** returns 1 if both desc and commit were valid
+** NOTE: only called during fs mount
 */
 static int journal_transaction_is_valid(struct super_block *sb,
 					struct buffer_head *d_bh,
@@ -2073,8 +2079,9 @@
 
 /*
 ** given the start, and values for the oldest acceptable transactions,
-** this either reads in a replays a transaction, or returns because the transaction
-** is invalid, or too old.
+** this either reads in a replays a transaction, or returns because the
+** transaction is invalid, or too old.
+** NOTE: only called during fs mount
 */
 static int journal_read_transaction(struct super_block *sb,
 				    unsigned long cur_dblock,
@@ -2208,10 +2215,7 @@
 	ll_rw_block(READ, get_desc_trans_len(desc), log_blocks);
 	for (i = 0; i < get_desc_trans_len(desc); i++) {
 
-		reiserfs_write_unlock(sb);
 		wait_on_buffer(log_blocks[i]);
-		reiserfs_write_lock(sb);
-
 		if (!buffer_uptodate(log_blocks[i])) {
 			reiserfs_warning(sb, "journal-1212",
 					 "REPLAY FAILURE fsck required! "
@@ -2318,12 +2322,13 @@
 
 /*
 ** read and replay the log
-** on a clean unmount, the journal header's next unflushed pointer will be to an invalid
-** transaction.  This tests that before finding all the transactions in the log, which makes normal mount times fast.
-**
-** After a crash, this starts with the next unflushed transaction, and replays until it finds one too old, or invalid.
-**
+** on a clean unmount, the journal header's next unflushed pointer will
+** be to an invalid transaction.  This tests that before finding all the
+** transactions in the log, which makes normal mount times fast.
+** After a crash, this starts with the next unflushed transaction, and
+** replays until it finds one too old, or invalid.
 ** On exit, it sets things up so the first transaction will work correctly.
+** NOTE: only called during fs mount
 */
 static int journal_read(struct super_block *sb)
 {
@@ -2501,14 +2506,18 @@
 			      "replayed %d transactions in %lu seconds\n",
 			      replay_count, get_seconds() - start);
 	}
+	/* needed to satisfy the locking in _update_journal_header_block */
+	reiserfs_write_lock(sb);
 	if (!bdev_read_only(sb->s_bdev) &&
 	    _update_journal_header_block(sb, journal->j_start,
 					 journal->j_last_flush_trans_id)) {
+		reiserfs_write_unlock(sb);
 		/* replay failed, caller must call free_journal_ram and abort
 		 ** the mount
 		 */
 		return -1;
 	}
+	reiserfs_write_unlock(sb);
 	return 0;
 }
 
@@ -2828,13 +2837,7 @@
 		goto free_and_return;
 	}
 
-	/*
-	 * Journal_read needs to be inspected in order to push down
-	 * the lock further inside (or even remove it).
-	 */
-	reiserfs_write_lock(sb);
 	ret = journal_read(sb);
-	reiserfs_write_unlock(sb);
 	if (ret < 0) {
 		reiserfs_warning(sb, "reiserfs-2006",
 				 "Replay Failure, unable to mount");
@@ -2923,9 +2926,9 @@
 	add_wait_queue(&journal->j_join_wait, &wait);
 	set_current_state(TASK_UNINTERRUPTIBLE);
 	if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) {
-		reiserfs_write_unlock(s);
+		int depth = reiserfs_write_unlock_nested(s);
 		schedule();
-		reiserfs_write_lock(s);
+		reiserfs_write_lock_nested(s, depth);
 	}
 	__set_current_state(TASK_RUNNING);
 	remove_wait_queue(&journal->j_join_wait, &wait);
@@ -2943,9 +2946,12 @@
 	struct reiserfs_journal *journal = SB_JOURNAL(sb);
 	unsigned long bcount = journal->j_bcount;
 	while (1) {
-		reiserfs_write_unlock(sb);
+		int depth;
+
+		depth = reiserfs_write_unlock_nested(sb);
 		schedule_timeout_uninterruptible(1);
-		reiserfs_write_lock(sb);
+		reiserfs_write_lock_nested(sb, depth);
+
 		journal->j_current_jl->j_state |= LIST_COMMIT_PENDING;
 		while ((atomic_read(&journal->j_wcount) > 0 ||
 			atomic_read(&journal->j_jlock)) &&
@@ -2976,6 +2982,7 @@
 	struct reiserfs_transaction_handle myth;
 	int sched_count = 0;
 	int retval;
+	int depth;
 
 	reiserfs_check_lock_depth(sb, "journal_begin");
 	BUG_ON(nblocks > journal->j_trans_max);
@@ -2996,9 +3003,9 @@
 
 	if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) {
 		unlock_journal(sb);
-		reiserfs_write_unlock(sb);
+		depth = reiserfs_write_unlock_nested(sb);
 		reiserfs_wait_on_write_block(sb);
-		reiserfs_write_lock(sb);
+		reiserfs_write_lock_nested(sb, depth);
 		PROC_INFO_INC(sb, journal.journal_relock_writers);
 		goto relock;
 	}
@@ -3821,6 +3828,7 @@
 	if (test_clear_buffer_journal_restore_dirty(bh) &&
 	    buffer_journal_dirty(bh)) {
 		struct reiserfs_journal_cnode *cn;
+		reiserfs_write_lock(sb);
 		cn = get_journal_hash_dev(sb,
 					  journal->j_list_hash_table,
 					  bh->b_blocknr);
@@ -3828,6 +3836,7 @@
 			set_buffer_journal_test(bh);
 			mark_buffer_dirty(bh);
 		}
+		reiserfs_write_unlock(sb);
 	}
 	clear_buffer_journal_prepared(bh);
 }
@@ -3911,6 +3920,7 @@
 	unsigned long jindex;
 	unsigned int commit_trans_id;
 	int trans_half;
+	int depth;
 
 	BUG_ON(th->t_refcount > 1);
 	BUG_ON(!th->t_trans_id);
@@ -4116,9 +4126,7 @@
 		next = cn->next;
 		free_cnode(sb, cn);
 		cn = next;
-		reiserfs_write_unlock(sb);
-		cond_resched();
-		reiserfs_write_lock(sb);
+		reiserfs_cond_resched(sb);
 	}
 
 	/* we are done  with both the c_bh and d_bh, but
@@ -4165,10 +4173,10 @@
 	 * is lost.
 	 */
 	if (!list_empty(&jl->j_tail_bh_list)) {
-		reiserfs_write_unlock(sb);
+		depth = reiserfs_write_unlock_nested(sb);
 		write_ordered_buffers(&journal->j_dirty_buffers_lock,
 				      journal, jl, &jl->j_tail_bh_list);
-		reiserfs_write_lock(sb);
+		reiserfs_write_lock_nested(sb, depth);
 	}
 	BUG_ON(!list_empty(&jl->j_tail_bh_list));
 	mutex_unlock(&jl->j_commit_mutex);

diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c
index d735bc8..045b83ef 100644
--- a/fs/reiserfs/lock.c
+++ b/fs/reiserfs/lock.c

@@ -48,30 +48,35 @@
 	}
 }
 
-/*
- * If we already own the lock, just exit and don't increase the depth.
- * Useful when we don't want to lock more than once.
- *
- * We always return the lock_depth we had before calling
- * this function.
- */
-int reiserfs_write_lock_once(struct super_block *s)
+int __must_check reiserfs_write_unlock_nested(struct super_block *s)
+{
+	struct reiserfs_sb_info *sb_i = REISERFS_SB(s);
+	int depth;
+
+	/* this can happen when the lock isn't always held */
+	if (sb_i->lock_owner != current)
+		return -1;
+
+	depth = sb_i->lock_depth;
+
+	sb_i->lock_depth = -1;
+	sb_i->lock_owner = NULL;
+	mutex_unlock(&sb_i->lock);
+
+	return depth;
+}
+
+void reiserfs_write_lock_nested(struct super_block *s, int depth)
 {
 	struct reiserfs_sb_info *sb_i = REISERFS_SB(s);
 
-	if (sb_i->lock_owner != current) {
-		mutex_lock(&sb_i->lock);
-		sb_i->lock_owner = current;
-		return sb_i->lock_depth++;
-	}
+	/* this can happen when the lock isn't always held */
+	if (depth == -1)
+		return;
 
-	return sb_i->lock_depth;
-}
-
-void reiserfs_write_unlock_once(struct super_block *s, int lock_depth)
-{
-	if (lock_depth == -1)
-		reiserfs_write_unlock(s);
+	mutex_lock(&sb_i->lock);
+	sb_i->lock_owner = current;
+	sb_i->lock_depth = depth;
 }
 
 /*
@@ -82,9 +87,7 @@
 {
 	struct reiserfs_sb_info *sb_i = REISERFS_SB(sb);
 
-	if (sb_i->lock_depth < 0)
-		reiserfs_panic(sb, "%s called without kernel lock held %d",
-			       caller);
+	WARN_ON(sb_i->lock_depth < 0);
 }
 
 #ifdef CONFIG_REISERFS_CHECK

diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 8567fb8..dc5236f 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c

@@ -325,7 +325,6 @@
 				      unsigned int flags)
 {
 	int retval;
-	int lock_depth;
 	struct inode *inode = NULL;
 	struct reiserfs_dir_entry de;
 	INITIALIZE_PATH(path_to_entry);
@@ -333,12 +332,7 @@
 	if (REISERFS_MAX_NAME(dir->i_sb->s_blocksize) < dentry->d_name.len)
 		return ERR_PTR(-ENAMETOOLONG);
 
-	/*
-	 * Might be called with or without the write lock, must be careful
-	 * to not recursively hold it in case we want to release the lock
-	 * before rescheduling.
-	 */
-	lock_depth = reiserfs_write_lock_once(dir->i_sb);
+	reiserfs_write_lock(dir->i_sb);
 
 	de.de_gen_number_bit_string = NULL;
 	retval =
@@ -349,7 +343,7 @@
 		inode = reiserfs_iget(dir->i_sb,
 				      (struct cpu_key *)&(de.de_dir_id));
 		if (!inode || IS_ERR(inode)) {
-			reiserfs_write_unlock_once(dir->i_sb, lock_depth);
+			reiserfs_write_unlock(dir->i_sb);
 			return ERR_PTR(-EACCES);
 		}
 
@@ -358,7 +352,7 @@
 		if (IS_PRIVATE(dir))
 			inode->i_flags |= S_PRIVATE;
 	}
-	reiserfs_write_unlock_once(dir->i_sb, lock_depth);
+	reiserfs_write_unlock(dir->i_sb);
 	if (retval == IO_ERROR) {
 		return ERR_PTR(-EIO);
 	}
@@ -727,7 +721,6 @@
 	struct inode *inode;
 	struct reiserfs_transaction_handle th;
 	struct reiserfs_security_handle security;
-	int lock_depth;
 	/* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
 	int jbegin_count =
 	    JOURNAL_PER_BALANCE_CNT * 3 +
@@ -753,7 +746,7 @@
 		return retval;
 	}
 	jbegin_count += retval;
-	lock_depth = reiserfs_write_lock_once(dir->i_sb);
+	reiserfs_write_lock(dir->i_sb);
 
 	retval = journal_begin(&th, dir->i_sb, jbegin_count);
 	if (retval) {
@@ -804,7 +797,7 @@
 	d_instantiate(dentry, inode);
 	retval = journal_end(&th, dir->i_sb, jbegin_count);
 out_failed:
-	reiserfs_write_unlock_once(dir->i_sb, lock_depth);
+	reiserfs_write_unlock(dir->i_sb);
 	return retval;
 }
 
@@ -920,7 +913,6 @@
 	struct reiserfs_transaction_handle th;
 	int jbegin_count;
 	unsigned long savelink;
-	int depth;
 
 	dquot_initialize(dir);
 
@@ -934,7 +926,7 @@
 	    JOURNAL_PER_BALANCE_CNT * 2 + 2 +
 	    4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
 
-	depth = reiserfs_write_lock_once(dir->i_sb);
+	reiserfs_write_lock(dir->i_sb);
 	retval = journal_begin(&th, dir->i_sb, jbegin_count);
 	if (retval)
 		goto out_unlink;
@@ -995,7 +987,7 @@
 
 	retval = journal_end(&th, dir->i_sb, jbegin_count);
 	reiserfs_check_path(&path);
-	reiserfs_write_unlock_once(dir->i_sb, depth);
+	reiserfs_write_unlock(dir->i_sb);
 	return retval;
 
       end_unlink:
@@ -1005,7 +997,7 @@
 	if (err)
 		retval = err;
       out_unlink:
-	reiserfs_write_unlock_once(dir->i_sb, depth);
+	reiserfs_write_unlock(dir->i_sb);
 	return retval;
 }
 

diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index c0b1112..54944d5 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c

@@ -358,12 +358,13 @@
 	dump_stack();
 #endif
 	if (sb)
-		panic(KERN_WARNING "REISERFS panic (device %s): %s%s%s: %s\n",
+		printk(KERN_WARNING "REISERFS panic (device %s): %s%s%s: %s\n",
 		      sb->s_id, id ? id : "", id ? " " : "",
 		      function, error_buf);
 	else
-		panic(KERN_WARNING "REISERFS panic: %s%s%s: %s\n",
+		printk(KERN_WARNING "REISERFS panic: %s%s%s: %s\n",
 		      id ? id : "", id ? " " : "", function, error_buf);
+	BUG();
 }
 
 void __reiserfs_error(struct super_block *sb, const char *id,

diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
index 3df5ce6..f8adaee 100644
--- a/fs/reiserfs/reiserfs.h
+++ b/fs/reiserfs/reiserfs.h

@@ -630,8 +630,8 @@
  */
 void reiserfs_write_lock(struct super_block *s);
 void reiserfs_write_unlock(struct super_block *s);
-int reiserfs_write_lock_once(struct super_block *s);
-void reiserfs_write_unlock_once(struct super_block *s, int lock_depth);
+int __must_check reiserfs_write_unlock_nested(struct super_block *s);
+void reiserfs_write_lock_nested(struct super_block *s, int depth);
 
 #ifdef CONFIG_REISERFS_CHECK
 void reiserfs_lock_check_recursive(struct super_block *s);
@@ -667,31 +667,33 @@
  * - The inode mutex
  */
 static inline void reiserfs_mutex_lock_safe(struct mutex *m,
-			       struct super_block *s)
+					    struct super_block *s)
 {
-	reiserfs_lock_check_recursive(s);
-	reiserfs_write_unlock(s);
+	int depth;
+
+	depth = reiserfs_write_unlock_nested(s);
 	mutex_lock(m);
-	reiserfs_write_lock(s);
+	reiserfs_write_lock_nested(s, depth);
 }
 
 static inline void
 reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass,
-			       struct super_block *s)
+				struct super_block *s)
 {
-	reiserfs_lock_check_recursive(s);
-	reiserfs_write_unlock(s);
+	int depth;
+
+	depth = reiserfs_write_unlock_nested(s);
 	mutex_lock_nested(m, subclass);
-	reiserfs_write_lock(s);
+	reiserfs_write_lock_nested(s, depth);
 }
 
 static inline void
 reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s)
 {
-	reiserfs_lock_check_recursive(s);
-	reiserfs_write_unlock(s);
-	down_read(sem);
-	reiserfs_write_lock(s);
+       int depth;
+       depth = reiserfs_write_unlock_nested(s);
+       down_read(sem);
+       reiserfs_write_lock_nested(s, depth);
 }
 
 /*
@@ -701,9 +703,11 @@
 static inline void reiserfs_cond_resched(struct super_block *s)
 {
 	if (need_resched()) {
-		reiserfs_write_unlock(s);
+		int depth;
+
+		depth = reiserfs_write_unlock_nested(s);
 		schedule();
-		reiserfs_write_lock(s);
+		reiserfs_write_lock_nested(s, depth);
 	}
 }
 

diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c
index 3ce02cf..a4ef5cd 100644
--- a/fs/reiserfs/resize.c
+++ b/fs/reiserfs/resize.c

@@ -34,6 +34,7 @@
 	unsigned long int block_count, free_blocks;
 	int i;
 	int copy_size;
+	int depth;
 
 	sb = SB_DISK_SUPER_BLOCK(s);
 
@@ -43,7 +44,9 @@
 	}
 
 	/* check the device size */
+	depth = reiserfs_write_unlock_nested(s);
 	bh = sb_bread(s, block_count_new - 1);
+	reiserfs_write_lock_nested(s, depth);
 	if (!bh) {
 		printk("reiserfs_resize: can\'t read last block\n");
 		return -EINVAL;
@@ -125,9 +128,12 @@
 		 * transaction begins, and the new bitmaps don't matter if the
 		 * transaction fails. */
 		for (i = bmap_nr; i < bmap_nr_new; i++) {
+			int depth;
 			/* don't use read_bitmap_block since it will cache
 			 * the uninitialized bitmap */
+			depth = reiserfs_write_unlock_nested(s);
 			bh = sb_bread(s, i * s->s_blocksize * 8);
+			reiserfs_write_lock_nested(s, depth);
 			if (!bh) {
 				vfree(bitmap);
 				return -EIO;
@@ -138,9 +144,9 @@
 
 			set_buffer_uptodate(bh);
 			mark_buffer_dirty(bh);
-			reiserfs_write_unlock(s);
+			depth = reiserfs_write_unlock_nested(s);
 			sync_dirty_buffer(bh);
-			reiserfs_write_lock(s);
+			reiserfs_write_lock_nested(s, depth);
 			// update bitmap_info stuff
 			bitmap[i].free_count = sb_blocksize(sb) * 8 - 1;
 			brelse(bh);

diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 2f40a4c..4d7d476 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c

@@ -524,14 +524,14 @@
  * the caller (search_by_key) will perform other schedule-unsafe
  * operations just after calling this function.
  *
- * @return true if we have unlocked
+ * @return lock depth to restore after the read, or -1 if never unlocked
  */
-static bool search_by_key_reada(struct super_block *s,
+static int search_by_key_reada(struct super_block *s,
 				struct buffer_head **bh,
 				b_blocknr_t *b, int num)
 {
 	int i, j;
-	bool unlocked = false;
+	int depth = -1;
 
 	for (i = 0; i < num; i++) {
 		bh[i] = sb_getblk(s, b[i]);
@@ -549,15 +549,13 @@
 		 * you have to make sure the prepared bit isn't set on this buffer
 		 */
 		if (!buffer_uptodate(bh[j])) {
-			if (!unlocked) {
-				reiserfs_write_unlock(s);
-				unlocked = true;
-			}
+			if (depth == -1)
+				depth = reiserfs_write_unlock_nested(s);
 			ll_rw_block(READA, 1, bh + j);
 		}
 		brelse(bh[j]);
 	}
-	return unlocked;
+	return depth;
 }
 
 /**************************************************************************
@@ -645,26 +643,26 @@
 		   have a pointer to it. */
 		if ((bh = last_element->pe_buffer =
 		     sb_getblk(sb, block_number))) {
-			bool unlocked = false;
+
+			/*
+			 * We'll need to drop the lock if we encounter any
+			 * buffers that need to be read. If all of them are
+			 * already up to date, we don't need to drop the lock.
+			 */
+			int depth = -1;
 
 			if (!buffer_uptodate(bh) && reada_count > 1)
-				/* may unlock the write lock */
-				unlocked = search_by_key_reada(sb, reada_bh,
+				depth = search_by_key_reada(sb, reada_bh,
 						    reada_blocks, reada_count);
-			/*
-			 * If we haven't already unlocked the write lock,
-			 * then we need to do that here before reading
-			 * the current block
-			 */
-			if (!buffer_uptodate(bh) && !unlocked) {
-				reiserfs_write_unlock(sb);
-				unlocked = true;
-			}
+
+			if (!buffer_uptodate(bh) && depth == -1)
+				depth = reiserfs_write_unlock_nested(sb);
+
 			ll_rw_block(READ, 1, &bh);
 			wait_on_buffer(bh);
 
-			if (unlocked)
-				reiserfs_write_lock(sb);
+			if (depth != -1)
+				reiserfs_write_lock_nested(sb, depth);
 			if (!buffer_uptodate(bh))
 				goto io_error;
 		} else {
@@ -1059,9 +1057,7 @@
 			reiserfs_free_block(th, inode, block, 1);
 		    }
 
-		    reiserfs_write_unlock(sb);
-		    cond_resched();
-		    reiserfs_write_lock(sb);
+		    reiserfs_cond_resched(sb);
 
 		    if (item_moved (&s_ih, path))  {
 			need_re_search = 1;

diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 7e81d97..60d0932 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c

@@ -624,7 +624,6 @@
 	struct reiserfs_transaction_handle th;
 
 	int err = 0;
-	int lock_depth;
 
 	if (inode->i_sb->s_flags & MS_RDONLY) {
 		reiserfs_warning(inode->i_sb, "clm-6006",
@@ -632,7 +631,7 @@
 				 inode->i_ino);
 		return;
 	}
-	lock_depth = reiserfs_write_lock_once(inode->i_sb);
+	reiserfs_write_lock(inode->i_sb);
 
 	/* this is really only used for atime updates, so they don't have
 	 ** to be included in O_SYNC or fsync
@@ -645,7 +644,7 @@
 	journal_end(&th, inode->i_sb, 1);
 
 out:
-	reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+	reiserfs_write_unlock(inode->i_sb);
 }
 
 static int reiserfs_show_options(struct seq_file *seq, struct dentry *root)
commit	278f6679f454bf185a07d9a4ca355b153482d17a	[log] [tgz]
author	Jeff Mahoney <jeffm@suse.com>	Thu Aug 08 17:34:46 2013 -0400
committer	Jeff Mahoney <jeffm@suse.de>	Thu Aug 08 17:34:46 2013 -0400
tree	ffead073e67cfdc1ddfc3949ebc93c06dcaaab8f
parent	4c05141df57f4ffc1a9a28f1925434924179bfe4 [diff]