[PATCH] mutex subsystem, semaphore to mutex: VFS, ->i_sem

This patch converts the inode semaphore (->i_sem) to a mutex (->i_mutex). I
have tested it on XFS and compiled as broad a configuration as practical on
ia64; your mileage with other setups may vary.
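
To illustrate the conversion pattern applied throughout the diff below (a
minimal sketch, not code taken from this patch; the helper functions are
hypothetical, and the "before" variant only compiles against a pre-conversion
tree where struct inode still has i_sem):

	#include <linux/fs.h>
	#include <linux/mutex.h>

	/* Before this patch: the inode was serialised with a semaphore. */
	static void example_touch_inode_old(struct inode *inode)
	{
		down(&inode->i_sem);		/* sleep until the semaphore is free */
		/* ... modify inode state ... */
		up(&inode->i_sem);		/* any context may release it */
	}

	/* After this patch: i_sem becomes i_mutex, with strict owner
	 * semantics (only the task that locked it may unlock it). */
	static void example_touch_inode_new(struct inode *inode)
	{
		mutex_lock(&inode->i_mutex);
		/* ... modify inode state ... */
		mutex_unlock(&inode->i_mutex);
	}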

Modified-by: Ingo Molnar <mingo@elte.hu>

(finished the conversion)

Signed-off-by: Jes Sorensen <jes@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/mm/filemap.c b/mm/filemap.c
index 478f4c7..5fca273 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -61,7 +61,7 @@
  *      ->swap_lock		(exclusive_swap_page, others)
  *        ->mapping->tree_lock
  *
- *  ->i_sem
+ *  ->i_mutex
  *    ->i_mmap_lock		(truncate->unmap_mapping_range)
  *
  *  ->mmap_sem
@@ -73,9 +73,9 @@
  *    ->lock_page		(access_process_vm)
  *
  *  ->mmap_sem
- *    ->i_sem			(msync)
+ *    ->i_mutex			(msync)
  *
- *  ->i_sem
+ *  ->i_mutex
  *    ->i_alloc_sem             (various)
  *
  *  ->inode_lock
@@ -276,7 +276,7 @@
  * integrity" operation.  It waits upon in-flight writeout before starting and
  * waiting upon new writeout.  If there was an IO error, return it.
  *
- * We need to re-take i_sem during the generic_osync_inode list walk because
+ * We need to re-take i_mutex during the generic_osync_inode list walk because
  * it is otherwise livelockable.
  */
 int sync_page_range(struct inode *inode, struct address_space *mapping,
@@ -290,9 +290,9 @@
 		return 0;
 	ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1);
 	if (ret == 0) {
-		down(&inode->i_sem);
+		mutex_lock(&inode->i_mutex);
 		ret = generic_osync_inode(inode, mapping, OSYNC_METADATA);
-		up(&inode->i_sem);
+		mutex_unlock(&inode->i_mutex);
 	}
 	if (ret == 0)
 		ret = wait_on_page_writeback_range(mapping, start, end);
@@ -301,7 +301,7 @@
 EXPORT_SYMBOL(sync_page_range);
 
 /*
- * Note: Holding i_sem across sync_page_range_nolock is not a good idea
+ * Note: Holding i_mutex across sync_page_range_nolock is not a good idea
  * as it forces O_SYNC writers to different parts of the same file
  * to be serialised right until io completion.
  */
@@ -1892,7 +1892,7 @@
 	/*
 	 * Sync the fs metadata but not the minor inode changes and
 	 * of course not the data as we did direct DMA for the IO.
-	 * i_sem is held, which protects generic_osync_inode() from
+	 * i_mutex is held, which protects generic_osync_inode() from
 	 * livelocking.
 	 */
 	if (written >= 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
@@ -2195,10 +2195,10 @@
 
 	BUG_ON(iocb->ki_pos != pos);
 
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	ret = __generic_file_aio_write_nolock(iocb, &local_iov, 1,
 						&iocb->ki_pos);
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 
 	if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
 		ssize_t err;
@@ -2220,9 +2220,9 @@
 	struct iovec local_iov = { .iov_base = (void __user *)buf,
 					.iov_len = count };
 
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	ret = __generic_file_write_nolock(file, &local_iov, 1, ppos);
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 
 	if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
 		ssize_t err;
@@ -2256,9 +2256,9 @@
 	struct inode *inode = mapping->host;
 	ssize_t ret;
 
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	ret = __generic_file_write_nolock(file, iov, nr_segs, ppos);
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 
 	if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
 		int err;
@@ -2272,7 +2272,7 @@
 EXPORT_SYMBOL(generic_file_writev);
 
 /*
- * Called under i_sem for writes to S_ISREG files.   Returns -EIO if something
+ * Called under i_mutex for writes to S_ISREG files.   Returns -EIO if something
  * went wrong during pagecache shootdown.
  */
 static ssize_t
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 9cf687e..e2b34e9 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -338,7 +338,7 @@
 	*ppos = pos;
 	/*
 	 * No need to use i_size_read() here, the i_size
-	 * cannot change under us because we hold i_sem.
+	 * cannot change under us because we hold i_mutex.
 	 */
 	if (pos > inode->i_size) {
 		i_size_write(inode, pos);
@@ -358,7 +358,7 @@
 	loff_t pos;
 	ssize_t ret;
 
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 
 	if (!access_ok(VERIFY_READ, buf, len)) {
 		ret=-EFAULT;
@@ -390,7 +390,7 @@
  out_backing:
 	current->backing_dev_info = NULL;
  out_up:
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(xip_file_write);
diff --git a/mm/memory.c b/mm/memory.c
index 3944fec..7a11ddd 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1784,13 +1784,13 @@
 	if (!inode->i_op || !inode->i_op->truncate_range)
 		return -ENOSYS;
 
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 	down_write(&inode->i_alloc_sem);
 	unmap_mapping_range(mapping, offset, (end - offset), 1);
 	truncate_inode_pages_range(mapping, offset, end);
 	inode->i_op->truncate_range(inode, offset, end);
 	up_write(&inode->i_alloc_sem);
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 
 	return 0;
 }
diff --git a/mm/msync.c b/mm/msync.c
index 1b5b6f6..3563a56 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -137,7 +137,7 @@
 			ret = filemap_fdatawrite(mapping);
 			if (file->f_op && file->f_op->fsync) {
 				/*
-				 * We don't take i_sem here because mmap_sem
+				 * We don't take i_mutex here because mmap_sem
 				 * is already held.
 				 */
 				err = file->f_op->fsync(file,file->f_dentry,1);
diff --git a/mm/rmap.c b/mm/rmap.c
index 66ec430..dfbb89f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -20,13 +20,13 @@
 /*
  * Lock ordering in mm:
  *
- * inode->i_sem	(while writing or truncating, not reading or faulting)
+ * inode->i_mutex	(while writing or truncating, not reading or faulting)
  *   inode->i_alloc_sem
  *
  * When a page fault occurs in writing from user to file, down_read
- * of mmap_sem nests within i_sem; in sys_msync, i_sem nests within
- * down_read of mmap_sem; i_sem and down_write of mmap_sem are never
- * taken together; in truncation, i_sem is taken outermost.
+ * of mmap_sem nests within i_mutex; in sys_msync, i_mutex nests within
+ * down_read of mmap_sem; i_mutex and down_write of mmap_sem are never
+ * taken together; in truncation, i_mutex is taken outermost.
  *
  * mm->mmap_sem
  *   page->flags PG_locked (lock_page)
diff --git a/mm/shmem.c b/mm/shmem.c
index a1f2f02..343b3c0 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1370,7 +1370,7 @@
 	if (!access_ok(VERIFY_READ, buf, count))
 		return -EFAULT;
 
-	down(&inode->i_sem);
+	mutex_lock(&inode->i_mutex);
 
 	pos = *ppos;
 	written = 0;
@@ -1455,7 +1455,7 @@
 	if (written)
 		err = written;
 out:
-	up(&inode->i_sem);
+	mutex_unlock(&inode->i_mutex);
 	return err;
 }
 
@@ -1491,7 +1491,7 @@
 
 		/*
 		 * We must evaluate after, since reads (unlike writes)
-		 * are called without i_sem protection against truncate
+		 * are called without i_mutex protection against truncate
 		 */
 		nr = PAGE_CACHE_SIZE;
 		i_size = i_size_read(inode);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 80f948a..6544565 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1187,9 +1187,9 @@
 		set_blocksize(bdev, p->old_block_size);
 		bd_release(bdev);
 	} else {
-		down(&inode->i_sem);
+		mutex_lock(&inode->i_mutex);
 		inode->i_flags &= ~S_SWAPFILE;
-		up(&inode->i_sem);
+		mutex_unlock(&inode->i_mutex);
 	}
 	filp_close(swap_file, NULL);
 	err = 0;
@@ -1406,7 +1406,7 @@
 		p->bdev = bdev;
 	} else if (S_ISREG(inode->i_mode)) {
 		p->bdev = inode->i_sb->s_bdev;
-		down(&inode->i_sem);
+		mutex_lock(&inode->i_mutex);
 		did_down = 1;
 		if (IS_SWAPFILE(inode)) {
 			error = -EBUSY;
@@ -1596,7 +1596,7 @@
 	if (did_down) {
 		if (!error)
 			inode->i_flags |= S_SWAPFILE;
-		up(&inode->i_sem);
+		mutex_unlock(&inode->i_mutex);
 	}
 	return error;
 }
diff --git a/mm/truncate.c b/mm/truncate.c
index b1a463d..6cb3fff 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -196,7 +196,7 @@
  * @mapping: mapping to truncate
  * @lstart: offset from which to truncate
  *
- * Called under (and serialised by) inode->i_sem.
+ * Called under (and serialised by) inode->i_mutex.
  */
 void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
 {