vfs: pagecache usage optimization for pagesize!=blocksize

When we read some part of a file through pagecache, if there is a
pagecache of corresponding index but this page is not uptodate, read IO
is issued and this page will be uptodate.

I think this is good for pagesize == blocksize environment but there is
room for improvement on pagesize != blocksize environment.  Because in
this case a page can have multiple buffers and even if a page is not
uptodate, some buffers can be uptodate.

So I suggest that when all buffers which correspond to a part of a file
that we want to read are uptodate, use this pagecache and copy data from
this pagecache to user buffer even if a page is not uptodate.  This can
reduce read IO and improve system throughput.

I wrote a benchmark program and got result number with this program.

This benchmark do:

  1: mount and open a test file.

  2: create a 512MB file.

  3: close a file and umount.

  4: mount and again open a test file.

  5: pwrite randomly 300000 times on a test file.  offset is aligned
     by IO size(1024bytes).

  6: measure time of preading randomly 100000 times on a test file.

The result was:
	2.6.26
        330 sec

	2.6.26-patched
        226 sec

Arch:i386
Filesystem:ext3
Blocksize:1024 bytes
Memory: 1GB

On ext3/4, a file is written through buffer/block.  So random read/write
mixed workloads or random read after random write workloads are optimized
with this patch under pagesize != blocksize environment.  This test result
showed this.

The benchmark program is as follows:

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mount.h>

#define LEN 1024
#define LOOP 1024*512 /* 512MB */

main(void)
{
	unsigned long i, offset, filesize;
	int fd;
	char buf[LEN];
	time_t t1, t2;

	if (mount("/dev/sda1", "/root/test1/", "ext3", 0, 0) < 0) {
		perror("cannot mount\n");
		exit(1);
	}
	memset(buf, 0, LEN);
	fd = open("/root/test1/testfile", O_CREAT|O_RDWR|O_TRUNC);
	if (fd < 0) {
		perror("cannot open file\n");
		exit(1);
	}
	for (i = 0; i < LOOP; i++)
		write(fd, buf, LEN);
	close(fd);
	if (umount("/root/test1/") < 0) {
		perror("cannot umount\n");
		exit(1);
	}
	if (mount("/dev/sda1", "/root/test1/", "ext3", 0, 0) < 0) {
		perror("cannot mount\n");
		exit(1);
	}
	fd = open("/root/test1/testfile", O_RDWR);
	if (fd < 0) {
		perror("cannot open file\n");
		exit(1);
	}

	filesize = LEN * LOOP;
	for (i = 0; i < 300000; i++){
		offset = (random() % filesize) & (~(LEN - 1));
		pwrite(fd, buf, LEN, offset);
	}
	printf("start test\n");
	time(&t1);
	for (i = 0; i < 100000; i++){
		offset = (random() % filesize) & (~(LEN - 1));
		pread(fd, buf, LEN, offset);
	}
	time(&t2);
	printf("%ld sec\n", t2-t1);
	close(fd);
	if (umount("/root/test1/") < 0) {
		perror("cannot umount\n");
		exit(1);
	}
}

Signed-off-by: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Jan Kara <jack@ucw.cz>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8ca2763..9843b04 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2806,59 +2806,63 @@
 }
 
 static const struct address_space_operations ext4_ordered_aops = {
-	.readpage	= ext4_readpage,
-	.readpages	= ext4_readpages,
-	.writepage	= ext4_normal_writepage,
-	.sync_page	= block_sync_page,
-	.write_begin	= ext4_write_begin,
-	.write_end	= ext4_ordered_write_end,
-	.bmap		= ext4_bmap,
-	.invalidatepage	= ext4_invalidatepage,
-	.releasepage	= ext4_releasepage,
-	.direct_IO	= ext4_direct_IO,
-	.migratepage	= buffer_migrate_page,
+	.readpage		= ext4_readpage,
+	.readpages		= ext4_readpages,
+	.writepage		= ext4_normal_writepage,
+	.sync_page		= block_sync_page,
+	.write_begin		= ext4_write_begin,
+	.write_end		= ext4_ordered_write_end,
+	.bmap			= ext4_bmap,
+	.invalidatepage		= ext4_invalidatepage,
+	.releasepage		= ext4_releasepage,
+	.direct_IO		= ext4_direct_IO,
+	.migratepage		= buffer_migrate_page,
+	.is_partially_uptodate  = block_is_partially_uptodate,
 };
 
 static const struct address_space_operations ext4_writeback_aops = {
-	.readpage	= ext4_readpage,
-	.readpages	= ext4_readpages,
-	.writepage	= ext4_normal_writepage,
-	.sync_page	= block_sync_page,
-	.write_begin	= ext4_write_begin,
-	.write_end	= ext4_writeback_write_end,
-	.bmap		= ext4_bmap,
-	.invalidatepage	= ext4_invalidatepage,
-	.releasepage	= ext4_releasepage,
-	.direct_IO	= ext4_direct_IO,
-	.migratepage	= buffer_migrate_page,
+	.readpage		= ext4_readpage,
+	.readpages		= ext4_readpages,
+	.writepage		= ext4_normal_writepage,
+	.sync_page		= block_sync_page,
+	.write_begin		= ext4_write_begin,
+	.write_end		= ext4_writeback_write_end,
+	.bmap			= ext4_bmap,
+	.invalidatepage		= ext4_invalidatepage,
+	.releasepage		= ext4_releasepage,
+	.direct_IO		= ext4_direct_IO,
+	.migratepage		= buffer_migrate_page,
+	.is_partially_uptodate  = block_is_partially_uptodate,
 };
 
 static const struct address_space_operations ext4_journalled_aops = {
-	.readpage	= ext4_readpage,
-	.readpages	= ext4_readpages,
-	.writepage	= ext4_journalled_writepage,
-	.sync_page	= block_sync_page,
-	.write_begin	= ext4_write_begin,
-	.write_end	= ext4_journalled_write_end,
-	.set_page_dirty	= ext4_journalled_set_page_dirty,
-	.bmap		= ext4_bmap,
-	.invalidatepage	= ext4_invalidatepage,
-	.releasepage	= ext4_releasepage,
+	.readpage		= ext4_readpage,
+	.readpages		= ext4_readpages,
+	.writepage		= ext4_journalled_writepage,
+	.sync_page		= block_sync_page,
+	.write_begin		= ext4_write_begin,
+	.write_end		= ext4_journalled_write_end,
+	.set_page_dirty		= ext4_journalled_set_page_dirty,
+	.bmap			= ext4_bmap,
+	.invalidatepage		= ext4_invalidatepage,
+	.releasepage		= ext4_releasepage,
+	.is_partially_uptodate  = block_is_partially_uptodate,
 };
 
 static const struct address_space_operations ext4_da_aops = {
-	.readpage	= ext4_readpage,
-	.readpages	= ext4_readpages,
-	.writepage	= ext4_da_writepage,
-	.writepages	= ext4_da_writepages,
-	.sync_page	= block_sync_page,
-	.write_begin	= ext4_da_write_begin,
-	.write_end	= ext4_da_write_end,
-	.bmap		= ext4_bmap,
-	.invalidatepage	= ext4_da_invalidatepage,
-	.releasepage	= ext4_releasepage,
-	.direct_IO	= ext4_direct_IO,
-	.migratepage	= buffer_migrate_page,
+	.readpage		= ext4_readpage,
+	.readpages		= ext4_readpages,
+	.writepage		= ext4_da_writepage,
+	.writepages		= ext4_da_writepages,
+	.sync_page		= block_sync_page,
+	.write_begin		= ext4_da_write_begin,
+	.write_end		= ext4_da_write_end,
+	.bmap			= ext4_bmap,
+	.invalidatepage		= ext4_da_invalidatepage,
+	.releasepage		= ext4_releasepage,
+	.direct_IO		= ext4_direct_IO,
+	.migratepage		= buffer_migrate_page,
+	.is_partially_uptodate  = block_is_partially_uptodate,
 };
 
 void ext4_set_aops(struct inode *inode)