NFS: add I/O performance counters

Invoke the byte and event counter macros where we want to count bytes and
events.

Clean-up: fix a possible NULL dereference in nfs_lock, and simplify
nfs_file_open.

Test-plan:
fsx and iozone on UP and SMP systems, with and without pre-emption.  Watch
for memory overwrite bugs, and performance loss (significantly more CPU
required per op).

Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a1554be..151b8dd 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -34,6 +34,7 @@
 
 #include "nfs4_fs.h"
 #include "delegation.h"
+#include "iostat.h"
 
 #define NFS_PARANOIA 1
 /* #define NFS_DEBUG_VERBOSE 1 */
@@ -507,6 +508,8 @@
 	struct nfs_fattr fattr;
 	long		res;
 
+	nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);
+
 	lock_kernel();
 
 	res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
@@ -713,6 +716,7 @@
 	parent = dget_parent(dentry);
 	lock_kernel();
 	dir = parent->d_inode;
+	nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
 	inode = dentry->d_inode;
 
 	if (!inode) {
@@ -844,6 +848,7 @@
 
 	dfprintk(VFS, "NFS: lookup(%s/%s)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name);
+	nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
 
 	res = ERR_PTR(-ENAMETOOLONG);
 	if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
@@ -1241,6 +1246,7 @@
 	dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name, 
 		atomic_read(&dentry->d_count));
+	nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
 
 #ifdef NFS_PARANOIA
 if (!dentry->d_inode)
@@ -1640,6 +1646,8 @@
 	struct rpc_cred *cred;
 	int res = 0;
 
+	nfs_inc_stats(inode, NFSIOS_VFSACCESS);
+
 	if (mask == 0)
 		goto out;
 	/* Is this sys_access() ? */
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 4e9b3a1..fc07ce4 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -54,6 +54,8 @@
 #include <asm/uaccess.h>
 #include <asm/atomic.h>
 
+#include "iostat.h"
+
 #define NFSDBG_FACILITY		NFSDBG_VFS
 #define MAX_DIRECTIO_SIZE	(4096UL << PAGE_SHIFT)
 
@@ -67,6 +69,7 @@
 	struct kref		kref;		/* release manager */
 	struct list_head	list;		/* nfs_read_data structs */
 	wait_queue_head_t	wait;		/* wait for i/o completion */
+	struct inode *		inode;		/* target file of I/O */
 	struct page **		pages;		/* pages in our buffer */
 	unsigned int		npages;		/* count of pages */
 	atomic_t		complete,	/* i/os we're waiting for */
@@ -357,7 +360,9 @@
 
 	dreq->pages = pages;
 	dreq->npages = nr_pages;
+	dreq->inode = inode;
 
+	nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count);
 	rpc_clnt_sigmask(clnt, &oldset);
 	nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count,
 				 file_offset);
@@ -572,6 +577,7 @@
                         return page_count;
                 }
 
+		nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, size);
 		result = nfs_direct_write_seg(inode, ctx, user_addr, size,
 				file_offset, pages, page_count);
 		nfs_free_user_pages(pages, page_count, 0);
@@ -581,6 +587,7 @@
 				break;
 			return result;
 		}
+		nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result);
 		tot_bytes += result;
 		file_offset += result;
 		if (result < size)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 387809f..1cf07e4 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -32,6 +32,7 @@
 #include <asm/system.h>
 
 #include "delegation.h"
+#include "iostat.h"
 
 #define NFSDBG_FACILITY		NFSDBG_FILE
 
@@ -102,18 +103,15 @@
 static int
 nfs_file_open(struct inode *inode, struct file *filp)
 {
-	struct nfs_server *server = NFS_SERVER(inode);
-	int (*open)(struct inode *, struct file *);
 	int res;
 
 	res = nfs_check_flags(filp->f_flags);
 	if (res)
 		return res;
 
+	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
 	lock_kernel();
-	/* Do NFSv4 open() call */
-	if ((open = server->rpc_ops->file_open) != NULL)
-		res = open(inode, filp);
+	res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp);
 	unlock_kernel();
 	return res;
 }
@@ -124,6 +122,7 @@
 	/* Ensure that dirty pages are flushed out with the right creds */
 	if (filp->f_mode & FMODE_WRITE)
 		filemap_fdatawrite(filp->f_mapping);
+	nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
 	return NFS_PROTO(inode)->file_release(inode, filp);
 }
 
@@ -199,6 +198,7 @@
 
 	if ((file->f_mode & FMODE_WRITE) == 0)
 		return 0;
+	nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
 	lock_kernel();
 	/* Ensure that data+attribute caches are up to date after close() */
 	status = nfs_wb_all(inode);
@@ -229,6 +229,7 @@
 		(unsigned long) count, (unsigned long) pos);
 
 	result = nfs_revalidate_file(inode, iocb->ki_filp);
+	nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count);
 	if (!result)
 		result = generic_file_aio_read(iocb, buf, count, pos);
 	return result;
@@ -282,6 +283,7 @@
 
 	dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
 
+	nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
 	lock_kernel();
 	status = nfs_wb_all(inode);
 	if (!status) {
@@ -378,6 +380,7 @@
 	if (!count)
 		goto out;
 
+	nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
 	result = generic_file_aio_write(iocb, buf, count, pos);
 out:
 	return result;
@@ -517,9 +520,7 @@
 			inode->i_sb->s_id, inode->i_ino,
 			fl->fl_type, fl->fl_flags,
 			(long long)fl->fl_start, (long long)fl->fl_end);
-
-	if (!inode)
-		return -EINVAL;
+	nfs_inc_stats(inode, NFSIOS_VFSLOCK);
 
 	/* No mandatory locks over NFS */
 	if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID &&
@@ -544,9 +545,6 @@
 			inode->i_sb->s_id, inode->i_ino,
 			fl->fl_type, fl->fl_flags);
 
-	if (!inode)
-		return -EINVAL;
-
 	/*
 	 * No BSD flocks over NFS allowed.
 	 * Note: we could try to fake a POSIX lock request here by
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 86b756f..8ee7411 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -753,6 +753,8 @@
 	struct nfs_inode *nfsi = NFS_I(inode);
 	int mode = inode->i_mode;
 
+	nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
+
 	NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
 	NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
 
@@ -940,6 +942,8 @@
 	struct nfs_fattr fattr;
 	int error;
 
+	nfs_inc_stats(inode, NFSIOS_VFSSETATTR);
+
 	if (attr->ia_valid & ATTR_SIZE) {
 		if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode))
 			attr->ia_valid &= ~ATTR_SIZE;
@@ -993,6 +997,7 @@
 		spin_unlock(&inode->i_lock);
 	}
 	if ((attr->ia_valid & ATTR_SIZE) != 0) {
+		nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
 		inode->i_size = attr->ia_size;
 		vmtruncate(inode, attr->ia_size);
 	}
@@ -1278,6 +1283,7 @@
  */
 int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 {
+	nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
 	if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
 			&& !nfs_attribute_timeout(inode))
 		return NFS_STALE(inode) ? -ESTALE : 0;
@@ -1294,6 +1300,7 @@
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	if (nfsi->cache_validity & NFS_INO_INVALID_DATA) {
+		nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
 		if (S_ISREG(inode->i_mode))
 			nfs_sync_mapping(mapping);
 		invalidate_inode_pages2(mapping);
@@ -1615,6 +1622,7 @@
 
 	/* Update attrtimeo value if we're out of the unstable period */
 	if (invalid & NFS_INO_INVALID_ATTR) {
+		nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
 		nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
 		nfsi->attrtimeo_timestamp = jiffies;
 	} else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) {
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 05eb43f..ae3ddd2 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -31,6 +31,8 @@
 
 #include <asm/system.h>
 
+#include "iostat.h"
+
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
 
 static int nfs_pagein_one(struct list_head *, struct inode *);
@@ -133,6 +135,8 @@
 		}
 		count -= result;
 		rdata->args.pgbase += result;
+		nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, result);
+
 		/* Note: result == 0 should only happen if we're caching
 		 * a write that extends the file and punches a hole.
 		 */
@@ -458,8 +462,11 @@
 	dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
 		task->tk_pid, status);
 
+	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
+
 	/* Is this a short read? */
 	if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
+		nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
 		/* Has the server at least made some progress? */
 		if (resp->count != 0) {
 			/* Yes, so retry the read at the end of the data */
@@ -491,6 +498,9 @@
 
 	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
 		page, PAGE_CACHE_SIZE, page->index);
+	nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
+	nfs_add_stats(inode, NFSIOS_READPAGES, 1);
+
 	/*
 	 * Try to flush any pending writes to the file..
 	 *
@@ -570,6 +580,7 @@
 			inode->i_sb->s_id,
 			(long long)NFS_FILEID(inode),
 			nr_pages);
+	nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
 
 	if (filp == NULL) {
 		desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
@@ -582,6 +593,7 @@
 	if (!list_empty(&head)) {
 		int err = nfs_pagein_list(&head, server->rpages);
 		if (!ret)
+			nfs_add_stats(inode, NFSIOS_READPAGES, err);
 			ret = err;
 	}
 	put_nfs_open_context(desc.ctx);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 92ecf24..e7c8361 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -63,6 +63,7 @@
 #include <linux/smp_lock.h>
 
 #include "delegation.h"
+#include "iostat.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
 
@@ -134,6 +135,7 @@
 	end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
 	if (i_size >= end)
 		return;
+	nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
 	i_size_write(inode, end);
 }
 
@@ -223,6 +225,7 @@
 	        wdata->args.pgbase += result;
 		written += result;
 		count -= result;
+		nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result);
 	} while (count);
 	/* Update file length */
 	nfs_grow_file(page, offset, written);
@@ -279,6 +282,9 @@
 	int priority = wb_priority(wbc);
 	int err;
 
+	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
+	nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
+
 	/*
 	 * Note: We need to ensure that we have a reference to the inode
 	 *       if we are to do asynchronous writes. If not, waiting
@@ -343,6 +349,8 @@
 	struct inode *inode = mapping->host;
 	int err;
 
+	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
+
 	err = generic_writepages(mapping, wbc);
 	if (err)
 		return err;
@@ -354,6 +362,7 @@
 	err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc));
 	if (err < 0)
 		goto out;
+	nfs_add_stats(inode, NFSIOS_WRITEPAGES, err);
 	wbc->nr_to_write -= err;
 	if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) {
 		err = nfs_wait_on_requests(inode, 0, 0);
@@ -596,6 +605,9 @@
 
 	if (!bdi_write_congested(bdi))
 		return 0;
+
+	nfs_inc_stats(mapping->host, NFSIOS_CONGESTIONWAIT);
+
 	if (intr) {
 		struct rpc_clnt *clnt = NFS_CLIENT(mapping->host);
 		sigset_t oldset;
@@ -749,6 +761,8 @@
 	struct nfs_page	*req;
 	int		status = 0;
 
+	nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
+
 	dprintk("NFS:      nfs_updatepage(%s/%s %d@%Ld)\n",
 		file->f_dentry->d_parent->d_name.name,
 		file->f_dentry->d_name.name, count,
@@ -1152,6 +1166,8 @@
 	dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
 		task->tk_pid, task->tk_status);
 
+	nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
+
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 	if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
 		/* We tried a write call, but the server did not
@@ -1177,6 +1193,8 @@
 	if (task->tk_status >= 0 && resp->count < argp->count) {
 		static unsigned long    complain;
 
+		nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);
+
 		/* Has the server at least made some progress? */
 		if (resp->count != 0) {
 			/* Was this an NFSv2 write or an NFSv3 stable write? */