[GFS2] Red Hat bz 228540: owner references

In Testing the previously posted and accepted patch for
https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=228540
I uncovered some gfs2 badness.  It turns out that the current
gfs2 code saves off a process pointer when glocks is taken
in both the glock and glock holder structures.  Those
structures will persist in memory long after the process has
ended; pointers to poisoned memory.

This problem isn't caused by the 228540 fix; the new capability
introduced by the fix just uncovered the problem.

I wrote this patch that avoids saving process pointers
and instead saves off the process pid.  Rather than
referencing the bad pointers, it now does process lookups.
There is special code that makes the output nicer for
printing holder information for processes that have ended.

This patch also adds a stub for the new "sprint_symbol"
function that exists in Andrew Morton's -mm patch set, but
won't go into the base kernel until 2.6.22, since it adds
functionality but doesn't fix a bug.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index b8aa816..d2e3094 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -25,6 +25,8 @@
 #include <asm/uaccess.h>
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -54,6 +56,7 @@
 typedef void (*glock_examiner) (struct gfs2_glock * gl);
 
 static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
+static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl);
 static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh);
 static void gfs2_glock_drop_th(struct gfs2_glock *gl);
 static DECLARE_RWSEM(gfs2_umount_flush_sem);
@@ -64,6 +67,7 @@
 #define GFS2_GL_HASH_MASK       (GFS2_GL_HASH_SIZE - 1)
 
 static struct gfs2_gl_hash_bucket gl_hash_table[GFS2_GL_HASH_SIZE];
+static struct dentry *gfs2_root;
 
 /*
  * Despite what you might think, the numbers below are not arbitrary :-)
@@ -312,7 +316,7 @@
 	atomic_set(&gl->gl_ref, 1);
 	gl->gl_state = LM_ST_UNLOCKED;
 	gl->gl_hash = hash;
-	gl->gl_owner = NULL;
+	gl->gl_owner_pid = 0;
 	gl->gl_ip = 0;
 	gl->gl_ops = glops;
 	gl->gl_req_gh = NULL;
@@ -376,7 +380,7 @@
 	INIT_LIST_HEAD(&gh->gh_list);
 	gh->gh_gl = gl;
 	gh->gh_ip = (unsigned long)__builtin_return_address(0);
-	gh->gh_owner = current;
+	gh->gh_owner_pid = current->pid;
 	gh->gh_state = state;
 	gh->gh_flags = flags;
 	gh->gh_error = 0;
@@ -601,7 +605,7 @@
 	if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
 		list_add_tail(&gh.gh_list, &gl->gl_waiters1);
 	} else {
-		gl->gl_owner = current;
+		gl->gl_owner_pid = current->pid;
 		gl->gl_ip = (unsigned long)__builtin_return_address(0);
 		clear_bit(HIF_WAIT, &gh.gh_iflags);
 		smp_mb();
@@ -628,7 +632,7 @@
 	if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
 		acquired = 0;
 	} else {
-		gl->gl_owner = current;
+		gl->gl_owner_pid = current->pid;
 		gl->gl_ip = (unsigned long)__builtin_return_address(0);
 	}
 	spin_unlock(&gl->gl_spin);
@@ -646,7 +650,7 @@
 {
 	spin_lock(&gl->gl_spin);
 	clear_bit(GLF_LOCK, &gl->gl_flags);
-	gl->gl_owner = NULL;
+	gl->gl_owner_pid = 0;
 	gl->gl_ip = 0;
 	run_queue(gl);
 	BUG_ON(!spin_is_locked(&gl->gl_spin));
@@ -999,12 +1003,12 @@
 }
 
 static inline struct gfs2_holder *
-find_holder_by_owner(struct list_head *head, struct task_struct *owner)
+find_holder_by_owner(struct list_head *head, pid_t pid)
 {
 	struct gfs2_holder *gh;
 
 	list_for_each_entry(gh, head, gh_list) {
-		if (gh->gh_owner == owner)
+		if (gh->gh_owner_pid == pid)
 			return gh;
 	}
 
@@ -1036,24 +1040,24 @@
 	struct gfs2_glock *gl = gh->gh_gl;
 	struct gfs2_holder *existing;
 
-	BUG_ON(!gh->gh_owner);
+	BUG_ON(!gh->gh_owner_pid);
 	if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
 		BUG();
 
-	existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner);
+	existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner_pid);
 	if (existing) {
 		print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip);
-		printk(KERN_INFO "pid : %d\n", existing->gh_owner->pid);
+		printk(KERN_INFO "pid : %d\n", existing->gh_owner_pid);
 		printk(KERN_INFO "lock type : %d lock state : %d\n",
 				existing->gh_gl->gl_name.ln_type, existing->gh_gl->gl_state);
 		print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
-		printk(KERN_INFO "pid : %d\n", gh->gh_owner->pid);
+		printk(KERN_INFO "pid : %d\n", gh->gh_owner_pid);
 		printk(KERN_INFO "lock type : %d lock state : %d\n",
 				gl->gl_name.ln_type, gl->gl_state);
 		BUG();
 	}
 
-	existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner);
+	existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner_pid);
 	if (existing) {
 		print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip);
 		print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
@@ -1756,6 +1760,22 @@
  *  Diagnostic routines to help debug distributed deadlock
  */
 
+static void gfs2_print_symbol(struct glock_iter *gi, const char *fmt,
+                              unsigned long address)
+{
+/* when sprint_symbol becomes available in the new kernel, replace this */
+/* function with:
+        char buffer[KSYM_SYMBOL_LEN];
+
+        sprint_symbol(buffer, address);
+        print_dbg(gi, fmt, buffer);
+*/
+        if (gi)
+                print_dbg(gi, fmt, address);
+        else
+                print_symbol(fmt, address);
+}
+
 /**
  * dump_holder - print information about a glock holder
  * @str: a string naming the type of holder
@@ -1768,10 +1788,18 @@
 		       struct gfs2_holder *gh)
 {
 	unsigned int x;
+	struct task_struct *gh_owner;
 
 	print_dbg(gi, "  %s\n", str);
-	print_dbg(gi, "    owner = %ld\n",
-		   (gh->gh_owner) ? (long)gh->gh_owner->pid : -1);
+	if (gh->gh_owner_pid) {
+		print_dbg(gi, "    owner = %ld ", (long)gh->gh_owner_pid);
+		gh_owner = find_task_by_pid(gh->gh_owner_pid);
+		if (gh_owner)
+			print_dbg(gi, "(%s)\n", gh_owner->comm);
+		else
+			print_dbg(gi, "(ended)\n");
+	} else
+		print_dbg(gi, "    owner = -1\n");
 	print_dbg(gi, "    gh_state = %u\n", gh->gh_state);
 	print_dbg(gi, "    gh_flags =");
 	for (x = 0; x < 32; x++)
@@ -1784,10 +1812,7 @@
 		if (test_bit(x, &gh->gh_iflags))
 			print_dbg(gi, " %u", x);
 	print_dbg(gi, " \n");
-	if (gi)
-		print_dbg(gi, "    initialized at: 0x%x\n", gh->gh_ip);
-	else
-		print_symbol(KERN_INFO "    initialized at: %s\n", gh->gh_ip);
+        gfs2_print_symbol(gi, "    initialized at: %s\n", gh->gh_ip);
 
 	return 0;
 }
@@ -1828,6 +1853,7 @@
 	struct gfs2_holder *gh;
 	unsigned int x;
 	int error = -ENOBUFS;
+	struct task_struct *gl_owner;
 
 	spin_lock(&gl->gl_spin);
 
@@ -1838,10 +1864,21 @@
 		if (test_bit(x, &gl->gl_flags))
 			print_dbg(gi, " %u", x);
 	}
+	if (!test_bit(GLF_LOCK, &gl->gl_flags))
+		print_dbg(gi, " (unlocked)");
 	print_dbg(gi, " \n");
 	print_dbg(gi, "  gl_ref = %d\n", atomic_read(&gl->gl_ref));
 	print_dbg(gi, "  gl_state = %u\n", gl->gl_state);
-	print_dbg(gi, "  gl_owner = %s\n", gl->gl_owner->comm);
+	if (gl->gl_owner_pid) {
+		gl_owner = find_task_by_pid(gl->gl_owner_pid);
+		if (gl_owner)
+			print_dbg(gi, "  gl_owner = pid %d (%s)\n",
+				  gl->gl_owner_pid, gl_owner->comm);
+		else
+			print_dbg(gi, "  gl_owner = %d (ended)\n",
+				  gl->gl_owner_pid);
+	} else
+		print_dbg(gi, "  gl_owner = -1\n");
 	print_dbg(gi, "  gl_ip = %lu\n", gl->gl_ip);
 	print_dbg(gi, "  req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no");
 	print_dbg(gi, "  req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no");
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 5e662ea..11477ca 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -38,7 +38,7 @@
 	/* Look in glock's list of holders for one with current task as owner */
 	spin_lock(&gl->gl_spin);
 	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
-		if (gh->gh_owner == current) {
+		if (gh->gh_owner_pid == current->pid) {
 			locked = 1;
 			break;
 		}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 9c12582..fdf0470 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -127,7 +127,7 @@
 	struct list_head gh_list;
 
 	struct gfs2_glock *gh_gl;
-	struct task_struct *gh_owner;
+	pid_t gh_owner_pid;
 	unsigned int gh_state;
 	unsigned gh_flags;
 
@@ -155,7 +155,7 @@
 	unsigned int gl_hash;
 	unsigned int gl_demote_state; /* state requested by remote node */
 	unsigned long gl_demote_time; /* time of first demote request */
-	struct task_struct *gl_owner;
+	pid_t gl_owner_pid;
 	unsigned long gl_ip;
 	struct list_head gl_holders;
 	struct list_head gl_waiters1;	/* HIF_MUTEX */