ceph: do not confuse stale and dead (unreconnected) caps

We were using the cap_gen to track both stale caps (caps that timed out
due to temporarily losing touch with the MDS) and dead caps that did not
reconnect after an MDS failure.  Introduce a recon_gen counter to track
reconnections to restarted MDSs and kill dead caps based on that instead.

While we're at it, rename the cap's gen field to cap_gen to make it
clearer which counter is which.

Signed-off-by: Sage Weil <sage@newdream.net>
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 8b863db..775e6f6 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -609,7 +609,8 @@
 	cap->seq = seq;
 	cap->issue_seq = seq;
 	cap->mseq = mseq;
-	cap->gen = session->s_cap_gen;
+	cap->cap_gen = session->s_cap_gen;
+	cap->recon_gen = session->s_recon_gen;
 
 	if (fmode >= 0)
 		__ceph_get_fmode(ci, fmode);
@@ -626,17 +627,25 @@
 static int __cap_is_valid(struct ceph_cap *cap)
 {
 	unsigned long ttl;
-	u32 gen;
+	u32 gen, recon_gen;
 
 	spin_lock(&cap->session->s_cap_lock);
 	gen = cap->session->s_cap_gen;
+	recon_gen = cap->session->s_recon_gen;
 	ttl = cap->session->s_cap_ttl;
 	spin_unlock(&cap->session->s_cap_lock);
 
-	if (cap->gen < gen || time_after_eq(jiffies, ttl)) {
+	if (cap->recon_gen != recon_gen) {
+		dout("__cap_is_valid %p cap %p issued %s "
+		     "but DEAD (recon_gen %u vs %u)\n", &cap->ci->vfs_inode,
+		     cap, ceph_cap_string(cap->issued), cap->recon_gen,
+		     recon_gen);
+		return 0;
+	}
+	if (cap->cap_gen < gen || time_after_eq(jiffies, ttl)) {
 		dout("__cap_is_valid %p cap %p issued %s "
 		     "but STALE (gen %u vs %u)\n", &cap->ci->vfs_inode,
-		     cap, ceph_cap_string(cap->issued), cap->gen, gen);
+		     cap, ceph_cap_string(cap->issued), cap->cap_gen, gen);
 		return 0;
 	}
 
@@ -2203,7 +2212,8 @@
 	issued = __ceph_caps_issued(ci, &implemented);
 	issued |= implemented | __ceph_caps_dirty(ci);
 
-	cap->gen = session->s_cap_gen;
+	cap->cap_gen = session->s_cap_gen;
+	cap->recon_gen = session->s_recon_gen;
 
 	__check_cap_issue(ci, cap, newcaps);