ceph: drop messages on unregistered mds sessions; cleanup

Verify that the mds session is currently registered before handling
incoming messages, and drop the message if it is not.  Clean up the
message handlers to take the mds number from session->s_mds instead
of the less trustworthy msg->hdr.src field.

Clean up con_{get,put} debug output.

Signed-off-by: Sage Weil <sage@newdream.net>
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index b6154ff..bb84616 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2600,7 +2600,7 @@
 	struct inode *inode;
 	struct ceph_cap *cap;
 	struct ceph_mds_caps *h;
-	int mds = le64_to_cpu(msg->hdr.src.name.num);
+	int mds = session->s_mds;
 	int op;
 	u32 seq;
 	struct ceph_vino vino;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 124c0c1..4d00ea2 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -309,6 +309,15 @@
 	return mdsc->sessions[mds];
 }
 
+/* returns 0 if s is the session registered for s->s_mds, else -ENOENT */
+static int __verify_registered_session(struct ceph_mds_client *mdsc,
+				       struct ceph_mds_session *s)
+{
+	if (s->s_mds < mdsc->max_sessions && mdsc->sessions[s->s_mds] == s)
+		return 0;
+	return -ENOENT;
+}
+
 /*
  * create+register a new session for given mds.
  * called under mdsc->mutex.
@@ -382,10 +391,11 @@
 /*
  * called under mdsc->mutex
  */
-static void unregister_session(struct ceph_mds_client *mdsc,
+static void __unregister_session(struct ceph_mds_client *mdsc,
 			       struct ceph_mds_session *s)
 {
-	dout("unregister_session mds%d %p\n", s->s_mds, s);
+	dout("__unregister_session mds%d %p\n", s->s_mds, s);
+	BUG_ON(mdsc->sessions[s->s_mds] != s);
 	mdsc->sessions[s->s_mds] = NULL;
 	ceph_con_close(&s->s_con);
 	ceph_put_mds_session(s);
@@ -1740,10 +1750,8 @@
 	struct ceph_mds_reply_info_parsed *rinfo;  /* parsed reply info */
 	u64 tid;
 	int err, result;
-	int mds;
+	int mds = session->s_mds;
 
-	if (msg->hdr.src.name.type != CEPH_ENTITY_TYPE_MDS)
-		return;
 	if (msg->front.iov_len < sizeof(*head)) {
 		pr_err("mdsc_handle_reply got corrupt (short) reply\n");
 		ceph_msg_dump(msg);
@@ -1760,7 +1768,6 @@
 		return;
 	}
 	dout("handle_reply %p\n", req);
-	mds = le64_to_cpu(msg->hdr.src.name.num);
 
 	/* correct session? */
 	if (!req->r_session && req->r_session != session) {
@@ -1884,7 +1891,9 @@
 /*
  * handle mds notification that our request has been forwarded.
  */
-static void handle_forward(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
+static void handle_forward(struct ceph_mds_client *mdsc,
+			   struct ceph_mds_session *session,
+			   struct ceph_msg *msg)
 {
 	struct ceph_mds_request *req;
 	u64 tid;
@@ -1894,11 +1903,7 @@
 	int err = -EINVAL;
 	void *p = msg->front.iov_base;
 	void *end = p + msg->front.iov_len;
-	int from_mds, state;
-
-	if (msg->hdr.src.name.type != CEPH_ENTITY_TYPE_MDS)
-		goto bad;
-	from_mds = le64_to_cpu(msg->hdr.src.name.num);
+	int state;
 
 	ceph_decode_need(&p, end, sizeof(u64)+2*sizeof(u32), bad);
 	tid = ceph_decode_64(&p);
@@ -1915,6 +1920,9 @@
 		goto out;  /* dup reply? */
 	}
 
+	if (next_mds >= mdsc->max_sessions)
+		goto out;
+
 	state = mdsc->sessions[next_mds]->s_state;
 	if (fwd_seq <= req->r_num_fwd) {
 		dout("forward %llu to mds%d - old seq %d <= %d\n",
@@ -1945,14 +1953,10 @@
 	struct ceph_mds_client *mdsc = session->s_mdsc;
 	u32 op;
 	u64 seq;
-	int mds;
+	int mds = session->s_mds;
 	struct ceph_mds_session_head *h = msg->front.iov_base;
 	int wake = 0;
 
-	if (msg->hdr.src.name.type != CEPH_ENTITY_TYPE_MDS)
-		return;
-	mds = le64_to_cpu(msg->hdr.src.name.num);
-
 	/* decode */
 	if (msg->front.iov_len != sizeof(*h))
 		goto bad;
@@ -1960,6 +1964,8 @@
 	seq = le64_to_cpu(h->seq);
 
 	mutex_lock(&mdsc->mutex);
+	if (op == CEPH_SESSION_CLOSE)
+		__unregister_session(mdsc, session);
 	/* FIXME: this ttl calculation is generous */
 	session->s_ttl = jiffies + HZ*mdsc->mdsmap->m_session_autoclose;
 	mutex_unlock(&mdsc->mutex);
@@ -1990,7 +1996,6 @@
 		break;
 
 	case CEPH_SESSION_CLOSE:
-		unregister_session(mdsc, session);
 		remove_session_caps(session);
 		wake = 1; /* for good measure */
 		complete(&mdsc->session_close_waiters);
@@ -2269,7 +2274,7 @@
 				/* the session never opened, just close it
 				 * out now */
 				__wake_requests(mdsc, &s->s_waiting);
-				unregister_session(mdsc, s);
+				__unregister_session(mdsc, s);
 			} else {
 				/* just close it */
 				mutex_unlock(&mdsc->mutex);
@@ -2329,24 +2334,22 @@
 	di->lease_session = NULL;
 }
 
-static void handle_lease(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
+static void handle_lease(struct ceph_mds_client *mdsc,
+			 struct ceph_mds_session *session,
+			 struct ceph_msg *msg)
 {
 	struct super_block *sb = mdsc->client->sb;
 	struct inode *inode;
-	struct ceph_mds_session *session;
 	struct ceph_inode_info *ci;
 	struct dentry *parent, *dentry;
 	struct ceph_dentry_info *di;
-	int mds;
+	int mds = session->s_mds;
 	struct ceph_mds_lease *h = msg->front.iov_base;
 	struct ceph_vino vino;
 	int mask;
 	struct qstr dname;
 	int release = 0;
 
-	if (msg->hdr.src.name.type != CEPH_ENTITY_TYPE_MDS)
-		return;
-	mds = le64_to_cpu(msg->hdr.src.name.num);
 	dout("handle_lease from mds%d\n", mds);
 
 	/* decode */
@@ -2360,15 +2363,6 @@
 	if (dname.len != get_unaligned_le32(h+1))
 		goto bad;
 
-	/* find session */
-	mutex_lock(&mdsc->mutex);
-	session = __ceph_lookup_mds_session(mdsc, mds);
-	mutex_unlock(&mdsc->mutex);
-	if (!session) {
-		pr_err("handle_lease got lease but no session mds%d\n", mds);
-		return;
-	}
-
 	mutex_lock(&session->s_mutex);
 	session->s_seq++;
 
@@ -2437,7 +2431,6 @@
 out:
 	iput(inode);
 	mutex_unlock(&session->s_mutex);
-	ceph_put_mds_session(session);
 	return;
 
 bad:
@@ -2794,7 +2787,7 @@
 	for (i = 0; i < mdsc->max_sessions; i++) {
 		if (mdsc->sessions[i]) {
 			session = get_session(mdsc->sessions[i]);
-			unregister_session(mdsc, session);
+			__unregister_session(mdsc, session);
 			mutex_unlock(&mdsc->mutex);
 			mutex_lock(&session->s_mutex);
 			remove_session_caps(session);
@@ -2891,8 +2884,7 @@
 	struct ceph_mds_session *s = con->private;
 
 	if (get_session(s)) {
-		dout("mdsc con_get %p %d -> %d\n", s,
-		     atomic_read(&s->s_ref) - 1, atomic_read(&s->s_ref));
+		dout("mdsc con_get %p ok (%d)\n", s, atomic_read(&s->s_ref));
 		return con;
 	}
 	dout("mdsc con_get %p FAIL\n", s);
@@ -2903,9 +2895,9 @@
 {
 	struct ceph_mds_session *s = con->private;
 
-	dout("mdsc con_put %p %d -> %d\n", s, atomic_read(&s->s_ref),
-	     atomic_read(&s->s_ref) - 1);
+	/* log first; the put below may drop the last reference to s */
+	dout("mdsc con_put %p (%d)\n", s, atomic_read(&s->s_ref) - 1);
 	ceph_put_mds_session(s);
 }
 
 /*
@@ -2926,6 +2917,13 @@
 	struct ceph_mds_client *mdsc = s->s_mdsc;
 	int type = le16_to_cpu(msg->hdr.type);
 
+	mutex_lock(&mdsc->mutex);
+	if (__verify_registered_session(mdsc, s) < 0) {
+		mutex_unlock(&mdsc->mutex);
+		goto out;
+	}
+	mutex_unlock(&mdsc->mutex);
+
 	switch (type) {
 	case CEPH_MSG_MDS_MAP:
 		ceph_mdsc_handle_map(mdsc, msg);
@@ -2937,22 +2935,23 @@
 		handle_reply(s, msg);
 		break;
 	case CEPH_MSG_CLIENT_REQUEST_FORWARD:
-		handle_forward(mdsc, msg);
+		handle_forward(mdsc, s, msg);
 		break;
 	case CEPH_MSG_CLIENT_CAPS:
 		ceph_handle_caps(s, msg);
 		break;
 	case CEPH_MSG_CLIENT_SNAP:
-		ceph_handle_snap(mdsc, msg);
+		ceph_handle_snap(mdsc, s, msg);
 		break;
 	case CEPH_MSG_CLIENT_LEASE:
-		handle_lease(mdsc, msg);
+		handle_lease(mdsc, s, msg);
 		break;
 
 	default:
 		pr_err("received unknown message type %d %s\n", type,
 		       ceph_msg_type_name(type));
 	}
+out:
 	ceph_msg_put(msg);
 }
 
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 49d0c4c..bf2a5f3 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -713,11 +713,11 @@
  * directory into another realm.
  */
 void ceph_handle_snap(struct ceph_mds_client *mdsc,
+		      struct ceph_mds_session *session,
 		      struct ceph_msg *msg)
 {
 	struct super_block *sb = mdsc->client->sb;
-	struct ceph_mds_session *session;
-	int mds;
+	int mds = session->s_mds;
 	u64 split;
 	int op;
 	int trace_len;
@@ -730,10 +730,6 @@
 	int i;
 	int locked_rwsem = 0;
 
-	if (msg->hdr.src.name.type != CEPH_ENTITY_TYPE_MDS)
-		return;
-	mds = le64_to_cpu(msg->hdr.src.name.num);
-
 	/* decode */
 	if (msg->front.iov_len < sizeof(*h))
 		goto bad;
@@ -749,15 +745,6 @@
 	dout("handle_snap from mds%d op %s split %llx tracelen %d\n", mds,
 	     ceph_snap_op_name(op), split, trace_len);
 
-	/* find session */
-	mutex_lock(&mdsc->mutex);
-	session = __ceph_lookup_mds_session(mdsc, mds);
-	mutex_unlock(&mdsc->mutex);
-	if (!session) {
-		dout("WTF, got snap but no session for mds%d\n", mds);
-		return;
-	}
-
 	mutex_lock(&session->s_mutex);
 	session->s_seq++;
 	mutex_unlock(&session->s_mutex);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 384f0e2..ff7aaa3 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -707,6 +707,7 @@
 extern int ceph_update_snap_trace(struct ceph_mds_client *m,
 				  void *p, void *e, bool deletion);
 extern void ceph_handle_snap(struct ceph_mds_client *mdsc,
+			     struct ceph_mds_session *session,
 			     struct ceph_msg *msg);
 extern void ceph_queue_cap_snap(struct ceph_inode_info *ci,
 				struct ceph_snap_context *snapc);