ceph: reset osd after relevant messages timed out

This simplifies the process of timing out messages. We
keep lru of current messages that are in flight. If a
timeout has passed, we reset the osd connection, so that
messages will be retransmitted.  This is a failsafe in case
we hit some sort of problem sending out message to the OSD.
Normally, we'll get notification via an updated osdmap if
there are problems.

If a request is older than the keepalive timeout, send a
keepalive to ensure we detect any breaks in the TCP connection.

Signed-off-by: Yehuda Sadeh <yehuda@hq.newdream.net>
Signed-off-by: Sage Weil <sage@newdream.net>
diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h
index f256eba..1b1a3ca 100644
--- a/fs/ceph/osd_client.h
+++ b/fs/ceph/osd_client.h
@@ -36,12 +36,15 @@
 	void *o_authorizer_buf, *o_authorizer_reply_buf;
 	size_t o_authorizer_buf_len, o_authorizer_reply_buf_len;
 	unsigned long lru_ttl;
+	int o_marked_for_keepalive;
+	struct list_head o_keepalive_item;
 };
 
 /* an in-flight request */
 struct ceph_osd_request {
 	u64             r_tid;              /* unique for this client */
 	struct rb_node  r_node;
+	struct list_head r_req_lru_item;
 	struct list_head r_osd_item;
 	struct ceph_osd *r_osd;
 	struct ceph_pg   r_pgid;
@@ -67,7 +70,7 @@
 
 	char              r_oid[40];          /* object name */
 	int               r_oid_len;
-	unsigned long     r_timeout_stamp;
+	unsigned long     r_sent_stamp;
 	bool              r_resend;           /* msg send failed, needs retry */
 
 	struct ceph_file_layout r_file_layout;
@@ -92,6 +95,7 @@
 	u64                    timeout_tid;   /* tid of timeout triggering rq */
 	u64                    last_tid;      /* tid of last request */
 	struct rb_root         requests;      /* pending requests */
+	struct list_head       req_lru;	      /* pending requests lru */
 	int                    num_requests;
 	struct delayed_work    timeout_work;
 	struct delayed_work    osds_timeout_work;