ceph: reset osd after relevant messages timed out

This simplifies the process of timing out messages. We keep an LRU of the messages that are currently in flight. If a timeout has passed, we reset the osd connection so that the messages will be retransmitted. This is a failsafe in case we hit some sort of problem sending out a message to the OSD. Normally, we'll get notification via an updated osdmap if there are problems.

If a request is older than the keepalive timeout, send a keepalive to ensure we detect any breaks in the TCP connection.

Signed-off-by: Yehuda Sadeh <yehuda@hq.newdream.net>
Signed-off-by: Sage Weil <sage@newdream.net>

commit: 422d2cb8f9afadba1ecd3614f658b6daaaa480fb [log] [tgz]
author: Yehuda Sadeh <yehuda@hq.newdream.net> Fri Feb 26 15:32:31 2010 -0800
committer: Sage Weil <sage@newdream.net> Thu Mar 04 11:26:35 2010 -0800
tree: 22e1a61acdbbe1459b190c4dbb6019360464b2e9
parent: e9964c102312967a4bc1fd501cb628c4a3b19034 [diff] [blame]
diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h
index f256eba..1b1a3ca 100644
--- a/fs/ceph/osd_client.h
+++ b/fs/ceph/osd_client.h

@@ -36,12 +36,15 @@
 	void *o_authorizer_buf, *o_authorizer_reply_buf;
 	size_t o_authorizer_buf_len, o_authorizer_reply_buf_len;
 	unsigned long lru_ttl;
+	int o_marked_for_keepalive;
+	struct list_head o_keepalive_item;
 };
 
 /* an in-flight request */
 struct ceph_osd_request {
 	u64             r_tid;              /* unique for this client */
 	struct rb_node  r_node;
+	struct list_head r_req_lru_item;
 	struct list_head r_osd_item;
 	struct ceph_osd *r_osd;
 	struct ceph_pg   r_pgid;
@@ -67,7 +70,7 @@
 
 	char              r_oid[40];          /* object name */
 	int               r_oid_len;
-	unsigned long     r_timeout_stamp;
+	unsigned long     r_sent_stamp;
 	bool              r_resend;           /* msg send failed, needs retry */
 
 	struct ceph_file_layout r_file_layout;
@@ -92,6 +95,7 @@
 	u64                    timeout_tid;   /* tid of timeout triggering rq */
 	u64                    last_tid;      /* tid of last request */
 	struct rb_root         requests;      /* pending requests */
+	struct list_head       req_lru;	      /* pending requests lru */
 	int                    num_requests;
 	struct delayed_work    timeout_work;
 	struct delayed_work    osds_timeout_work;
commit	422d2cb8f9afadba1ecd3614f658b6daaaa480fb	[log] [tgz]
author	Yehuda Sadeh <yehuda@hq.newdream.net>	Fri Feb 26 15:32:31 2010 -0800
committer	Sage Weil <sage@newdream.net>	Thu Mar 04 11:26:35 2010 -0800
tree	22e1a61acdbbe1459b190c4dbb6019360464b2e9
parent	e9964c102312967a4bc1fd501cb628c4a3b19034 [diff] [blame]