libceph: store timeouts in jiffies, verify user input
There are currently three libceph-level timeouts that the user can
specify on mount: mount_timeout, osd_idle_ttl and osdkeepalive. All of
these are in seconds and no checking is done on user input: negative
values are accepted, we multiply them all by HZ which may or may not
overflow, arbitrarily large jiffies then get added together, etc.
There is also a bug in the way mount_timeout=0 is handled. It's
supposed to mean "infinite timeout", but that's not how wait.h APIs
treat it and so __ceph_open_session() for example will busy loop
without much chance of being interrupted if none of ceph-mons are
there.
Fix all this by verifying user input, storing timeouts capped by
msecs_to_jiffies() in jiffies and using the new ceph_timeout_jiffies()
helper for all user-specified waits to handle infinite timeouts
correctly.
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Alex Elder <elder@linaro.org>
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 349115a..992683b 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -4963,8 +4963,8 @@
*/
static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name)
{
+ struct ceph_options *opts = rbdc->client->options;
u64 newest_epoch;
- unsigned long timeout = rbdc->client->options->mount_timeout * HZ;
int tries = 0;
int ret;
@@ -4979,7 +4979,8 @@
if (rbdc->client->osdc.osdmap->epoch < newest_epoch) {
ceph_monc_request_next_osdmap(&rbdc->client->monc);
(void) ceph_monc_wait_osdmap(&rbdc->client->monc,
- newest_epoch, timeout);
+ newest_epoch,
+ opts->mount_timeout);
goto again;
} else {
/* the osdmap we have is new enough */
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 4248307..173dd4b 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1259,8 +1259,8 @@
inode, req->r_tid, last_tid);
if (req->r_timeout) {
unsigned long time_left = wait_for_completion_timeout(
- &req->r_safe_completion,
- req->r_timeout);
+ &req->r_safe_completion,
+ ceph_timeout_jiffies(req->r_timeout));
if (time_left > 0)
ret = 0;
else
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 69a36f4..0b0e0a9 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2268,7 +2268,8 @@
dout("do_request waiting\n");
if (req->r_timeout) {
err = (long)wait_for_completion_killable_timeout(
- &req->r_completion, req->r_timeout);
+ &req->r_completion,
+ ceph_timeout_jiffies(req->r_timeout));
if (err == 0)
err = -EIO;
} else if (req->r_wait_for_completion) {
@@ -3424,8 +3425,8 @@
*/
static void wait_requests(struct ceph_mds_client *mdsc)
{
+ struct ceph_options *opts = mdsc->fsc->client->options;
struct ceph_mds_request *req;
- struct ceph_fs_client *fsc = mdsc->fsc;
mutex_lock(&mdsc->mutex);
if (__get_oldest_req(mdsc)) {
@@ -3433,7 +3434,7 @@
dout("wait_requests waiting for requests\n");
wait_for_completion_timeout(&mdsc->safe_umount_waiters,
- fsc->client->options->mount_timeout * HZ);
+ ceph_timeout_jiffies(opts->mount_timeout));
/* tear down remaining requests */
mutex_lock(&mdsc->mutex);
@@ -3556,10 +3557,9 @@
*/
void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
{
+ struct ceph_options *opts = mdsc->fsc->client->options;
struct ceph_mds_session *session;
int i;
- struct ceph_fs_client *fsc = mdsc->fsc;
- unsigned long timeout = fsc->client->options->mount_timeout * HZ;
dout("close_sessions\n");
@@ -3580,7 +3580,7 @@
dout("waiting for sessions to close\n");
wait_event_timeout(mdsc->session_close_wq, done_closing_sessions(mdsc),
- timeout);
+ ceph_timeout_jiffies(opts->mount_timeout));
/* tear down remaining sessions */
mutex_lock(&mdsc->mutex);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 2ef7999..509d682 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -227,7 +227,7 @@
int r_err;
bool r_aborted;
- unsigned long r_timeout; /* optional. jiffies */
+ unsigned long r_timeout; /* optional. jiffies, 0 is "wait forever" */
unsigned long r_started; /* start time to measure timeout against */
unsigned long r_request_started; /* start time for mds request only,
used to measure lease durations */
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 9a53500..edeb83c 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -742,7 +742,7 @@
req->r_ino1.ino = CEPH_INO_ROOT;
req->r_ino1.snap = CEPH_NOSNAP;
req->r_started = started;
- req->r_timeout = fsc->client->options->mount_timeout * HZ;
+ req->r_timeout = fsc->client->options->mount_timeout;
req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
req->r_num_caps = 2;
err = ceph_mdsc_do_request(mdsc, NULL, req);
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 85ae9a8..d73a569 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -43,9 +43,9 @@
int flags;
struct ceph_fsid fsid;
struct ceph_entity_addr my_addr;
- int mount_timeout;
- int osd_idle_ttl;
- int osd_keepalive_timeout;
+ unsigned long mount_timeout; /* jiffies */
+ unsigned long osd_idle_ttl; /* jiffies */
+ unsigned long osd_keepalive_timeout; /* jiffies */
/*
* any type that can't be simply compared or doesn't need need
@@ -63,9 +63,9 @@
/*
* defaults
*/
-#define CEPH_MOUNT_TIMEOUT_DEFAULT 60
-#define CEPH_OSD_KEEPALIVE_DEFAULT 5
-#define CEPH_OSD_IDLE_TTL_DEFAULT 60
+#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000)
+#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000)
+#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000)
#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)
#define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024)
@@ -93,6 +93,11 @@
CEPH_MOUNT_SHUTDOWN,
};
+static inline unsigned long ceph_timeout_jiffies(unsigned long timeout)
+{
+ return timeout ?: MAX_SCHEDULE_TIMEOUT;
+}
+
struct ceph_mds_client;
/*
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 79e8f71..a80e91c 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -352,8 +352,8 @@
/* start with defaults */
opt->flags = CEPH_OPT_DEFAULT;
opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
- opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */
- opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */
+ opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
+ opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
/* get mon ip(s) */
/* ip1[:port1][,ip2[:port2]...] */
@@ -439,13 +439,32 @@
pr_warn("ignoring deprecated osdtimeout option\n");
break;
case Opt_osdkeepalivetimeout:
- opt->osd_keepalive_timeout = intval;
+ /* 0 isn't well defined right now, reject it */
+ if (intval < 1 || intval > INT_MAX / 1000) {
+ pr_err("osdkeepalive out of range\n");
+ err = -EINVAL;
+ goto out;
+ }
+ opt->osd_keepalive_timeout =
+ msecs_to_jiffies(intval * 1000);
break;
case Opt_osd_idle_ttl:
- opt->osd_idle_ttl = intval;
+ /* 0 isn't well defined right now, reject it */
+ if (intval < 1 || intval > INT_MAX / 1000) {
+ pr_err("osd_idle_ttl out of range\n");
+ err = -EINVAL;
+ goto out;
+ }
+ opt->osd_idle_ttl = msecs_to_jiffies(intval * 1000);
break;
case Opt_mount_timeout:
- opt->mount_timeout = intval;
+ /* 0 is "wait forever" (i.e. infinite timeout) */
+ if (intval < 0 || intval > INT_MAX / 1000) {
+ pr_err("mount_timeout out of range\n");
+ err = -EINVAL;
+ goto out;
+ }
+ opt->mount_timeout = msecs_to_jiffies(intval * 1000);
break;
case Opt_share:
@@ -512,12 +531,14 @@
seq_puts(m, "notcp_nodelay,");
if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
- seq_printf(m, "mount_timeout=%d,", opt->mount_timeout);
+ seq_printf(m, "mount_timeout=%d,",
+ jiffies_to_msecs(opt->mount_timeout) / 1000);
if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
- seq_printf(m, "osd_idle_ttl=%d,", opt->osd_idle_ttl);
+ seq_printf(m, "osd_idle_ttl=%d,",
+ jiffies_to_msecs(opt->osd_idle_ttl) / 1000);
if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
seq_printf(m, "osdkeepalivetimeout=%d,",
- opt->osd_keepalive_timeout);
+ jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000);
/* drop redundant comma */
if (m->count != pos)
@@ -627,7 +648,7 @@
int __ceph_open_session(struct ceph_client *client, unsigned long started)
{
int err;
- unsigned long timeout = client->options->mount_timeout * HZ;
+ unsigned long timeout = client->options->mount_timeout;
/* open session, and wait for mon and osd maps */
err = ceph_monc_open_session(&client->monc);
@@ -643,7 +664,7 @@
dout("mount waiting for mon_map\n");
err = wait_event_interruptible_timeout(client->auth_wq,
have_mon_and_osd_map(client) || (client->auth_err < 0),
- timeout);
+ ceph_timeout_jiffies(timeout));
if (err == -EINTR || err == -ERESTARTSYS)
return err;
if (client->auth_err < 0)
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 2b3cf05..0da3bdc 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -298,6 +298,12 @@
}
EXPORT_SYMBOL(ceph_monc_request_next_osdmap);
+/*
+ * Wait for an osdmap with a given epoch.
+ *
+ * @epoch: epoch to wait for
+ * @timeout: in jiffies, 0 means "wait forever"
+ */
int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
unsigned long timeout)
{
@@ -308,11 +314,12 @@
while (monc->have_osdmap < epoch) {
mutex_unlock(&monc->mutex);
- if (timeout != 0 && time_after_eq(jiffies, started + timeout))
+ if (timeout && time_after_eq(jiffies, started + timeout))
return -ETIMEDOUT;
ret = wait_event_interruptible_timeout(monc->client->auth_wq,
- monc->have_osdmap >= epoch, timeout);
+ monc->have_osdmap >= epoch,
+ ceph_timeout_jiffies(timeout));
if (ret < 0)
return ret;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 4cb4fab..5003367 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1097,7 +1097,7 @@
BUG_ON(!list_empty(&osd->o_osd_lru));
list_add_tail(&osd->o_osd_lru, &osdc->osd_lru);
- osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl * HZ;
+ osd->lru_ttl = jiffies + osdc->client->options->osd_idle_ttl;
}
static void maybe_move_osd_to_lru(struct ceph_osd_client *osdc,
@@ -1208,7 +1208,7 @@
static void __schedule_osd_timeout(struct ceph_osd_client *osdc)
{
schedule_delayed_work(&osdc->timeout_work,
- osdc->client->options->osd_keepalive_timeout * HZ);
+ osdc->client->options->osd_keepalive_timeout);
}
static void __cancel_osd_timeout(struct ceph_osd_client *osdc)
@@ -1576,10 +1576,9 @@
{
struct ceph_osd_client *osdc =
container_of(work, struct ceph_osd_client, timeout_work.work);
+ struct ceph_options *opts = osdc->client->options;
struct ceph_osd_request *req;
struct ceph_osd *osd;
- unsigned long keepalive =
- osdc->client->options->osd_keepalive_timeout * HZ;
struct list_head slow_osds;
dout("timeout\n");
down_read(&osdc->map_sem);
@@ -1595,7 +1594,8 @@
*/
INIT_LIST_HEAD(&slow_osds);
list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) {
- if (time_before(jiffies, req->r_stamp + keepalive))
+ if (time_before(jiffies,
+ req->r_stamp + opts->osd_keepalive_timeout))
break;
osd = req->r_osd;
@@ -1622,8 +1622,7 @@
struct ceph_osd_client *osdc =
container_of(work, struct ceph_osd_client,
osds_timeout_work.work);
- unsigned long delay =
- osdc->client->options->osd_idle_ttl * HZ >> 2;
+ unsigned long delay = osdc->client->options->osd_idle_ttl / 4;
dout("osds timeout\n");
down_read(&osdc->map_sem);
@@ -2628,7 +2627,7 @@
osdc->event_count = 0;
schedule_delayed_work(&osdc->osds_timeout_work,
- round_jiffies_relative(osdc->client->options->osd_idle_ttl * HZ));
+ round_jiffies_relative(osdc->client->options->osd_idle_ttl));
err = -ENOMEM;
osdc->req_mempool = mempool_create_kmalloc_pool(10,