xfrm: SAD entries do not expire correctly after suspend-resume
This fixes the following bug in the current implementation of
net/xfrm: SAD entries timeouts do not count the time spent by the machine
in the suspended state. This leads to the connectivity problems because
after resuming local machine thinks that the SAD entry is still valid, while
it has already been expired on the remote server.
The cause of this is very simple: the timeouts in the net/xfrm are bound to
the old mod_timer() timers. This patch reassigns them to the
CLOCK_REALTIME hrtimer.
I have been using this version of the patch for a few months on my
machines without any problems. Also run a few stress tests w/o any
issues.
This version of the patch uses tasklet_hrtimer by Peter Zijlstra
(commit 9ba5f0).
This patch is against 2.6.31.4. Please CC me.
Signed-off-by: Yury Polyanskiy <polyanskiy@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index f2f7c63..e9ac0ce 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -21,6 +21,9 @@
#include <linux/cache.h>
#include <linux/audit.h>
#include <asm/uaccess.h>
+#include <linux/ktime.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
#include "xfrm_hash.h"
@@ -352,7 +355,7 @@
static void xfrm_state_gc_destroy(struct xfrm_state *x)
{
- del_timer_sync(&x->timer);
+ tasklet_hrtimer_cancel(&x->mtimer);
del_timer_sync(&x->rtimer);
kfree(x->aalg);
kfree(x->ealg);
@@ -398,9 +401,10 @@
return secs*HZ;
}
-static void xfrm_timer_handler(unsigned long data)
+static enum hrtimer_restart xfrm_timer_handler(struct hrtimer * me)
{
- struct xfrm_state *x = (struct xfrm_state*)data;
+ struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer);
+ struct xfrm_state *x = container_of(thr, struct xfrm_state, mtimer);
struct net *net = xs_net(x);
unsigned long now = get_seconds();
long next = LONG_MAX;
@@ -451,8 +455,9 @@
if (warn)
km_state_expired(x, 0, 0);
resched:
- if (next != LONG_MAX)
- mod_timer(&x->timer, jiffies + make_jiffies(next));
+ if (next != LONG_MAX){
+ tasklet_hrtimer_start(&x->mtimer, ktime_set(next, 0), HRTIMER_MODE_REL);
+ }
goto out;
@@ -474,6 +479,7 @@
out:
spin_unlock(&x->lock);
+ return HRTIMER_NORESTART;
}
static void xfrm_replay_timer_handler(unsigned long data);
@@ -492,7 +498,7 @@
INIT_HLIST_NODE(&x->bydst);
INIT_HLIST_NODE(&x->bysrc);
INIT_HLIST_NODE(&x->byspi);
- setup_timer(&x->timer, xfrm_timer_handler, (unsigned long)x);
+ tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler, CLOCK_REALTIME, HRTIMER_MODE_ABS);
setup_timer(&x->rtimer, xfrm_replay_timer_handler,
(unsigned long)x);
x->curlft.add_time = get_seconds();
@@ -843,8 +849,7 @@
hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
}
x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
- x->timer.expires = jiffies + net->xfrm.sysctl_acq_expires*HZ;
- add_timer(&x->timer);
+ tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
net->xfrm.state_num++;
xfrm_hash_grow_check(net, x->bydst.next != NULL);
} else {
@@ -921,7 +926,7 @@
hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
}
- mod_timer(&x->timer, jiffies + HZ);
+ tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
if (x->replay_maxage)
mod_timer(&x->rtimer, jiffies + x->replay_maxage);
@@ -1019,8 +1024,7 @@
x->props.reqid = reqid;
x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
xfrm_state_hold(x);
- x->timer.expires = jiffies + net->xfrm.sysctl_acq_expires*HZ;
- add_timer(&x->timer);
+ tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
list_add(&x->km.all, &net->xfrm.state_all);
hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
h = xfrm_src_hash(net, daddr, saddr, family);
@@ -1300,7 +1304,7 @@
memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
x1->km.dying = 0;
- mod_timer(&x1->timer, jiffies + HZ);
+ tasklet_hrtimer_start(&x1->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
if (x1->curlft.use_time)
xfrm_state_check_expire(x1);
@@ -1325,7 +1329,7 @@
if (x->curlft.bytes >= x->lft.hard_byte_limit ||
x->curlft.packets >= x->lft.hard_packet_limit) {
x->km.state = XFRM_STATE_EXPIRED;
- mod_timer(&x->timer, jiffies);
+ tasklet_hrtimer_start(&x->mtimer, ktime_set(0,0), HRTIMER_MODE_REL);
return -EINVAL;
}