inet: frag: don't wait for timer deletion when evicting

Frank reports 'NMI watchdog: BUG: soft lockup' errors when
load is high.  Instead of (potentially) unbounded restarts of the
eviction process, just skip to the next entry.

One caveat is that, when a netns is exiting, a timer may still be running
by the time inet_evict_bucket returns.

We use the frag memory accounting to wait for outstanding timers,
so that when we free the percpu counter we can be sure no running
timer will trip over it.

Reported-and-tested-by: Frank Schreuder <fschreuder@transip.nl>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 4473232..a00ca4c 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -131,24 +131,14 @@
 	unsigned int evicted = 0;
 	HLIST_HEAD(expired);
 
-evict_again:
 	spin_lock(&hb->chain_lock);
 
 	hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
 		if (!inet_fragq_should_evict(fq))
 			continue;
 
-		if (!del_timer(&fq->timer)) {
-			/* q expiring right now thus increment its refcount so
-			 * it won't be freed under us and wait until the timer
-			 * has finished executing then destroy it
-			 */
-			atomic_inc(&fq->refcnt);
-			spin_unlock(&hb->chain_lock);
-			del_timer_sync(&fq->timer);
-			inet_frag_put(fq, f);
-			goto evict_again;
-		}
+		if (!del_timer(&fq->timer))
+			continue;
 
 		fq->flags |= INET_FRAG_EVICTED;
 		hlist_add_head(&fq->list_evictor, &expired);
@@ -239,18 +229,20 @@
 	int i;
 
 	nf->low_thresh = 0;
-	local_bh_disable();
 
 evict_again:
+	local_bh_disable();
 	seq = read_seqbegin(&f->rnd_seqlock);
 
 	for (i = 0; i < INETFRAGS_HASHSZ ; i++)
 		inet_evict_bucket(f, &f->hash[i]);
 
-	if (read_seqretry(&f->rnd_seqlock, seq))
-		goto evict_again;
-
 	local_bh_enable();
+	cond_resched();
+
+	if (read_seqretry(&f->rnd_seqlock, seq) ||
+	    percpu_counter_sum(&nf->mem))
+		goto evict_again;
 
 	percpu_counter_destroy(&nf->mem);
 }
@@ -284,6 +276,7 @@
 
 	hb = get_frag_bucket_locked(fq, f);
 	hlist_del(&fq->list);
+	fq->flags |= INET_FRAG_COMPLETE;
 	spin_unlock(&hb->chain_lock);
 }
 
@@ -295,7 +288,6 @@
 	if (!(fq->flags & INET_FRAG_COMPLETE)) {
 		fq_unlink(fq, f);
 		atomic_dec(&fq->refcnt);
-		fq->flags |= INET_FRAG_COMPLETE;
 	}
 }
 EXPORT_SYMBOL(inet_frag_kill);
@@ -328,11 +320,12 @@
 		fp = xp;
 	}
 	sum = sum_truesize + f->qsize;
-	sub_frag_mem_limit(q->net, sum);
 
 	if (f->destructor)
 		f->destructor(q);
 	kmem_cache_free(f->frags_cachep, q);
+
+	sub_frag_mem_limit(nf, sum);
 }
 EXPORT_SYMBOL(inet_frag_destroy);