tcp: do not assume TCP code is non preemptible
We want to make the TCP stack preemptible, as draining the prequeue
and backlog queues can take a lot of time.
Many SNMP updates were assuming that BH (and preemption) was disabled.
We need to convert some __NET_INC_STATS() calls to NET_INC_STATS()
and some __TCP_INC_STATS() calls to TCP_INC_STATS().
Before using this_cpu_ptr(net->ipv4.tcp_sk) in tcp_v4_send_reset()
and tcp_v4_send_ack(), we add an explicit preempt-disabled section.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1fb19c9..ac85fb4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -869,7 +869,7 @@
else
mib_idx = LINUX_MIB_TCPSACKREORDER;
- __NET_INC_STATS(sock_net(sk), mib_idx);
+ NET_INC_STATS(sock_net(sk), mib_idx);
#if FASTRETRANS_DEBUG > 1
pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
@@ -1062,7 +1062,7 @@
if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
dup_sack = true;
tcp_dsack_seen(tp);
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
} else if (num_sacks > 1) {
u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
@@ -1071,7 +1071,7 @@
!before(start_seq_0, start_seq_1)) {
dup_sack = true;
tcp_dsack_seen(tp);
- __NET_INC_STATS(sock_net(sk),
+ NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPDSACKOFORECV);
}
}
@@ -1289,7 +1289,7 @@
if (skb->len > 0) {
BUG_ON(!tcp_skb_pcount(skb));
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTED);
return false;
}
@@ -1314,7 +1314,7 @@
tcp_unlink_write_queue(skb, sk);
sk_wmem_free_skb(sk, skb);
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);
return true;
}
@@ -1473,7 +1473,7 @@
return skb;
fallback:
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK);
return NULL;
}
@@ -1661,7 +1661,7 @@
mib_idx = LINUX_MIB_TCPSACKDISCARD;
}
- __NET_INC_STATS(sock_net(sk), mib_idx);
+ NET_INC_STATS(sock_net(sk), mib_idx);
if (i == 0)
first_sack_index = -1;
continue;
@@ -1913,7 +1913,7 @@
skb = tcp_write_queue_head(sk);
is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED);
if (is_reneg) {
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
tp->sacked_out = 0;
tp->fackets_out = 0;
}
@@ -2399,7 +2399,7 @@
else
mib_idx = LINUX_MIB_TCPFULLUNDO;
- __NET_INC_STATS(sock_net(sk), mib_idx);
+ NET_INC_STATS(sock_net(sk), mib_idx);
}
if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
/* Hold old state until something *above* high_seq
@@ -2421,7 +2421,7 @@
if (tp->undo_marker && !tp->undo_retrans) {
DBGUNDO(sk, "D-SACK");
tcp_undo_cwnd_reduction(sk, false);
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
return true;
}
return false;
@@ -2436,9 +2436,9 @@
tcp_undo_cwnd_reduction(sk, true);
DBGUNDO(sk, "partial loss");
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
if (frto_undo)
- __NET_INC_STATS(sock_net(sk),
+ NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPSPURIOUSRTOS);
inet_csk(sk)->icsk_retransmits = 0;
if (frto_undo || tcp_is_sack(tp))
@@ -2563,7 +2563,7 @@
icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
icsk->icsk_mtup.probe_size = 0;
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPFAIL);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPFAIL);
}
static void tcp_mtup_probe_success(struct sock *sk)
@@ -2583,7 +2583,7 @@
icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
icsk->icsk_mtup.probe_size = 0;
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
}
/* Do a simple retransmit without using the backoff mechanisms in
@@ -2647,7 +2647,7 @@
else
mib_idx = LINUX_MIB_TCPSACKRECOVERY;
- __NET_INC_STATS(sock_net(sk), mib_idx);
+ NET_INC_STATS(sock_net(sk), mib_idx);
tp->prior_ssthresh = 0;
tcp_init_undo(tp);
@@ -2740,7 +2740,7 @@
DBGUNDO(sk, "partial recovery");
tcp_undo_cwnd_reduction(sk, true);
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
tcp_try_keep_open(sk);
return true;
}
@@ -3434,7 +3434,7 @@
s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time);
if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) {
- __NET_INC_STATS(net, mib_idx);
+ NET_INC_STATS(net, mib_idx);
return true; /* rate-limited: don't send yet! */
}
}
@@ -3467,7 +3467,7 @@
challenge_count = 0;
}
if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
tcp_send_ack(sk);
}
}
@@ -3516,7 +3516,7 @@
tcp_set_ca_state(sk, TCP_CA_CWR);
tcp_end_cwnd_reduction(sk);
tcp_try_keep_open(sk);
- __NET_INC_STATS(sock_net(sk),
+ NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPLOSSPROBERECOVERY);
} else if (!(flag & (FLAG_SND_UNA_ADVANCED |
FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
@@ -3621,14 +3621,14 @@
tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
} else {
u32 ack_ev_flags = CA_ACK_SLOWPATH;
if (ack_seq != TCP_SKB_CB(skb)->end_seq)
flag |= FLAG_DATA;
else
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
@@ -4131,7 +4131,7 @@
else
mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
- __NET_INC_STATS(sock_net(sk), mib_idx);
+ NET_INC_STATS(sock_net(sk), mib_idx);
tp->rx_opt.dsack = 1;
tp->duplicate_sack[0].start_seq = seq;
@@ -4155,7 +4155,7 @@
if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_enter_quickack_mode(sk);
if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
@@ -4305,7 +4305,7 @@
atomic_add(delta, &sk->sk_rmem_alloc);
sk_mem_charge(sk, delta);
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
@@ -4393,7 +4393,7 @@
tcp_ecn_check_ce(tp, skb);
if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
tcp_drop(sk, skb);
return;
}
@@ -4402,7 +4402,7 @@
tp->pred_flags = 0;
inet_csk_schedule_ack(sk);
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
@@ -4457,7 +4457,7 @@
if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
/* All the bits are present. Drop. */
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
tcp_drop(sk, skb);
skb = NULL;
tcp_dsack_set(sk, seq, end_seq);
@@ -4496,7 +4496,7 @@
__skb_unlink(skb1, &tp->out_of_order_queue);
tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
TCP_SKB_CB(skb1)->end_seq);
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
tcp_drop(sk, skb1);
}
@@ -4661,7 +4661,7 @@
if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
/* A retransmit, 2nd most common case. Force an immediate ack. */
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
out_of_window:
@@ -4707,7 +4707,7 @@
__skb_unlink(skb, list);
__kfree_skb(skb);
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
return next;
}
@@ -4866,7 +4866,7 @@
bool res = false;
if (!skb_queue_empty(&tp->out_of_order_queue)) {
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
__skb_queue_purge(&tp->out_of_order_queue);
/* Reset SACK state. A conforming SACK implementation will
@@ -4895,7 +4895,7 @@
SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED);
if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
tcp_clamp_window(sk);
@@ -4925,7 +4925,7 @@
* drop receive data on the floor. It will get retransmitted
* and hopefully then we'll have sufficient space.
*/
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED);
/* Massive buffer overcommit. */
tp->pred_flags = 0;
@@ -5184,7 +5184,7 @@
if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
tcp_paws_discard(sk, skb)) {
if (!th->rst) {
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
if (!tcp_oow_rate_limited(sock_net(sk), skb,
LINUX_MIB_TCPACKSKIPPEDPAWS,
&tp->last_oow_ack_time))
@@ -5236,8 +5236,8 @@
if (th->syn) {
syn_challenge:
if (syn_inerr)
- __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE);
tcp_send_challenge_ack(sk, skb);
goto discard;
}
@@ -5352,7 +5352,7 @@
tcp_data_snd_check(sk);
return;
} else { /* Header too small */
- __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
goto discard;
}
} else {
@@ -5380,7 +5380,7 @@
__skb_pull(skb, tcp_header_len);
tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
- __NET_INC_STATS(sock_net(sk),
+ NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPHPHITSTOUSER);
eaten = 1;
}
@@ -5403,7 +5403,7 @@
tcp_rcv_rtt_measure_ts(sk, skb);
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
/* Bulk data transfer: receiver */
eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
@@ -5460,8 +5460,8 @@
return;
csum_error:
- __TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
- __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
discard:
tcp_drop(sk, skb);
@@ -5553,13 +5553,13 @@
break;
}
tcp_rearm_rto(sk);
- __NET_INC_STATS(sock_net(sk),
+ NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPFASTOPENACTIVEFAIL);
return true;
}
tp->syn_data_acked = tp->syn_data;
if (tp->syn_data_acked)
- __NET_INC_STATS(sock_net(sk),
+ NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPFASTOPENACTIVE);
tcp_fastopen_add_skb(sk, synack);
@@ -5595,7 +5595,7 @@
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
!between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
tcp_time_stamp)) {
- __NET_INC_STATS(sock_net(sk),
+ NET_INC_STATS(sock_net(sk),
LINUX_MIB_PAWSACTIVEREJECTED);
goto reset_and_undo;
}
@@ -5965,7 +5965,7 @@
(TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
tcp_done(sk);
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
return 1;
}
@@ -6022,7 +6022,7 @@
if (sk->sk_shutdown & RCV_SHUTDOWN) {
if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
tcp_reset(sk);
return 1;
}
@@ -6224,7 +6224,7 @@
* timeout.
*/
if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
goto drop;
}
@@ -6271,7 +6271,7 @@
if (dst && strict &&
!tcp_peer_is_proven(req, dst, true,
tmp_opt.saw_tstamp)) {
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
goto drop_and_release;
}
}