blob: 7ea0377229c0f65c1b2b3b08189eac7f3ada99b8 [file] [log] [blame]
Yuchung Cheng659a8ad2015-10-16 21:57:46 -07001#include <linux/tcp.h>
2#include <net/tcp.h>
3
Yuchung Cheng4f41b1c2015-10-16 21:57:47 -07004int sysctl_tcp_recovery __read_mostly = TCP_RACK_LOST_RETRANS;
5
Yuchung Chengdb8da6b2017-01-12 22:11:30 -08006static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
7{
8 struct tcp_sock *tp = tcp_sk(sk);
9
10 tcp_skb_mark_lost_uncond_verify(tp, skb);
11 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
12 /* Account for retransmits that are lost again */
13 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
14 tp->retrans_out -= tcp_skb_pcount(skb);
15 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
16 }
17}
18
Yuchung Cheng4f41b1c2015-10-16 21:57:47 -070019/* Marks a packet lost, if some packet sent later has been (s)acked.
20 * The underlying idea is similar to the traditional dupthresh and FACK
21 * but they look at different metrics:
22 *
23 * dupthresh: 3 OOO packets delivered (packet count)
24 * FACK: sequence delta to highest sacked sequence (sequence space)
25 * RACK: sent time delta to the latest delivered packet (time domain)
26 *
27 * The advantage of RACK is it applies to both original and retransmitted
28 * packet and therefore is robust against tail losses. Another advantage
29 * is being more resilient to reordering by simply allowing some
30 * "settling delay", instead of tweaking the dupthresh.
31 *
32 * The current version is only used after recovery starts but can be
33 * easily extended to detect the first loss.
34 */
Yuchung Chenge636f8b2017-01-12 22:11:31 -080035static void tcp_rack_detect_loss(struct sock *sk)
Yuchung Cheng4f41b1c2015-10-16 21:57:47 -070036{
37 struct tcp_sock *tp = tcp_sk(sk);
38 struct sk_buff *skb;
Yuchung Chenge636f8b2017-01-12 22:11:31 -080039 u32 reo_wnd;
Yuchung Cheng4f41b1c2015-10-16 21:57:47 -070040
41 /* To be more reordering resilient, allow min_rtt/4 settling delay
42 * (lower-bounded to 1000uS). We use min_rtt instead of the smoothed
43 * RTT because reordering is often a path property and less related
44 * to queuing or delayed ACKs.
45 *
46 * TODO: measure and adapt to the observed reordering delay, and
47 * use a timer to retransmit like the delayed early retransmit.
48 */
49 reo_wnd = 1000;
50 if (tp->rack.reord && tcp_min_rtt(tp) != ~0U)
51 reo_wnd = max(tcp_min_rtt(tp) >> 2, reo_wnd);
52
53 tcp_for_write_queue(skb, sk) {
54 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
55
56 if (skb == tcp_send_head(sk))
57 break;
58
59 /* Skip ones already (s)acked */
60 if (!after(scb->end_seq, tp->snd_una) ||
61 scb->sacked & TCPCB_SACKED_ACKED)
62 continue;
63
64 if (skb_mstamp_after(&tp->rack.mstamp, &skb->skb_mstamp)) {
65
66 if (skb_mstamp_us_delta(&tp->rack.mstamp,
67 &skb->skb_mstamp) <= reo_wnd)
68 continue;
69
70 /* skb is lost if packet sent later is sacked */
Yuchung Chengdb8da6b2017-01-12 22:11:30 -080071 tcp_rack_mark_skb_lost(sk, skb);
Yuchung Cheng4f41b1c2015-10-16 21:57:47 -070072 } else if (!(scb->sacked & TCPCB_RETRANS)) {
73 /* Original data are sent sequentially so stop early
74 * b/c the rest are all sent after rack_sent
75 */
76 break;
77 }
78 }
Yuchung Chenge636f8b2017-01-12 22:11:31 -080079}
80
81void tcp_rack_mark_lost(struct sock *sk)
82{
83 struct tcp_sock *tp = tcp_sk(sk);
84
85 if (inet_csk(sk)->icsk_ca_state < TCP_CA_Recovery || !tp->rack.advanced)
86 return;
87 /* Reset the advanced flag to avoid unnecessary queue scanning */
88 tp->rack.advanced = 0;
89 tcp_rack_detect_loss(sk);
Yuchung Cheng4f41b1c2015-10-16 21:57:47 -070090}
91
Yuchung Cheng659a8ad2015-10-16 21:57:46 -070092/* Record the most recently (re)sent time among the (s)acked packets */
93void tcp_rack_advance(struct tcp_sock *tp,
94 const struct skb_mstamp *xmit_time, u8 sacked)
95{
96 if (tp->rack.mstamp.v64 &&
97 !skb_mstamp_after(xmit_time, &tp->rack.mstamp))
98 return;
99
100 if (sacked & TCPCB_RETRANS) {
101 struct skb_mstamp now;
102
103 /* If the sacked packet was retransmitted, it's ambiguous
104 * whether the retransmission or the original (or the prior
105 * retransmission) was sacked.
106 *
107 * If the original is lost, there is no ambiguity. Otherwise
108 * we assume the original can be delayed up to aRTT + min_rtt.
109 * the aRTT term is bounded by the fast recovery or timeout,
110 * so it's at least one RTT (i.e., retransmission is at least
111 * an RTT later).
112 */
113 skb_mstamp_get(&now);
114 if (skb_mstamp_us_delta(&now, xmit_time) < tcp_min_rtt(tp))
115 return;
116 }
117
118 tp->rack.mstamp = *xmit_time;
119 tp->rack.advanced = 1;
120}