blob: 557363cde58abb87aec526c935323dfea9eb9229 [file] [log] [blame]
Yuchung Cheng659a8ad2015-10-16 21:57:46 -07001#include <linux/tcp.h>
2#include <net/tcp.h>
3
Yuchung Cheng4f41b1c2015-10-16 21:57:47 -07004int sysctl_tcp_recovery __read_mostly = TCP_RACK_LOST_RETRANS;
5
Yuchung Chengdb8da6b2017-01-12 22:11:30 -08006static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
7{
8 struct tcp_sock *tp = tcp_sk(sk);
9
10 tcp_skb_mark_lost_uncond_verify(tp, skb);
11 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
12 /* Account for retransmits that are lost again */
13 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
14 tp->retrans_out -= tcp_skb_pcount(skb);
15 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
16 }
17}
18
Yuchung Cheng4f41b1c2015-10-16 21:57:47 -070019/* Marks a packet lost, if some packet sent later has been (s)acked.
20 * The underlying idea is similar to the traditional dupthresh and FACK
21 * but they look at different metrics:
22 *
23 * dupthresh: 3 OOO packets delivered (packet count)
24 * FACK: sequence delta to highest sacked sequence (sequence space)
25 * RACK: sent time delta to the latest delivered packet (time domain)
26 *
27 * The advantage of RACK is it applies to both original and retransmitted
28 * packet and therefore is robust against tail losses. Another advantage
29 * is being more resilient to reordering by simply allowing some
30 * "settling delay", instead of tweaking the dupthresh.
31 *
32 * The current version is only used after recovery starts but can be
33 * easily extended to detect the first loss.
34 */
Yuchung Chengdeed7be2017-01-12 22:11:32 -080035static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now)
Yuchung Cheng4f41b1c2015-10-16 21:57:47 -070036{
37 struct tcp_sock *tp = tcp_sk(sk);
38 struct sk_buff *skb;
Yuchung Chenge636f8b2017-01-12 22:11:31 -080039 u32 reo_wnd;
Yuchung Cheng4f41b1c2015-10-16 21:57:47 -070040
41 /* To be more reordering resilient, allow min_rtt/4 settling delay
42 * (lower-bounded to 1000uS). We use min_rtt instead of the smoothed
43 * RTT because reordering is often a path property and less related
44 * to queuing or delayed ACKs.
45 *
46 * TODO: measure and adapt to the observed reordering delay, and
47 * use a timer to retransmit like the delayed early retransmit.
48 */
49 reo_wnd = 1000;
50 if (tp->rack.reord && tcp_min_rtt(tp) != ~0U)
51 reo_wnd = max(tcp_min_rtt(tp) >> 2, reo_wnd);
52
53 tcp_for_write_queue(skb, sk) {
54 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
55
56 if (skb == tcp_send_head(sk))
57 break;
58
59 /* Skip ones already (s)acked */
60 if (!after(scb->end_seq, tp->snd_una) ||
61 scb->sacked & TCPCB_SACKED_ACKED)
62 continue;
63
64 if (skb_mstamp_after(&tp->rack.mstamp, &skb->skb_mstamp)) {
Yuchung Chengdeed7be2017-01-12 22:11:32 -080065 /* Step 3 in draft-cheng-tcpm-rack-00.txt:
66 * A packet is lost if its elapsed time is beyond
67 * the recent RTT plus the reordering window.
68 */
69 if (skb_mstamp_us_delta(now, &skb->skb_mstamp) >
70 tp->rack.rtt_us + reo_wnd) {
71 tcp_rack_mark_skb_lost(sk, skb);
72 }
Yuchung Cheng4f41b1c2015-10-16 21:57:47 -070073 } else if (!(scb->sacked & TCPCB_RETRANS)) {
74 /* Original data are sent sequentially so stop early
75 * b/c the rest are all sent after rack_sent
76 */
77 break;
78 }
79 }
Yuchung Chenge636f8b2017-01-12 22:11:31 -080080}
81
Yuchung Chengdeed7be2017-01-12 22:11:32 -080082void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now)
Yuchung Chenge636f8b2017-01-12 22:11:31 -080083{
84 struct tcp_sock *tp = tcp_sk(sk);
85
86 if (inet_csk(sk)->icsk_ca_state < TCP_CA_Recovery || !tp->rack.advanced)
87 return;
88 /* Reset the advanced flag to avoid unnecessary queue scanning */
89 tp->rack.advanced = 0;
Yuchung Chengdeed7be2017-01-12 22:11:32 -080090 tcp_rack_detect_loss(sk, now);
Yuchung Cheng4f41b1c2015-10-16 21:57:47 -070091}
92
Yuchung Chengdeed7be2017-01-12 22:11:32 -080093/* Record the most recently (re)sent time among the (s)acked packets
94 * This is "Step 3: Advance RACK.xmit_time and update RACK.RTT" from
95 * draft-cheng-tcpm-rack-00.txt
96 */
97void tcp_rack_advance(struct tcp_sock *tp, u8 sacked,
98 const struct skb_mstamp *xmit_time,
99 const struct skb_mstamp *ack_time)
Yuchung Cheng659a8ad2015-10-16 21:57:46 -0700100{
Yuchung Chengdeed7be2017-01-12 22:11:32 -0800101 u32 rtt_us;
102
Yuchung Cheng659a8ad2015-10-16 21:57:46 -0700103 if (tp->rack.mstamp.v64 &&
104 !skb_mstamp_after(xmit_time, &tp->rack.mstamp))
105 return;
106
Yuchung Chengdeed7be2017-01-12 22:11:32 -0800107 rtt_us = skb_mstamp_us_delta(ack_time, xmit_time);
Yuchung Cheng659a8ad2015-10-16 21:57:46 -0700108 if (sacked & TCPCB_RETRANS) {
Yuchung Cheng659a8ad2015-10-16 21:57:46 -0700109 /* If the sacked packet was retransmitted, it's ambiguous
110 * whether the retransmission or the original (or the prior
111 * retransmission) was sacked.
112 *
113 * If the original is lost, there is no ambiguity. Otherwise
114 * we assume the original can be delayed up to aRTT + min_rtt.
115 * the aRTT term is bounded by the fast recovery or timeout,
116 * so it's at least one RTT (i.e., retransmission is at least
117 * an RTT later).
118 */
Yuchung Chengdeed7be2017-01-12 22:11:32 -0800119 if (rtt_us < tcp_min_rtt(tp))
Yuchung Cheng659a8ad2015-10-16 21:57:46 -0700120 return;
121 }
Yuchung Chengdeed7be2017-01-12 22:11:32 -0800122 tp->rack.rtt_us = rtt_us;
Yuchung Cheng659a8ad2015-10-16 21:57:46 -0700123 tp->rack.mstamp = *xmit_time;
124 tp->rack.advanced = 1;
125}