tcp: use sequence to break TS ties for RACK loss detection
The packets inside a jumbo skb (e.g., TSO) share the same skb
timestamp, even though they are sent sequentially on the wire. Since
RACK is based on time, it can not detect some packets inside the
same skb are lost. However, we can leverage the packet sequence
numbers as extended timestamps to detect losses. Therefore, when
RACK timestamp is identical to skb's timestamp (i.e., one of the
packets of the skb is acked or sacked), we use the sequence numbers
of the acked and unacked packets to break ties.
We can use the same sequence logic to advance RACK xmit time as
well to detect more losses and avoid timeout.
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index be11918..e42ca11 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1218,7 +1218,8 @@
return sacked;
if (!(sacked & TCPCB_SACKED_ACKED)) {
- tcp_rack_advance(tp, sacked, xmit_time, &state->ack_time);
+ tcp_rack_advance(tp, sacked, end_seq,
+ xmit_time, &state->ack_time);
if (sacked & TCPCB_SACKED_RETRANS) {
/* If the segment is not tagged as lost,
@@ -3171,7 +3172,7 @@
} else if (tcp_is_sack(tp)) {
tp->delivered += acked_pcount;
if (!tcp_skb_spurious_retrans(tp, skb))
- tcp_rack_advance(tp, sacked,
+ tcp_rack_advance(tp, sacked, scb->end_seq,
&skb->skb_mstamp,
&sack->ack_time);
}
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index eb39b1b..1e330a2 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -16,6 +16,14 @@
}
}
+static bool tcp_rack_sent_after(const struct skb_mstamp *t1,
+ const struct skb_mstamp *t2,
+ u32 seq1, u32 seq2)
+{
+ return skb_mstamp_after(t1, t2) ||
+ (t1->v64 == t2->v64 && after(seq1, seq2));
+}
+
/* Marks a packet lost, if some packet sent later has been (s)acked.
* The underlying idea is similar to the traditional dupthresh and FACK
* but they look at different metrics:
@@ -60,7 +68,8 @@
scb->sacked & TCPCB_SACKED_ACKED)
continue;
- if (skb_mstamp_after(&tp->rack.mstamp, &skb->skb_mstamp)) {
+ if (tcp_rack_sent_after(&tp->rack.mstamp, &skb->skb_mstamp,
+ tp->rack.end_seq, scb->end_seq)) {
/* Step 3 in draft-cheng-tcpm-rack-00.txt:
* A packet is lost if its elapsed time is beyond
* the recent RTT plus the reordering window.
@@ -113,14 +122,15 @@
* This is "Step 3: Advance RACK.xmit_time and update RACK.RTT" from
* draft-cheng-tcpm-rack-00.txt
*/
-void tcp_rack_advance(struct tcp_sock *tp, u8 sacked,
+void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
const struct skb_mstamp *xmit_time,
const struct skb_mstamp *ack_time)
{
u32 rtt_us;
if (tp->rack.mstamp.v64 &&
- !skb_mstamp_after(xmit_time, &tp->rack.mstamp))
+ !tcp_rack_sent_after(xmit_time, &tp->rack.mstamp,
+ end_seq, tp->rack.end_seq))
return;
rtt_us = skb_mstamp_us_delta(ack_time, xmit_time);
@@ -140,6 +150,7 @@
}
tp->rack.rtt_us = rtt_us;
tp->rack.mstamp = *xmit_time;
+ tp->rack.end_seq = end_seq;
tp->rack.advanced = 1;
}