[TCP]: Keep TSO enabled even during loss events.

All we need to do is resegment the queue so that we record SACK
information accurately.  The edges of the SACK blocks guide our
resegmenting decisions.

With help from Herbert Xu.

Signed-off-by: David S. Miller <davem@davemloft.net>

commit: 6475be16fd9b3c6746ca4d18959246b13c669ea8 [log] [tgz]
author: David S. Miller <davem@davemloft.net> Thu Sep 01 22:47:01 2005 -0700
committer: David S. Miller <davem@davemloft.net> Thu Sep 01 22:47:01 2005 -0700
tree: 03e0da36680ddb227591a4007fa4e6f18d82782c
parent: ef015786152adaff5a6a8bf0c8ea2f70cee8059d [diff]
diff --git a/include/net/tcp.h b/include/net/tcp.h
index d6bcf13..97af77c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h

@@ -454,6 +454,7 @@
 extern void tcp_xmit_retransmit_queue(struct sock *);
 extern void tcp_simple_retransmit(struct sock *);
 extern int tcp_trim_head(struct sock *, struct sk_buff *, u32);
+extern int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int);
 
 extern void tcp_send_probe0(struct sock *);
 extern void tcp_send_partial(struct sock *);

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1afb080..29222b96 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c

@@ -923,14 +923,6 @@
 	int flag = 0;
 	int i;
 
-	/* So, SACKs for already sent large segments will be lost.
-	 * Not good, but alternative is to resegment the queue. */
-	if (sk->sk_route_caps & NETIF_F_TSO) {
-		sk->sk_route_caps &= ~NETIF_F_TSO;
-		sock_set_flag(sk, SOCK_NO_LARGESEND);
-		tp->mss_cache = tp->mss_cache;
-	}
-
 	if (!tp->sacked_out)
 		tp->fackets_out = 0;
 	prior_fackets = tp->fackets_out;
@@ -978,20 +970,40 @@
 			flag |= FLAG_DATA_LOST;
 
 		sk_stream_for_retrans_queue(skb, sk) {
-			u8 sacked = TCP_SKB_CB(skb)->sacked;
-			int in_sack;
+			int in_sack, pcount;
+			u8 sacked;
 
 			/* The retransmission queue is always in order, so
 			 * we can short-circuit the walk early.
 			 */
-			if(!before(TCP_SKB_CB(skb)->seq, end_seq))
+			if (!before(TCP_SKB_CB(skb)->seq, end_seq))
 				break;
 
-			fack_count += tcp_skb_pcount(skb);
+			pcount = tcp_skb_pcount(skb);
+
+			if (pcount > 1 &&
+			    (after(start_seq, TCP_SKB_CB(skb)->seq) ||
+			     before(end_seq, TCP_SKB_CB(skb)->end_seq))) {
+				unsigned int pkt_len;
+
+				if (after(start_seq, TCP_SKB_CB(skb)->seq))
+					pkt_len = (start_seq -
+						   TCP_SKB_CB(skb)->seq);
+				else
+					pkt_len = (end_seq -
+						   TCP_SKB_CB(skb)->seq);
+				if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->tso_size))
+					break;
+				pcount = tcp_skb_pcount(skb);
+			}
+
+			fack_count += pcount;
 
 			in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
 				!before(end_seq, TCP_SKB_CB(skb)->end_seq);
 
+			sacked = TCP_SKB_CB(skb)->sacked;
+
 			/* Account D-SACK for retransmitted packet. */
 			if ((dup_sack && in_sack) &&
 			    (sacked & TCPCB_RETRANS) &&

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 75b6811..6094db5 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c

@@ -428,11 +428,11 @@
  * packet to the list.  This won't be called frequently, I hope. 
  * Remember, these are still headerless SKBs at this point.
  */
-static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now)
+int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *buff;
-	int nsize;
+	int nsize, old_factor;
 	u16 flags;
 
 	nsize = skb_headlen(skb) - len;
@@ -490,18 +490,29 @@
 		tp->left_out -= tcp_skb_pcount(skb);
 	}
 
+	old_factor = tcp_skb_pcount(skb);
+
 	/* Fix up tso_factor for both original and new SKB.  */
 	tcp_set_skb_tso_segs(sk, skb, mss_now);
 	tcp_set_skb_tso_segs(sk, buff, mss_now);
 
-	if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
-		tp->lost_out += tcp_skb_pcount(skb);
-		tp->left_out += tcp_skb_pcount(skb);
-	}
+	/* If this packet has been sent out already, we must
+	 * adjust the various packet counters.
+	 */
+	if (after(tp->snd_nxt, TCP_SKB_CB(buff)->end_seq)) {
+		int diff = old_factor - tcp_skb_pcount(skb) -
+			tcp_skb_pcount(buff);
 
-	if (TCP_SKB_CB(buff)->sacked&TCPCB_LOST) {
-		tp->lost_out += tcp_skb_pcount(buff);
-		tp->left_out += tcp_skb_pcount(buff);
+		tp->packets_out -= diff;
+		if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
+			tp->lost_out -= diff;
+			tp->left_out -= diff;
+		}
+		if (diff > 0) {
+			tp->fackets_out -= diff;
+			if ((int)tp->fackets_out < 0)
+				tp->fackets_out = 0;
+		}
 	}
 
 	/* Link BUFF into the send queue. */
@@ -1350,12 +1361,6 @@
 	if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
 		if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
 			BUG();
-
-		if (sk->sk_route_caps & NETIF_F_TSO) {
-			sk->sk_route_caps &= ~NETIF_F_TSO;
-			sock_set_flag(sk, SOCK_NO_LARGESEND);
-		}
-
 		if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
 			return -ENOMEM;
 	}
@@ -1370,22 +1375,8 @@
 		return -EAGAIN;
 
 	if (skb->len > cur_mss) {
-		int old_factor = tcp_skb_pcount(skb);
-		int diff;
-
 		if (tcp_fragment(sk, skb, cur_mss, cur_mss))
 			return -ENOMEM; /* We'll try again later. */
-
-		/* New SKB created, account for it. */
-		diff = old_factor - tcp_skb_pcount(skb) -
-		       tcp_skb_pcount(skb->next);
-		tp->packets_out -= diff;
-
-		if (diff > 0) {
-			tp->fackets_out -= diff;
-			if ((int)tp->fackets_out < 0)
-				tp->fackets_out = 0;
-		}
 	}
 
 	/* Collapse two adjacent packets if worthwhile and we can. */
@@ -1993,12 +1984,6 @@
 				TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
 				if (tcp_fragment(sk, skb, seg_size, mss))
 					return -1;
-				/* SWS override triggered forced fragmentation.
-				 * Disable TSO, the connection is too sick. */
-				if (sk->sk_route_caps & NETIF_F_TSO) {
-					sock_set_flag(sk, SOCK_NO_LARGESEND);
-					sk->sk_route_caps &= ~NETIF_F_TSO;
-				}
 			} else if (!tcp_skb_pcount(skb))
 				tcp_set_skb_tso_segs(sk, skb, mss);
commit	6475be16fd9b3c6746ca4d18959246b13c669ea8	[log] [tgz]
author	David S. Miller <davem@davemloft.net>	Thu Sep 01 22:47:01 2005 -0700
committer	David S. Miller <davem@davemloft.net>	Thu Sep 01 22:47:01 2005 -0700
tree	03e0da36680ddb227591a4007fa4e6f18d82782c
parent	ef015786152adaff5a6a8bf0c8ea2f70cee8059d [diff]