tcp: fix SO_RCVLOWAT and RCVBUF autotuning
Applications might use SO_RCVLOWAT on TCP socket hoping to receive
one [E]POLLIN event only when a given amount of bytes are ready in socket
receive queue.
Problem is that receive autotuning is not aware of this constraint,
meaning sk_rcvbuf might be too small to allow all bytes to be stored.
Add a new (struct proto_ops)->set_rcvlowat method so that a protocol
can override the default setsockopt(SO_RCVLOWAT) behavior.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bccc4c2..0abd8d1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1701,6 +1701,27 @@ int tcp_peek_len(struct socket *sock)
}
EXPORT_SYMBOL(tcp_peek_len);
+/* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */
+int tcp_set_rcvlowat(struct sock *sk, int val)
+{
+ sk->sk_rcvlowat = val ? : 1;
+ if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
+ return 0;
+
+ /* val comes from user space and might be close to INT_MAX */
+ val <<= 1;
+ if (val < 0)
+ val = INT_MAX;
+
+ val = min(val, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+ if (val > sk->sk_rcvbuf) {
+ sk->sk_rcvbuf = val;
+ tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val);
+ }
+ return 0;
+}
+EXPORT_SYMBOL(tcp_set_rcvlowat);
+
static void tcp_update_recv_tstamps(struct sk_buff *skb,
struct scm_timestamping *tss)
{