blob: 7881b96d2b7214a87563c52ab0d06f15efc7e1bb [file] [log] [blame]
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *		Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:		ip_dynaddr bits
 *		Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */
52
#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>
#include <net/busy_poll.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>
88
/* sysctl knobs. tcp_tw_reuse gates reuse of TIME-WAIT connection state for
 * new outgoing connections in tcp_twsk_unique() below; tcp_low_latency is
 * exported for use by other parts of the TCP stack (not referenced in the
 * visible portion of this file).
 */
int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_low_latency);

#ifdef CONFIG_TCP_MD5SIG
/* Forward declaration: computes the TCP-MD5 signature over the given
 * addresses and TCP header (used when building RST/ACK replies);
 * presumably defined later in this file.
 */
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

/* Global TCP socket lookup tables; exported for other protocol code. */
struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100
Octavian Purdila936b8bd2014-06-25 17:09:57 +0300101static __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700102{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700103 return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
104 ip_hdr(skb)->saddr,
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700105 tcp_hdr(skb)->dest,
106 tcp_hdr(skb)->source);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700107}
108
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -0800109int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
110{
111 const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
112 struct tcp_sock *tp = tcp_sk(sk);
113
114 /* With PAWS, it is safe from the viewpoint
115 of data integrity. Even without PAWS it is safe provided sequence
116 spaces do not overlap i.e. at data rates <= 80Mbit/sec.
117
118 Actually, the idea is close to VJ's one, only timestamp cache is
119 held not per host, but per port pair and TW bucket is used as state
120 holder.
121
122 If TW bucket has been already destroyed we fall back to VJ's scheme
123 and use initial timestamp retrieved from peer table.
124 */
125 if (tcptw->tw_ts_recent_stamp &&
126 (twp == NULL || (sysctl_tcp_tw_reuse &&
James Morris9d729f72007-03-04 16:12:44 -0800127 get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -0800128 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
129 if (tp->write_seq == 0)
130 tp->write_seq = 1;
131 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
132 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
133 sock_hold(sktw);
134 return 1;
135 }
136
137 return 0;
138}
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -0800139EXPORT_SYMBOL_GPL(tcp_twsk_unique);
140
/* This will initiate an outgoing connection. */
/*
 * tcp_v4_connect - active open (client-side connect) for an IPv4 TCP socket.
 * @sk:       socket being connected
 * @uaddr:    destination; must be a struct sockaddr_in (AF_INET)
 * @addr_len: caller-supplied length of @uaddr
 *
 * Resolves a route to the destination, picks a source address/port, enters
 * the socket into the hash tables, selects an initial sequence number and
 * sends the SYN via tcp_connect().  Returns 0 on success or a negative
 * errno; on failure the socket is returned to TCP_CLOSE with its port
 * released.
 */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	/* With IP source routing (SRR) the first hop differs from the
	 * final destination; route towards the first router instead.
	 */
	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     sock_owned_by_user(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	/* TCP never connects to multicast or broadcast addresses. */
	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	/* If the socket was not bound to a source address, take the one
	 * the routing lookup chose.
	 */
	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	inet->inet_rcv_saddr = inet->inet_saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state: destination changed, so cached
		 * timestamps (and, unless under repair, write_seq) are stale.
		 */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq	   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
		tcp_fetch_timewait_stamp(sk, &rt->dst);

	inet->inet_dport = usin->sin_port;
	inet->inet_daddr = daddr;

	inet_set_txhash(sk);

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	/* Re-route now that the (possibly autoselected) source port is
	 * known, so the flow key matches the final 4-tuple.
	 */
	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket.  */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);

	if (!tp->write_seq && likely(!tp->repair))
		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
							   inet->inet_daddr,
							   inet->inet_sport,
							   usin->sin_port);

	inet->inet_id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);

	/* Route ownership passed to the socket via sk_setup_caps() above;
	 * do not drop it again on the failure path.
	 */
	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267
/*
 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
 * It can be called through tcp_release_cb() if socket was owned by user
 * at the time tcp_v4_err() was called to handle ICMP message.
 *
 * The new MTU is communicated through tcp_sk(sk)->mtu_info, stored by the
 * ICMP handler before this function runs.
 */
void tcp_v4_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);
	u32 mtu = tcp_sk(sk)->mtu_info;

	/* Let the routing layer validate/update the cached path MTU;
	 * bail out if the socket no longer has a usable dst.
	 */
	dst = inet_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	/* Use the MTU the route settled on, not the raw ICMP value. */
	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    ip_sk_accept_pmtu(sk) &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
EXPORT_SYMBOL(tcp_v4_mtu_reduced);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305
David S. Miller55be7a92012-07-11 21:27:49 -0700306static void do_redirect(struct sk_buff *skb, struct sock *sk)
307{
308 struct dst_entry *dst = __sk_dst_check(sk, 0);
309
David S. Miller1ed5c482012-07-12 00:41:25 -0700310 if (dst)
David S. Miller6700c272012-07-17 03:29:28 -0700311 dst->ops->redirect(dst, sk, skb);
David S. Miller55be7a92012-07-11 21:27:49 -0700312}
313
/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	/* The ICMP payload starts with the offending IP header followed by
	 * (at least) the first 8 bytes of the original TCP header.
	 */
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	struct request_sock *fastopen;
	__u32 seq, snd_una;
	__u32 remaining;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	/* Find the socket the original (errored) segment belonged to. */
	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			iph->saddr, th->source, inet_iif(icmp_skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 * We do take care of PMTU discovery (RFC1191) special case :
	 * we can receive locally generated ICMP messages while socket is held.
	 */
	if (sock_owned_by_user(sk)) {
		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
			NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
	}
	if (sk->sk_state == TCP_CLOSE)
		goto out;

	/* Drop ICMPs whose embedded TTL is below the socket's minimum
	 * (IP_MINTTL protection).
	 */
	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = tp->fastopen_rsk;
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	/* Ignore ICMPs quoting a sequence outside our send window. */
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_REDIRECT:
		do_redirect(icmp_skb, sk);
		goto out;
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			/* We are not interested in TCP_LISTEN and open_requests
			 * (SYN-ACKs send out by Linux are always <576bytes so
			 * they should go through unfragmented).
			 */
			if (sk->sk_state == TCP_LISTEN)
				goto out;

			/* Stash the new MTU; if the socket is owned by user
			 * context, defer the actual update to tcp_release_cb()
			 * via the TSQ flag (with an extra socket reference).
			 */
			tp->mtu_info = info;
			if (!sock_owned_by_user(sk)) {
				tcp_v4_mtu_reduced(sk);
			} else {
				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
					sock_hold(sk);
			}
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff || fastopen)
			break;

		if (sock_owned_by_user(sk))
			break;

		/* Revert one step of exponential backoff and recompute the
		 * RTO from srtt (or the initial timeout when unmeasured).
		 */
		icsk->icsk_backoff--;
		inet_csk(sk)->icsk_rto = (tp->srtt_us ? __tcp_set_rto(tp) :
			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
		tcp_bound_rto(sk);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

		/* Time left of the shortened RTO, measured from the head
		 * skb's transmit timestamp.
		 */
		remaining = icsk->icsk_rto -
			    min(icsk->icsk_rto,
				tcp_time_stamp - tcp_skb_timestamp(skb));

		if (remaining) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		/* The error is for an embryonic (SYN_RECV) connection;
		 * locate its request_sock on the listener.
		 */
		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		WARN_ON(req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * is already accepted it is treated as a connected one below.
		 */
		if (fastopen && fastopen->sk == NULL)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even this two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
540
Daniel Borkmann28850dc2013-06-07 05:11:46 +0000541void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700542{
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700543 struct tcphdr *th = tcp_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544
Patrick McHardy84fa7932006-08-29 16:44:56 -0700545 if (skb->ip_summed == CHECKSUM_PARTIAL) {
Herbert Xu419f9f82010-04-11 02:15:53 +0000546 th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
Herbert Xu663ead32007-04-09 11:59:07 -0700547 skb->csum_start = skb_transport_header(skb) - skb->head;
Al Viroff1dcad2006-11-20 18:07:29 -0800548 skb->csum_offset = offsetof(struct tcphdr, check);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549 } else {
Herbert Xu419f9f82010-04-11 02:15:53 +0000550 th->check = tcp_v4_check(skb->len, saddr, daddr,
Joe Perches07f07572008-11-19 15:44:53 -0800551 csum_partial(th,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552 th->doff << 2,
553 skb->csum));
554 }
555}
556
Herbert Xu419f9f82010-04-11 02:15:53 +0000557/* This routine computes an IPv4 TCP checksum. */
Herbert Xubb296242010-04-11 02:15:55 +0000558void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
Herbert Xu419f9f82010-04-11 02:15:53 +0000559{
Eric Dumazetcf533ea2011-10-21 05:22:42 -0400560 const struct inet_sock *inet = inet_sk(sk);
Herbert Xu419f9f82010-04-11 02:15:53 +0000561
562 __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
563}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000564EXPORT_SYMBOL(tcp_v4_send_check);
Herbert Xu419f9f82010-04-11 02:15:53 +0000565
/*
 * This routine will send an RST to the other tcp.
 *
 * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 * Answer: if a packet caused RST, it is not for a socket
 *	   existing in our system, if it is matched to a socket,
 *	   it is just duplicate segment or bug in other side's TCP.
 *	   So that we build reply only basing on parameters
 *	   arrived with segment.
 * Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	/* On-stack reply: a bare TCP header, optionally followed by an
	 * MD5 signature option.
	 */
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	/* Only reply to segments that were actually addressed to us. */
	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	/* RFC 793 RST generation: echo the peer's ACK as our SEQ when the
	 * offending segment carried an ACK; otherwise SEQ=0 with an ACK
	 * covering exactly the data (plus SYN/FIN) the segment consumed.
	 */
	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	hash_location = tcp_parse_md5sig_option(th);
	if (!sk && hash_location) {
		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we are not loose security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
					     &tcp_hashinfo, ip_hdr(skb)->saddr,
					     th->source, ip_hdr(skb)->daddr,
					     ntohs(th->source), inet_iif(skb));
		/* don't send rst if it can't find key */
		if (!sk1)
			return;
		rcu_read_lock();
		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
		if (!key)
			goto release_sk1;

		/* Verify the incoming segment's signature before replying. */
		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto release_sk1;
	} else {
		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
					     &ip_hdr(skb)->saddr,
					     AF_INET) : NULL;
	}

	if (key) {
		/* Append NOP,NOP,MD5SIG option and sign the reply header. */
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
	/* When socket is gone, all binding information is lost.
	 * routing might fail in this case. No choice here, if we choose to force
	 * input interface, we will misroute in case of asymmetric route.
	 */
	if (sk)
		arg.bound_dev_if = sk->sk_bound_dev_if;

	net = dev_net(skb_dst(skb)->dev);
	arg.tos = ip_hdr(skb)->tos;
	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);

#ifdef CONFIG_TCP_MD5SIG
release_sk1:
	/* Drop the RCU read lock and the listener reference taken above. */
	if (sk1) {
		rcu_read_unlock();
		sock_put(sk1);
	}
#endif
}
698
/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
 * outside of full socket context, is certainly ugly. What can I do?
 */
702
/* Build and transmit a bare ACK in reply to @skb without a full socket
 * (TIME-WAIT and SYN-RECV replies).  @seq/@ack/@win fill the TCP header;
 * a timestamp option (@tsval/@tsecr) is added when @tsecr != 0, and an
 * MD5 signature option is appended when @key is non-NULL.
 */
static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	/* Reply packet: TCP header plus room for the largest option mix
	 * we may emit (timestamp, and MD5 when compiled in).
	 */
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
	struct net *net = dev_net(skb_dst(skb)->dev);

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	/* Timestamp option first, so the MD5 option offset below is fixed. */
	if (tsecr) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tsval);
		rep.opt[2] = htonl(tsecr);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		/* MD5 option lands after the timestamp words, if present. */
		int offset = (tsecr) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		/* doff must cover the enlarged option space. */
		rep.th.doff = arg.iov[0].iov_len/4;

		/* Sign the header we just built (no payload). */
		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	/* Partial pseudo-header checksum; csumoffset lets IP finish it. */
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	/* Reply goes back to the sender of @skb (addresses swapped). */
	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}
772
773static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
774{
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700775 struct inet_timewait_sock *tw = inet_twsk(sk);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800776 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700777
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900778 tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200779 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
Andrey Vaginee684b62013-02-11 05:50:19 +0000780 tcp_time_stamp + tcptw->tw_ts_offset,
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900781 tcptw->tw_ts_recent,
782 tw->tw_bound_dev_if,
KOVACS Krisztian88ef4a52008-10-01 07:41:00 -0700783 tcp_twsk_md5_key(tcptw),
Eric Dumazet66b13d92011-10-24 03:06:21 -0400784 tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
785 tw->tw_tos
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900786 );
Linus Torvalds1da177e2005-04-16 15:20:36 -0700787
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700788 inet_twsk_put(tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700789}
790
Gui Jianfeng6edafaa2008-08-06 23:50:04 -0700791static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200792 struct request_sock *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700793{
Jerry Chu168a8f52012-08-31 12:29:13 +0000794 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
795 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
796 */
797 tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
798 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
799 tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
Andrey Vaginee684b62013-02-11 05:50:19 +0000800 tcp_time_stamp,
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900801 req->ts_recent,
802 0,
Eric Dumazeta915da9b2012-01-31 05:18:33 +0000803 tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
804 AF_INET),
Eric Dumazet66b13d92011-10-24 03:06:21 -0400805 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
806 ip_hdr(skb)->tos);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807}
808
Linus Torvalds1da177e2005-04-16 15:20:36 -0700809/*
Kris Katterjohn9bf1d832008-02-17 22:29:19 -0800810 * Send a SYN-ACK after having received a SYN.
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700811 * This still operates on a request_sock only, not on a big
Linus Torvalds1da177e2005-04-16 15:20:36 -0700812 * socket.
813 */
Octavian Purdila72659ec2010-01-17 19:09:39 -0800814static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
Octavian Purdilad6274bd2014-06-25 17:09:58 +0300815 struct flowi *fl,
Octavian Purdila72659ec2010-01-17 19:09:39 -0800816 struct request_sock *req,
Yuchung Cheng843f4a52014-05-11 20:22:11 -0700817 u16 queue_mapping,
818 struct tcp_fastopen_cookie *foc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700819{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700820 const struct inet_request_sock *ireq = inet_rsk(req);
David S. Miller6bd023f2011-05-18 18:32:03 -0400821 struct flowi4 fl4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822 int err = -1;
Weilong Chend41db5a2013-12-23 14:37:28 +0800823 struct sk_buff *skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700824
825 /* First, grab a route. */
David S. Millerba3f7f02012-07-17 14:02:46 -0700826 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
Denis V. Lunevfd80eb92008-02-29 11:43:03 -0800827 return -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700828
Yuchung Cheng843f4a52014-05-11 20:22:11 -0700829 skb = tcp_make_synack(sk, dst, req, foc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700830
831 if (skb) {
Eric Dumazet634fb9792013-10-09 15:21:29 -0700832 __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700833
Eric Dumazetfff32692012-06-01 01:47:50 +0000834 skb_set_queue_mapping(skb, queue_mapping);
Eric Dumazet634fb9792013-10-09 15:21:29 -0700835 err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
836 ireq->ir_rmt_addr,
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700837 ireq->opt);
Gerrit Renkerb9df3cb2006-11-14 11:21:36 -0200838 err = net_xmit_eval(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700839 }
840
Linus Torvalds1da177e2005-04-16 15:20:36 -0700841 return err;
842}
843
844/*
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700845 * IPv4 request_sock destructor.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700846 */
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700847static void tcp_v4_reqsk_destructor(struct request_sock *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700848{
Jesper Juhla51482b2005-11-08 09:41:34 -0800849 kfree(inet_rsk(req)->opt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700850}
851
Eric Dumazet946cedc2011-08-30 03:21:44 +0000852/*
Eric Dumazeta2a385d2012-05-16 23:15:34 +0000853 * Return true if a syncookie should be sent
Eric Dumazet946cedc2011-08-30 03:21:44 +0000854 */
Eric Dumazeta2a385d2012-05-16 23:15:34 +0000855bool tcp_syn_flood_action(struct sock *sk,
Eric Dumazet946cedc2011-08-30 03:21:44 +0000856 const struct sk_buff *skb,
857 const char *proto)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700858{
Eric Dumazet946cedc2011-08-30 03:21:44 +0000859 const char *msg = "Dropping request";
Eric Dumazeta2a385d2012-05-16 23:15:34 +0000860 bool want_cookie = false;
Eric Dumazet946cedc2011-08-30 03:21:44 +0000861 struct listen_sock *lopt;
862
Florian Westphal2a1d4bd2010-06-03 00:43:12 +0000863#ifdef CONFIG_SYN_COOKIES
Eric Dumazet946cedc2011-08-30 03:21:44 +0000864 if (sysctl_tcp_syncookies) {
Florian Westphal2a1d4bd2010-06-03 00:43:12 +0000865 msg = "Sending cookies";
Eric Dumazeta2a385d2012-05-16 23:15:34 +0000866 want_cookie = true;
Eric Dumazet946cedc2011-08-30 03:21:44 +0000867 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
868 } else
Arnaldo Carvalho de Melo80e40da2006-01-04 01:58:06 -0200869#endif
Eric Dumazet946cedc2011-08-30 03:21:44 +0000870 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
Florian Westphal2a1d4bd2010-06-03 00:43:12 +0000871
Eric Dumazet946cedc2011-08-30 03:21:44 +0000872 lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
Hannes Frederic Sowa5ad37d52013-07-26 17:43:23 +0200873 if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
Eric Dumazet946cedc2011-08-30 03:21:44 +0000874 lopt->synflood_warned = 1;
Joe Perchesafd465032012-03-12 07:03:32 +0000875 pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
Eric Dumazet946cedc2011-08-30 03:21:44 +0000876 proto, ntohs(tcp_hdr(skb)->dest), msg);
877 }
878 return want_cookie;
Florian Westphal2a1d4bd2010-06-03 00:43:12 +0000879}
Eric Dumazet946cedc2011-08-30 03:21:44 +0000880EXPORT_SYMBOL(tcp_syn_flood_action);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700881
882/*
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700883 * Save and compile IPv4 options into the request_sock if needed.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884 */
Christoph Paasch5dff7472012-09-26 11:59:09 +0000885static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700886{
Eric Dumazetf6d8bd02011-04-21 09:45:37 +0000887 const struct ip_options *opt = &(IPCB(skb)->opt);
888 struct ip_options_rcu *dopt = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700889
890 if (opt && opt->optlen) {
Eric Dumazetf6d8bd02011-04-21 09:45:37 +0000891 int opt_size = sizeof(*dopt) + opt->optlen;
892
Linus Torvalds1da177e2005-04-16 15:20:36 -0700893 dopt = kmalloc(opt_size, GFP_ATOMIC);
894 if (dopt) {
Eric Dumazetf6d8bd02011-04-21 09:45:37 +0000895 if (ip_options_echo(&dopt->opt, skb)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700896 kfree(dopt);
897 dopt = NULL;
898 }
899 }
900 }
901 return dopt;
902}
903
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800904#ifdef CONFIG_TCP_MD5SIG
905/*
906 * RFC2385 MD5 checksumming requires a mapping of
907 * IP address->MD5 Key.
908 * We need to maintain these in the sk structure.
909 */
910
911/* Find the Key structure for an address. */
Eric Dumazeta915da9b2012-01-31 05:18:33 +0000912struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
913 const union tcp_md5_addr *addr,
914 int family)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800915{
916 struct tcp_sock *tp = tcp_sk(sk);
Eric Dumazeta915da9b2012-01-31 05:18:33 +0000917 struct tcp_md5sig_key *key;
Eric Dumazeta915da9b2012-01-31 05:18:33 +0000918 unsigned int size = sizeof(struct in_addr);
Eric Dumazeta8afca02012-01-31 18:45:40 +0000919 struct tcp_md5sig_info *md5sig;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800920
Eric Dumazeta8afca02012-01-31 18:45:40 +0000921 /* caller either holds rcu_read_lock() or socket lock */
922 md5sig = rcu_dereference_check(tp->md5sig_info,
Eric Dumazetb4fb05e2012-03-07 04:45:43 +0000923 sock_owned_by_user(sk) ||
924 lockdep_is_held(&sk->sk_lock.slock));
Eric Dumazeta8afca02012-01-31 18:45:40 +0000925 if (!md5sig)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800926 return NULL;
Eric Dumazeta915da9b2012-01-31 05:18:33 +0000927#if IS_ENABLED(CONFIG_IPV6)
928 if (family == AF_INET6)
929 size = sizeof(struct in6_addr);
930#endif
Sasha Levinb67bfe02013-02-27 17:06:00 -0800931 hlist_for_each_entry_rcu(key, &md5sig->head, node) {
Eric Dumazeta915da9b2012-01-31 05:18:33 +0000932 if (key->family != family)
933 continue;
934 if (!memcmp(&key->addr, addr, size))
935 return key;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800936 }
937 return NULL;
938}
Eric Dumazeta915da9b2012-01-31 05:18:33 +0000939EXPORT_SYMBOL(tcp_md5_do_lookup);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800940
941struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
942 struct sock *addr_sk)
943{
Eric Dumazeta915da9b2012-01-31 05:18:33 +0000944 union tcp_md5_addr *addr;
945
946 addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
947 return tcp_md5_do_lookup(sk, addr, AF_INET);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800948}
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800949EXPORT_SYMBOL(tcp_v4_md5_lookup);
950
Adrian Bunkf5b99bc2006-11-30 17:22:29 -0800951static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
952 struct request_sock *req)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800953{
Eric Dumazeta915da9b2012-01-31 05:18:33 +0000954 union tcp_md5_addr *addr;
955
Eric Dumazet634fb9792013-10-09 15:21:29 -0700956 addr = (union tcp_md5_addr *)&inet_rsk(req)->ir_rmt_addr;
Eric Dumazeta915da9b2012-01-31 05:18:33 +0000957 return tcp_md5_do_lookup(sk, addr, AF_INET);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800958}
959
/* This can be called on a newly created socket, from other files */
/* Install (or update in place) the MD5 key for @addr/@family on @sk.
 * Returns 0 on success or -ENOMEM.  Caller must own the socket (the
 * md5sig_info dereference below is protected by sock_owned_by_user()).
 */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (key) {
		/* Pre-existing entry - just update that one. */
		memcpy(key->key, newkey, newkeylen);
		key->keylen = newkeylen;
		return 0;
	}

	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   sock_owned_by_user(sk));
	if (!md5sig) {
		/* First key on this socket: allocate the info container. */
		md5sig = kmalloc(sizeof(*md5sig), gfp);
		if (!md5sig)
			return -ENOMEM;

		/* MD5-signed segments cannot be offloaded/GSO'd. */
		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		INIT_HLIST_HEAD(&md5sig->head);
		rcu_assign_pointer(tp->md5sig_info, md5sig);
	}

	/* Charge the key to the socket's option memory. */
	key = sock_kmalloc(sk, sizeof(*key), gfp);
	if (!key)
		return -ENOMEM;
	if (!tcp_alloc_md5sig_pool()) {
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}

	/* Fill the key completely BEFORE publishing it via RCU below. */
	memcpy(key->key, newkey, newkeylen);
	key->keylen = newkeylen;
	key->family = family;
	memcpy(&key->addr, addr,
	       (family == AF_INET6) ? sizeof(struct in6_addr) :
				      sizeof(struct in_addr));
	hlist_add_head_rcu(&key->node, &md5sig->head);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_add);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001007
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001008int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001009{
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001010 struct tcp_md5sig_key *key;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001011
Aydin Arikc0353c72013-06-14 18:56:31 +12001012 key = tcp_md5_do_lookup(sk, addr, family);
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001013 if (!key)
1014 return -ENOENT;
1015 hlist_del_rcu(&key->node);
Eric Dumazet5f3d9cb2012-01-31 10:56:48 +00001016 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001017 kfree_rcu(key, rcu);
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001018 return 0;
1019}
1020EXPORT_SYMBOL(tcp_md5_do_del);
1021
stephen hemmingere0683e702012-10-26 14:31:40 +00001022static void tcp_clear_md5_list(struct sock *sk)
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001023{
1024 struct tcp_sock *tp = tcp_sk(sk);
1025 struct tcp_md5sig_key *key;
Sasha Levinb67bfe02013-02-27 17:06:00 -08001026 struct hlist_node *n;
Eric Dumazeta8afca02012-01-31 18:45:40 +00001027 struct tcp_md5sig_info *md5sig;
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001028
Eric Dumazeta8afca02012-01-31 18:45:40 +00001029 md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1030
Sasha Levinb67bfe02013-02-27 17:06:00 -08001031 hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001032 hlist_del_rcu(&key->node);
Eric Dumazet5f3d9cb2012-01-31 10:56:48 +00001033 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001034 kfree_rcu(key, rcu);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001035 }
1036}
1037
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02001038static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1039 int optlen)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001040{
1041 struct tcp_md5sig cmd;
1042 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001043
1044 if (optlen < sizeof(cmd))
1045 return -EINVAL;
1046
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02001047 if (copy_from_user(&cmd, optval, sizeof(cmd)))
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001048 return -EFAULT;
1049
1050 if (sin->sin_family != AF_INET)
1051 return -EINVAL;
1052
Dmitry Popov64a124e2014-08-03 22:45:19 +04001053 if (!cmd.tcpm_keylen)
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001054 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1055 AF_INET);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001056
1057 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1058 return -EINVAL;
1059
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001060 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1061 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1062 GFP_KERNEL);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001063}
1064
Adam Langley49a72df2008-07-19 00:01:42 -07001065static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1066 __be32 daddr, __be32 saddr, int nbytes)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001067{
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001068 struct tcp4_pseudohdr *bp;
Adam Langley49a72df2008-07-19 00:01:42 -07001069 struct scatterlist sg;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001070
1071 bp = &hp->md5_blk.ip4;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001072
1073 /*
Adam Langley49a72df2008-07-19 00:01:42 -07001074 * 1. the TCP pseudo-header (in the order: source IP address,
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001075 * destination IP address, zero-padded protocol number, and
1076 * segment length)
1077 */
1078 bp->saddr = saddr;
1079 bp->daddr = daddr;
1080 bp->pad = 0;
YOSHIFUJI Hideaki076fb722008-04-17 12:48:12 +09001081 bp->protocol = IPPROTO_TCP;
Adam Langley49a72df2008-07-19 00:01:42 -07001082 bp->len = cpu_to_be16(nbytes);
David S. Millerc7da57a2007-10-26 00:41:21 -07001083
Adam Langley49a72df2008-07-19 00:01:42 -07001084 sg_init_one(&sg, bp, sizeof(*bp));
1085 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1086}
1087
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001088static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
Eric Dumazet318cf7a2011-10-24 02:46:04 -04001089 __be32 daddr, __be32 saddr, const struct tcphdr *th)
Adam Langley49a72df2008-07-19 00:01:42 -07001090{
1091 struct tcp_md5sig_pool *hp;
1092 struct hash_desc *desc;
1093
1094 hp = tcp_get_md5sig_pool();
1095 if (!hp)
1096 goto clear_hash_noput;
1097 desc = &hp->md5_desc;
1098
1099 if (crypto_hash_init(desc))
1100 goto clear_hash;
1101 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1102 goto clear_hash;
1103 if (tcp_md5_hash_header(hp, th))
1104 goto clear_hash;
1105 if (tcp_md5_hash_key(hp, key))
1106 goto clear_hash;
1107 if (crypto_hash_final(desc, md5_hash))
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001108 goto clear_hash;
1109
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001110 tcp_put_md5sig_pool();
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001111 return 0;
Adam Langley49a72df2008-07-19 00:01:42 -07001112
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001113clear_hash:
1114 tcp_put_md5sig_pool();
1115clear_hash_noput:
1116 memset(md5_hash, 0, 16);
Adam Langley49a72df2008-07-19 00:01:42 -07001117 return 1;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001118}
1119
Adam Langley49a72df2008-07-19 00:01:42 -07001120int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
Eric Dumazet318cf7a2011-10-24 02:46:04 -04001121 const struct sock *sk, const struct request_sock *req,
1122 const struct sk_buff *skb)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001123{
Adam Langley49a72df2008-07-19 00:01:42 -07001124 struct tcp_md5sig_pool *hp;
1125 struct hash_desc *desc;
Eric Dumazet318cf7a2011-10-24 02:46:04 -04001126 const struct tcphdr *th = tcp_hdr(skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001127 __be32 saddr, daddr;
1128
1129 if (sk) {
Eric Dumazetc720c7e2009-10-15 06:30:45 +00001130 saddr = inet_sk(sk)->inet_saddr;
1131 daddr = inet_sk(sk)->inet_daddr;
Adam Langley49a72df2008-07-19 00:01:42 -07001132 } else if (req) {
Eric Dumazet634fb9792013-10-09 15:21:29 -07001133 saddr = inet_rsk(req)->ir_loc_addr;
1134 daddr = inet_rsk(req)->ir_rmt_addr;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001135 } else {
Adam Langley49a72df2008-07-19 00:01:42 -07001136 const struct iphdr *iph = ip_hdr(skb);
1137 saddr = iph->saddr;
1138 daddr = iph->daddr;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001139 }
Adam Langley49a72df2008-07-19 00:01:42 -07001140
1141 hp = tcp_get_md5sig_pool();
1142 if (!hp)
1143 goto clear_hash_noput;
1144 desc = &hp->md5_desc;
1145
1146 if (crypto_hash_init(desc))
1147 goto clear_hash;
1148
1149 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1150 goto clear_hash;
1151 if (tcp_md5_hash_header(hp, th))
1152 goto clear_hash;
1153 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1154 goto clear_hash;
1155 if (tcp_md5_hash_key(hp, key))
1156 goto clear_hash;
1157 if (crypto_hash_final(desc, md5_hash))
1158 goto clear_hash;
1159
1160 tcp_put_md5sig_pool();
1161 return 0;
1162
1163clear_hash:
1164 tcp_put_md5sig_pool();
1165clear_hash_noput:
1166 memset(md5_hash, 0, 16);
1167 return 1;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001168}
Adam Langley49a72df2008-07-19 00:01:42 -07001169EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001170
Dmitry Popov9ea88a12014-08-07 02:38:22 +04001171static bool __tcp_v4_inbound_md5_hash(struct sock *sk,
1172 const struct sk_buff *skb)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001173{
1174 /*
1175 * This gets called for each TCP segment that arrives
1176 * so we want to be efficient.
1177 * We have 3 drop cases:
1178 * o No MD5 hash and one expected.
1179 * o MD5 hash and we're not expecting one.
1180 * o MD5 hash and its wrong.
1181 */
Eric Dumazetcf533ea2011-10-21 05:22:42 -04001182 const __u8 *hash_location = NULL;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001183 struct tcp_md5sig_key *hash_expected;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001184 const struct iphdr *iph = ip_hdr(skb);
Eric Dumazetcf533ea2011-10-21 05:22:42 -04001185 const struct tcphdr *th = tcp_hdr(skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001186 int genhash;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001187 unsigned char newhash[16];
1188
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001189 hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1190 AF_INET);
YOSHIFUJI Hideaki7d5d5522008-04-17 12:29:53 +09001191 hash_location = tcp_parse_md5sig_option(th);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001192
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001193 /* We've parsed the options - do we have a hash? */
1194 if (!hash_expected && !hash_location)
Eric Dumazeta2a385d2012-05-16 23:15:34 +00001195 return false;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001196
1197 if (hash_expected && !hash_location) {
David S. Miller785957d2008-07-30 03:03:15 -07001198 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
Eric Dumazeta2a385d2012-05-16 23:15:34 +00001199 return true;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001200 }
1201
1202 if (!hash_expected && hash_location) {
David S. Miller785957d2008-07-30 03:03:15 -07001203 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
Eric Dumazeta2a385d2012-05-16 23:15:34 +00001204 return true;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001205 }
1206
1207 /* Okay, so this is hash_expected and hash_location -
1208 * so we need to calculate the checksum.
1209 */
Adam Langley49a72df2008-07-19 00:01:42 -07001210 genhash = tcp_v4_md5_hash_skb(newhash,
1211 hash_expected,
1212 NULL, NULL, skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001213
1214 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
Joe Perchese87cc472012-05-13 21:56:26 +00001215 net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1216 &iph->saddr, ntohs(th->source),
1217 &iph->daddr, ntohs(th->dest),
1218 genhash ? " tcp_v4_calc_md5_hash failed"
1219 : "");
Eric Dumazeta2a385d2012-05-16 23:15:34 +00001220 return true;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001221 }
Eric Dumazeta2a385d2012-05-16 23:15:34 +00001222 return false;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001223}
1224
Dmitry Popov9ea88a12014-08-07 02:38:22 +04001225static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1226{
1227 bool ret;
1228
1229 rcu_read_lock();
1230 ret = __tcp_v4_inbound_md5_hash(sk, skb);
1231 rcu_read_unlock();
1232
1233 return ret;
1234}
1235
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001236#endif
1237
Octavian Purdila16bea702014-06-25 17:09:53 +03001238static void tcp_v4_init_req(struct request_sock *req, struct sock *sk,
1239 struct sk_buff *skb)
1240{
1241 struct inet_request_sock *ireq = inet_rsk(req);
1242
1243 ireq->ir_loc_addr = ip_hdr(skb)->daddr;
1244 ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
1245 ireq->no_srccheck = inet_sk(sk)->transparent;
1246 ireq->opt = tcp_v4_save_options(skb);
1247}
1248
Octavian Purdilad94e0412014-06-25 17:09:55 +03001249static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl,
1250 const struct request_sock *req,
1251 bool *strict)
1252{
1253 struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
1254
1255 if (strict) {
1256 if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
1257 *strict = true;
1258 else
1259 *strict = false;
1260 }
1261
1262 return dst;
1263}
1264
/* Generic (AF-independent) request_sock operations for IPv4 TCP. */
struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};
1274
/* IPv4-specific request_sock operations, consumed by tcp_conn_request(). */
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.mss_clamp	=	TCP_MSS_DEFAULT,
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
#endif
	.init_req	=	tcp_v4_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v4_init_sequence,
#endif
	.route_req	=	tcp_v4_route_req,
	.init_seq	=	tcp_v4_init_sequence,
	.send_synack	=	tcp_v4_send_synack,
	.queue_hash_add	=	inet_csk_reqsk_queue_hash_add,
};
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001290
Linus Torvalds1da177e2005-04-16 15:20:36 -07001291int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1292{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001293 /* Never answer to SYNs send to broadcast or multicast */
Eric Dumazet511c3f92009-06-02 05:14:27 +00001294 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001295 goto drop;
1296
Octavian Purdila1fb6f152014-06-25 17:10:02 +03001297 return tcp_conn_request(&tcp_request_sock_ops,
1298 &tcp_request_sock_ipv4_ops, sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001299
Linus Torvalds1da177e2005-04-16 15:20:36 -07001300drop:
Vijay Subramanian848bf152013-01-31 08:24:06 +00001301 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001302 return 0;
1303}
Eric Dumazet4bc2f182010-07-09 21:22:10 +00001304EXPORT_SYMBOL(tcp_v4_conn_request);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001305
1306
1307/*
1308 * The three way handshake has completed - we got a valid synack -
1309 * now create the new socket.
1310 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct ip_options_rcu *inet_opt;

	/* Drop if the listener's accept backlog is already full. */
	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	/* Clone the listener into a child in SYN_RECV-derived state. */
	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	inet_sk_rx_dst_set(newsk, skb);

	/* Copy the addressing decided at SYN time from the request sock
	 * into the child's inet fields.
	 */
	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	newinet->inet_daddr   = ireq->ir_rmt_addr;
	newinet->inet_rcv_saddr = ireq->ir_loc_addr;
	newinet->inet_saddr	      = ireq->ir_loc_addr;
	/* Ownership of the IP options moves from req to the child;
	 * clearing ireq->opt prevents a double free in the req destructor.
	 */
	inet_opt	      = ireq->opt;
	rcu_assign_pointer(newinet->inet_opt, inet_opt);
	ireq->opt	      = NULL;
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
	newinet->rcv_tos      = ip_hdr(skb)->tos;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	inet_set_txhash(newsk);
	if (inet_opt)
		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;

	if (!dst) {
		dst = inet_csk_route_child_sock(sk, newsk, req);
		if (!dst)
			goto put_and_exit;
	} else {
		/* syncookie case : see end of cookie_v4_check() */
	}
	sk_setup_caps(newsk, dst);

	/* Derive MSS/advmss from the route, clamped by any user-set MSS. */
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric_advmss(dst);
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
				AF_INET);
	if (key != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
		/* MD5 and GSO don't mix: segmentation would invalidate sums */
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	/* Bind-bucket inheritance must succeed before hashing the child
	 * into the established table and making it visible to lookups.
	 */
	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	__inet_hash_nolisten(newsk, NULL);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return NULL;
put_and_exit:
	/* Child exists but was never exposed; tear it down safely. */
	inet_csk_prepare_forced_close(newsk);
	tcp_done(newsk);
	goto exit;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001405
/* For a segment arriving on a listening socket, resolve which socket
 * should actually process it: a pending request sock, an already
 * established child, a syncookie-validated child, or the listener itself.
 * Returns NULL when the segment should be discarded; a returned
 * established socket comes back locked (bh_lock_sock).
 */
static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	const struct iphdr *iph = ip_hdr(skb);
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
						       iph->saddr, iph->daddr);
	if (req)
		return tcp_check_req(sk, skb, req, prev, false);

	/* No pending request: maybe a child already reached ESTABLISHED. */
	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
			th->source, iph->daddr, th->dest, inet_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		/* TIME_WAIT entries hold a ref from the lookup; drop it. */
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	/* A bare ACK with no matching req may carry a valid syncookie. */
	if (!th->syn)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}
1436
Linus Torvalds1da177e2005-04-16 15:20:36 -07001437/* The socket must have it's spinlock held when we get
1438 * here.
1439 *
1440 * We have a potential double-lock case here, so even when
1441 * doing backlog processing we use the BH locking scheme.
1442 * This is because we cannot sleep with the original spinlock
1443 * held.
1444 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;	/* socket to send a RST on behalf of */

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		/* Invalidate the cached input route if the incoming
		 * interface changed or the dst is no longer valid.
		 */
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    dst->ops->check(dst, 0) == NULL) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}
		tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
		return 0;
	}

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		/* May return the listener, a locked child, or NULL. */
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		if (nsk != sk) {
			sock_rps_save_rxhash(nsk, skb);
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
		rsk = sk;
		goto reset;
	}
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001506
David S. Miller160eb5a2012-06-27 22:01:22 -07001507void tcp_v4_early_demux(struct sk_buff *skb)
David S. Miller41063e92012-06-19 21:22:05 -07001508{
David S. Miller41063e92012-06-19 21:22:05 -07001509 const struct iphdr *iph;
1510 const struct tcphdr *th;
1511 struct sock *sk;
David S. Miller41063e92012-06-19 21:22:05 -07001512
David S. Miller41063e92012-06-19 21:22:05 -07001513 if (skb->pkt_type != PACKET_HOST)
David S. Miller160eb5a2012-06-27 22:01:22 -07001514 return;
David S. Miller41063e92012-06-19 21:22:05 -07001515
Eric Dumazet45f00f92012-10-22 21:42:47 +00001516 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
David S. Miller160eb5a2012-06-27 22:01:22 -07001517 return;
David S. Miller41063e92012-06-19 21:22:05 -07001518
1519 iph = ip_hdr(skb);
Eric Dumazet45f00f92012-10-22 21:42:47 +00001520 th = tcp_hdr(skb);
David S. Miller41063e92012-06-19 21:22:05 -07001521
1522 if (th->doff < sizeof(struct tcphdr) / 4)
David S. Miller160eb5a2012-06-27 22:01:22 -07001523 return;
David S. Miller41063e92012-06-19 21:22:05 -07001524
Eric Dumazet45f00f92012-10-22 21:42:47 +00001525 sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
David S. Miller41063e92012-06-19 21:22:05 -07001526 iph->saddr, th->source,
Vijay Subramanian7011d082012-06-23 17:38:10 +00001527 iph->daddr, ntohs(th->dest),
Eric Dumazet9cb429d2012-07-24 01:19:31 +00001528 skb->skb_iif);
David S. Miller41063e92012-06-19 21:22:05 -07001529 if (sk) {
1530 skb->sk = sk;
1531 skb->destructor = sock_edemux;
1532 if (sk->sk_state != TCP_TIME_WAIT) {
1533 struct dst_entry *dst = sk->sk_rx_dst;
Eric Dumazet505fbcf2012-07-27 06:23:40 +00001534
David S. Miller41063e92012-06-19 21:22:05 -07001535 if (dst)
1536 dst = dst_check(dst, 0);
David S. Miller92101b32012-07-23 16:29:00 -07001537 if (dst &&
Eric Dumazet505fbcf2012-07-27 06:23:40 +00001538 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
David S. Miller92101b32012-07-23 16:29:00 -07001539 skb_dst_set_noref(skb, dst);
David S. Miller41063e92012-06-19 21:22:05 -07001540 }
1541 }
David S. Miller41063e92012-06-19 21:22:05 -07001542}
1543
Eric Dumazetb2fb4f52013-03-06 12:58:01 +00001544/* Packet is added to VJ-style prequeue for processing in process
1545 * context, if a reader task is waiting. Apparently, this exciting
1546 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
1547 * failed somewhere. Latency? Burstiness? Well, at least now we will
1548 * see, why it failed. 8)8) --ANK
1549 *
1550 */
/* Queue skb on the ucopy prequeue for processing in the reader task's
 * context. Returns true if the skb was consumed (queued or flushed via
 * the backlog receive path), false if the caller must process it itself.
 */
bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Prequeueing is disabled by sysctl, or no reader is waiting. */
	if (sysctl_tcp_low_latency || !tp->ucopy.task)
		return false;

	/* Pure (data-less) segments don't start a prequeue on their own. */
	if (skb->len <= tcp_hdrlen(skb) &&
	    skb_queue_len(&tp->ucopy.prequeue) == 0)
		return false;

	/* Before escaping RCU protected region, we need to take care of skb
	 * dst. Prequeue is only enabled for established sockets.
	 * For such sockets, we might need the skb dst only to set sk->sk_rx_dst
	 * Instead of doing full sk_rx_dst validity here, let's perform
	 * an optimistic check.
	 */
	if (likely(sk->sk_rx_dst))
		skb_dst_drop(skb);
	else
		skb_dst_force(skb);

	__skb_queue_tail(&tp->ucopy.prequeue, skb);
	tp->ucopy.memory += skb->truesize;
	if (tp->ucopy.memory > sk->sk_rcvbuf) {
		struct sk_buff *skb1;

		BUG_ON(sock_owned_by_user(sk));

		/* Over budget: drain the whole prequeue inline and count
		 * each segment as a prequeue drop.
		 */
		while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
			sk_backlog_rcv(sk, skb1);
			NET_INC_STATS_BH(sock_net(sk),
					 LINUX_MIB_TCPPREQUEUEDROPPED);
		}

		tp->ucopy.memory = 0;
	} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
		/* First segment queued: wake the reader and arm a shortened
		 * delayed-ACK timer so ACKs are not held up too long.
		 */
		wake_up_interruptible_sync_poll(sk_sleep(sk),
					   POLLIN | POLLRDNORM | POLLRDBAND);
		if (!inet_csk_ack_scheduled(sk))
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  (3 * tcp_rto_min(sk)) / 4,
						  TCP_RTO_MAX);
	}
	return true;
}
EXPORT_SYMBOL(tcp_prequeue);
1598
Linus Torvalds1da177e2005-04-16 15:20:36 -07001599/*
1600 * From tcp_input.c
1601 */
1602
/* Main IPv4 TCP receive entry point, called from the IP layer for every
 * TCP segment addressed to this host. Validates the header, locates the
 * owning socket, and dispatches to the appropriate processing path
 * (established fast path, prequeue, backlog, or TIME_WAIT handling).
 */
int tcp_v4_rcv(struct sk_buff *skb)
{
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided case of th->doff==0 is eliminated.
	 * So, we defer the checks. */

	if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
		goto csum_error;

	/* Re-read headers: pskb_may_pull may have reallocated the head. */
	th = tcp_hdr(skb);
	iph = ip_hdr(skb);
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	/* end_seq accounts for SYN and FIN each consuming one sequence no. */
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
	TCP_SKB_CB(skb)->sacked	 = 0;

	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	/* Generalized TTL security mechanism check (RFC 5082 style). */
	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *   o We're expecting an MD5'd packet and this is no MD5 tcp option
	 *   o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard_and_relse;
#endif

	nf_reset(skb);

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	sk_mark_napi_id(sk, skb);
	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = net_dma_find_channel();
		if (tp->ucopy.dma_chan)
			ret = tcp_v4_do_rcv(sk, skb);
		else
#endif
		{
			/* Try the prequeue first; fall back to inline rcv. */
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v4_do_rcv(sk, skb);
		}
	} else if (unlikely(sk_add_backlog(sk, skb,
					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
		/* Socket owned by user and backlog limit exceeded: drop. */
		bh_unlock_sock(sk);
		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
csum_error:
		TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
bad_packet:
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
	} else {
		/* Valid segment for a nonexistent connection: send RST. */
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff << 2)) {
		inet_twsk_put(inet_twsk(sk));
		goto bad_packet;
	}
	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		/* A new SYN hit a TIME_WAIT socket; if a listener exists,
		 * kill the timewait entry and restart processing on it.
		 */
		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
							&tcp_hashinfo,
							iph->saddr, th->source,
							iph->daddr, th->dest,
							inet_iif(skb));
		if (sk2) {
			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
			inet_twsk_put(inet_twsk(sk));
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
1767
David S. Millerccb7c412010-12-01 18:09:13 -08001768static struct timewait_sock_ops tcp_timewait_sock_ops = {
1769 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1770 .twsk_unique = tcp_twsk_unique,
1771 .twsk_destructor= tcp_twsk_destructor,
David S. Millerccb7c412010-12-01 18:09:13 -08001772};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001773
Eric Dumazet63d02d12012-08-09 14:11:00 +00001774void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
Eric Dumazet5d299f32012-08-06 05:09:33 +00001775{
1776 struct dst_entry *dst = skb_dst(skb);
1777
Eric Dumazetca777ef2014-09-08 08:06:07 -07001778 if (dst) {
1779 dst_hold(dst);
1780 sk->sk_rx_dst = dst;
1781 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1782 }
Eric Dumazet5d299f32012-08-06 05:09:33 +00001783}
Eric Dumazet63d02d12012-08-09 14:11:00 +00001784EXPORT_SYMBOL(inet_sk_rx_dst_set);
Eric Dumazet5d299f32012-08-06 05:09:33 +00001785
Stephen Hemminger3b401a82009-09-01 19:25:04 +00001786const struct inet_connection_sock_af_ops ipv4_specific = {
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08001787 .queue_xmit = ip_queue_xmit,
1788 .send_check = tcp_v4_send_check,
1789 .rebuild_header = inet_sk_rebuild_header,
Eric Dumazet5d299f32012-08-06 05:09:33 +00001790 .sk_rx_dst_set = inet_sk_rx_dst_set,
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08001791 .conn_request = tcp_v4_conn_request,
1792 .syn_recv_sock = tcp_v4_syn_recv_sock,
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08001793 .net_header_len = sizeof(struct iphdr),
1794 .setsockopt = ip_setsockopt,
1795 .getsockopt = ip_getsockopt,
1796 .addr2sockaddr = inet_csk_addr2sockaddr,
1797 .sockaddr_len = sizeof(struct sockaddr_in),
Arnaldo Carvalho de Meloab1e0a12008-02-03 04:06:04 -08001798 .bind_conflict = inet_csk_bind_conflict,
Dmitry Mishin3fdadf72006-03-20 22:45:21 -08001799#ifdef CONFIG_COMPAT
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08001800 .compat_setsockopt = compat_ip_setsockopt,
1801 .compat_getsockopt = compat_ip_getsockopt,
Dmitry Mishin3fdadf72006-03-20 22:45:21 -08001802#endif
Neal Cardwell4fab9072014-08-14 12:40:05 -04001803 .mtu_reduced = tcp_v4_mtu_reduced,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001804};
Eric Dumazet4bc2f182010-07-09 21:22:10 +00001805EXPORT_SYMBOL(ipv4_specific);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001806
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001807#ifdef CONFIG_TCP_MD5SIG
Stephen Hemmingerb2e4b3d2009-09-01 19:25:03 +00001808static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001809 .md5_lookup = tcp_v4_md5_lookup,
Adam Langley49a72df2008-07-19 00:01:42 -07001810 .calc_md5_hash = tcp_v4_md5_hash_skb,
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001811 .md5_parse = tcp_v4_parse_md5_keys,
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001812};
Andrew Mortonb6332e62006-11-30 19:16:28 -08001813#endif
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001814
Linus Torvalds1da177e2005-04-16 15:20:36 -07001815/* NOTE: A lot of things set to zero explicitly by call to
1816 * sk_alloc() so need not be done here.
1817 */
1818static int tcp_v4_init_sock(struct sock *sk)
1819{
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03001820 struct inet_connection_sock *icsk = inet_csk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001821
Neal Cardwell900f65d2012-04-19 09:55:21 +00001822 tcp_init_sock(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001823
Arnaldo Carvalho de Melo8292a172005-12-13 23:15:52 -08001824 icsk->icsk_af_ops = &ipv4_specific;
Neal Cardwell900f65d2012-04-19 09:55:21 +00001825
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001826#ifdef CONFIG_TCP_MD5SIG
David S. Millerac807fa2012-04-23 03:21:58 -04001827 tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001828#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001829
Linus Torvalds1da177e2005-04-16 15:20:36 -07001830 return 0;
1831}
1832
Brian Haley7d06b2e2008-06-14 17:04:49 -07001833void tcp_v4_destroy_sock(struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001834{
1835 struct tcp_sock *tp = tcp_sk(sk);
1836
1837 tcp_clear_xmit_timers(sk);
1838
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03001839 tcp_cleanup_congestion_control(sk);
Stephen Hemminger317a76f2005-06-23 12:19:55 -07001840
Linus Torvalds1da177e2005-04-16 15:20:36 -07001841 /* Cleanup up the write buffer. */
David S. Millerfe067e82007-03-07 12:12:44 -08001842 tcp_write_queue_purge(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001843
1844 /* Cleans up our, hopefully empty, out_of_order_queue. */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001845 __skb_queue_purge(&tp->out_of_order_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001846
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001847#ifdef CONFIG_TCP_MD5SIG
1848 /* Clean up the MD5 key list, if any */
1849 if (tp->md5sig_info) {
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001850 tcp_clear_md5_list(sk);
Eric Dumazeta8afca02012-01-31 18:45:40 +00001851 kfree_rcu(tp->md5sig_info, rcu);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001852 tp->md5sig_info = NULL;
1853 }
1854#endif
1855
Chris Leech1a2449a2006-05-23 18:05:53 -07001856#ifdef CONFIG_NET_DMA
1857 /* Cleans up our sk_async_wait_queue */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001858 __skb_queue_purge(&sk->sk_async_wait_queue);
Chris Leech1a2449a2006-05-23 18:05:53 -07001859#endif
1860
Linus Torvalds1da177e2005-04-16 15:20:36 -07001861 /* Clean prequeue, it must be empty really */
1862 __skb_queue_purge(&tp->ucopy.prequeue);
1863
1864 /* Clean up a referenced TCP bind bucket. */
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001865 if (inet_csk(sk)->icsk_bind_hash)
Arnaldo Carvalho de Meloab1e0a12008-02-03 04:06:04 -08001866 inet_put_port(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001867
Jerry Chu168a8f52012-08-31 12:29:13 +00001868 BUG_ON(tp->fastopen_rsk != NULL);
William Allen Simpson435cf552009-12-02 18:17:05 +00001869
Yuchung Chengcf60af02012-07-19 06:43:09 +00001870 /* If socket is aborted during connect operation */
1871 tcp_free_fastopen_req(tp);
1872
Glauber Costa180d8cd2011-12-11 21:47:02 +00001873 sk_sockets_allocated_dec(sk);
Glauber Costad1a4c0b2011-12-11 21:47:04 +00001874 sock_release_memcg(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001875}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001876EXPORT_SYMBOL(tcp_v4_destroy_sock);
1877
1878#ifdef CONFIG_PROC_FS
1879/* Proc filesystem TCP sock list dumping. */
1880
Tom Herberta8b690f2010-06-07 00:43:42 -07001881/*
1882 * Get next listener socket follow cur. If cur is NULL, get first socket
1883 * starting from bucket given in st->bucket; when st->bucket is zero the
1884 * very first socket in the hash table is returned.
1885 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001886static void *listening_get_next(struct seq_file *seq, void *cur)
1887{
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001888 struct inet_connection_sock *icsk;
Eric Dumazetc25eb3b2008-11-23 17:22:55 -08001889 struct hlist_nulls_node *node;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001890 struct sock *sk = cur;
Eric Dumazet5caea4e2008-11-20 00:40:07 -08001891 struct inet_listen_hashbucket *ilb;
Jianjun Kong5799de02008-11-03 02:49:10 -08001892 struct tcp_iter_state *st = seq->private;
Denis V. Luneva4146b12008-04-13 22:11:14 -07001893 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001894
1895 if (!sk) {
Tom Herberta8b690f2010-06-07 00:43:42 -07001896 ilb = &tcp_hashinfo.listening_hash[st->bucket];
Eric Dumazet5caea4e2008-11-20 00:40:07 -08001897 spin_lock_bh(&ilb->lock);
Eric Dumazetc25eb3b2008-11-23 17:22:55 -08001898 sk = sk_nulls_head(&ilb->head);
Tom Herberta8b690f2010-06-07 00:43:42 -07001899 st->offset = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001900 goto get_sk;
1901 }
Eric Dumazet5caea4e2008-11-20 00:40:07 -08001902 ilb = &tcp_hashinfo.listening_hash[st->bucket];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001903 ++st->num;
Tom Herberta8b690f2010-06-07 00:43:42 -07001904 ++st->offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001905
1906 if (st->state == TCP_SEQ_STATE_OPENREQ) {
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001907 struct request_sock *req = cur;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001908
Eric Dumazet72a3eff2006-11-16 02:30:37 -08001909 icsk = inet_csk(st->syn_wait_sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001910 req = req->dl_next;
1911 while (1) {
1912 while (req) {
Daniel Lezcanobdccc4c2008-07-19 00:15:13 -07001913 if (req->rsk_ops->family == st->family) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001914 cur = req;
1915 goto out;
1916 }
1917 req = req->dl_next;
1918 }
Eric Dumazet72a3eff2006-11-16 02:30:37 -08001919 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001920 break;
1921get_req:
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001922 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001923 }
Eric Dumazet1bde5ac2010-12-23 09:32:46 -08001924 sk = sk_nulls_next(st->syn_wait_sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001925 st->state = TCP_SEQ_STATE_LISTENING;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001926 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001927 } else {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001928 icsk = inet_csk(sk);
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001929 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1930 if (reqsk_queue_len(&icsk->icsk_accept_queue))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001931 goto start_req;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001932 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
Eric Dumazet1bde5ac2010-12-23 09:32:46 -08001933 sk = sk_nulls_next(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001934 }
1935get_sk:
Eric Dumazetc25eb3b2008-11-23 17:22:55 -08001936 sk_nulls_for_each_from(sk, node) {
Pavel Emelyanov8475ef92010-11-22 03:26:12 +00001937 if (!net_eq(sock_net(sk), net))
1938 continue;
1939 if (sk->sk_family == st->family) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001940 cur = sk;
1941 goto out;
1942 }
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001943 icsk = inet_csk(sk);
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001944 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1945 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001946start_req:
1947 st->uid = sock_i_uid(sk);
1948 st->syn_wait_sk = sk;
1949 st->state = TCP_SEQ_STATE_OPENREQ;
1950 st->sbucket = 0;
1951 goto get_req;
1952 }
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001953 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001954 }
Eric Dumazet5caea4e2008-11-20 00:40:07 -08001955 spin_unlock_bh(&ilb->lock);
Tom Herberta8b690f2010-06-07 00:43:42 -07001956 st->offset = 0;
Arnaldo Carvalho de Melo0f7ff922005-08-09 19:59:44 -07001957 if (++st->bucket < INET_LHTABLE_SIZE) {
Eric Dumazet5caea4e2008-11-20 00:40:07 -08001958 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1959 spin_lock_bh(&ilb->lock);
Eric Dumazetc25eb3b2008-11-23 17:22:55 -08001960 sk = sk_nulls_head(&ilb->head);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001961 goto get_sk;
1962 }
1963 cur = NULL;
1964out:
1965 return cur;
1966}
1967
1968static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
1969{
Tom Herberta8b690f2010-06-07 00:43:42 -07001970 struct tcp_iter_state *st = seq->private;
1971 void *rc;
1972
1973 st->bucket = 0;
1974 st->offset = 0;
1975 rc = listening_get_next(seq, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001976
1977 while (rc && *pos) {
1978 rc = listening_get_next(seq, rc);
1979 --*pos;
1980 }
1981 return rc;
1982}
1983
Eric Dumazet05dbc7b2013-10-03 00:22:02 -07001984static inline bool empty_bucket(const struct tcp_iter_state *st)
Andi Kleen6eac5602008-08-28 01:08:02 -07001985{
Eric Dumazet05dbc7b2013-10-03 00:22:02 -07001986 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
Andi Kleen6eac5602008-08-28 01:08:02 -07001987}
1988
Tom Herberta8b690f2010-06-07 00:43:42 -07001989/*
1990 * Get first established socket starting from bucket given in st->bucket.
1991 * If st->bucket is zero, the very first socket in the hash is returned.
1992 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001993static void *established_get_first(struct seq_file *seq)
1994{
Jianjun Kong5799de02008-11-03 02:49:10 -08001995 struct tcp_iter_state *st = seq->private;
Denis V. Luneva4146b12008-04-13 22:11:14 -07001996 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001997 void *rc = NULL;
1998
Tom Herberta8b690f2010-06-07 00:43:42 -07001999 st->offset = 0;
2000 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002001 struct sock *sk;
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002002 struct hlist_nulls_node *node;
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002003 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002004
Andi Kleen6eac5602008-08-28 01:08:02 -07002005 /* Lockless fast path for the common case of empty buckets */
2006 if (empty_bucket(st))
2007 continue;
2008
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002009 spin_lock_bh(lock);
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002010 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
Daniel Lezcanof40c8172008-03-21 04:13:54 -07002011 if (sk->sk_family != st->family ||
YOSHIFUJI Hideaki878628f2008-03-26 03:57:35 +09002012 !net_eq(sock_net(sk), net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002013 continue;
2014 }
2015 rc = sk;
2016 goto out;
2017 }
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002018 spin_unlock_bh(lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002019 }
2020out:
2021 return rc;
2022}
2023
/*
 * Advance to the socket after @cur in the established hash.  Called with
 * the ehash lock for st->bucket held; the lock is kept while the match is
 * found in the same bucket, and released before falling back to
 * established_get_first() for the following buckets.
 */
static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct hlist_nulls_node *node;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	++st->num;
	++st->offset;

	sk = sk_nulls_next(sk);

	sk_nulls_for_each_from(sk, node) {
		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
			return sk;
	}

	spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
	++st->bucket;
	return established_get_first(seq);
}
2045
2046static void *established_get_idx(struct seq_file *seq, loff_t pos)
2047{
Tom Herberta8b690f2010-06-07 00:43:42 -07002048 struct tcp_iter_state *st = seq->private;
2049 void *rc;
2050
2051 st->bucket = 0;
2052 rc = established_get_first(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002053
2054 while (rc && pos) {
2055 rc = established_get_next(seq, rc);
2056 --pos;
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02002057 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002058 return rc;
2059}
2060
2061static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2062{
2063 void *rc;
Jianjun Kong5799de02008-11-03 02:49:10 -08002064 struct tcp_iter_state *st = seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002065
Linus Torvalds1da177e2005-04-16 15:20:36 -07002066 st->state = TCP_SEQ_STATE_LISTENING;
2067 rc = listening_get_idx(seq, &pos);
2068
2069 if (!rc) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002070 st->state = TCP_SEQ_STATE_ESTABLISHED;
2071 rc = established_get_idx(seq, pos);
2072 }
2073
2074 return rc;
2075}
2076
Tom Herberta8b690f2010-06-07 00:43:42 -07002077static void *tcp_seek_last_pos(struct seq_file *seq)
2078{
2079 struct tcp_iter_state *st = seq->private;
2080 int offset = st->offset;
2081 int orig_num = st->num;
2082 void *rc = NULL;
2083
2084 switch (st->state) {
2085 case TCP_SEQ_STATE_OPENREQ:
2086 case TCP_SEQ_STATE_LISTENING:
2087 if (st->bucket >= INET_LHTABLE_SIZE)
2088 break;
2089 st->state = TCP_SEQ_STATE_LISTENING;
2090 rc = listening_get_next(seq, NULL);
2091 while (offset-- && rc)
2092 rc = listening_get_next(seq, rc);
2093 if (rc)
2094 break;
2095 st->bucket = 0;
Eric Dumazet05dbc7b2013-10-03 00:22:02 -07002096 st->state = TCP_SEQ_STATE_ESTABLISHED;
Tom Herberta8b690f2010-06-07 00:43:42 -07002097 /* Fallthrough */
2098 case TCP_SEQ_STATE_ESTABLISHED:
Tom Herberta8b690f2010-06-07 00:43:42 -07002099 if (st->bucket > tcp_hashinfo.ehash_mask)
2100 break;
2101 rc = established_get_first(seq);
2102 while (offset-- && rc)
2103 rc = established_get_next(seq, rc);
2104 }
2105
2106 st->num = orig_num;
2107
2108 return rc;
2109}
2110
Linus Torvalds1da177e2005-04-16 15:20:36 -07002111static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2112{
Jianjun Kong5799de02008-11-03 02:49:10 -08002113 struct tcp_iter_state *st = seq->private;
Tom Herberta8b690f2010-06-07 00:43:42 -07002114 void *rc;
2115
2116 if (*pos && *pos == st->last_pos) {
2117 rc = tcp_seek_last_pos(seq);
2118 if (rc)
2119 goto out;
2120 }
2121
Linus Torvalds1da177e2005-04-16 15:20:36 -07002122 st->state = TCP_SEQ_STATE_LISTENING;
2123 st->num = 0;
Tom Herberta8b690f2010-06-07 00:43:42 -07002124 st->bucket = 0;
2125 st->offset = 0;
2126 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2127
2128out:
2129 st->last_pos = *pos;
2130 return rc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002131}
2132
2133static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2134{
Tom Herberta8b690f2010-06-07 00:43:42 -07002135 struct tcp_iter_state *st = seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002136 void *rc = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002137
2138 if (v == SEQ_START_TOKEN) {
2139 rc = tcp_get_idx(seq, 0);
2140 goto out;
2141 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002142
2143 switch (st->state) {
2144 case TCP_SEQ_STATE_OPENREQ:
2145 case TCP_SEQ_STATE_LISTENING:
2146 rc = listening_get_next(seq, v);
2147 if (!rc) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002148 st->state = TCP_SEQ_STATE_ESTABLISHED;
Tom Herberta8b690f2010-06-07 00:43:42 -07002149 st->bucket = 0;
2150 st->offset = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002151 rc = established_get_first(seq);
2152 }
2153 break;
2154 case TCP_SEQ_STATE_ESTABLISHED:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002155 rc = established_get_next(seq, v);
2156 break;
2157 }
2158out:
2159 ++*pos;
Tom Herberta8b690f2010-06-07 00:43:42 -07002160 st->last_pos = *pos;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002161 return rc;
2162}
2163
/* seq_file ->stop(): drop whatever lock(s) the iterator still holds. */
static void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
		if (v) {
			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		}
		/* Fallthrough: the listening bucket lock is held as well. */
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		break;
	}
}
2184
Arjan van de Ven73cb88e2011-10-30 06:46:30 +00002185int tcp_seq_open(struct inode *inode, struct file *file)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002186{
Al Virod9dda782013-03-31 18:16:14 -04002187 struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002188 struct tcp_iter_state *s;
Denis V. Lunev52d6f3f2008-04-13 22:12:41 -07002189 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002190
Denis V. Lunev52d6f3f2008-04-13 22:12:41 -07002191 err = seq_open_net(inode, file, &afinfo->seq_ops,
2192 sizeof(struct tcp_iter_state));
2193 if (err < 0)
2194 return err;
Daniel Lezcanof40c8172008-03-21 04:13:54 -07002195
Denis V. Lunev52d6f3f2008-04-13 22:12:41 -07002196 s = ((struct seq_file *)file->private_data)->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002197 s->family = afinfo->family;
stephen hemminger688d1942014-08-29 23:32:05 -07002198 s->last_pos = 0;
Daniel Lezcanof40c8172008-03-21 04:13:54 -07002199 return 0;
2200}
Arjan van de Ven73cb88e2011-10-30 06:46:30 +00002201EXPORT_SYMBOL(tcp_seq_open);
Daniel Lezcanof40c8172008-03-21 04:13:54 -07002202
Daniel Lezcano6f8b13b2008-03-21 04:14:45 -07002203int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002204{
2205 int rc = 0;
2206 struct proc_dir_entry *p;
2207
Denis V. Lunev9427c4b2008-04-13 22:12:13 -07002208 afinfo->seq_ops.start = tcp_seq_start;
2209 afinfo->seq_ops.next = tcp_seq_next;
2210 afinfo->seq_ops.stop = tcp_seq_stop;
2211
Denis V. Lunev84841c32008-05-02 04:10:08 -07002212 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
Arjan van de Ven73cb88e2011-10-30 06:46:30 +00002213 afinfo->seq_fops, afinfo);
Denis V. Lunev84841c32008-05-02 04:10:08 -07002214 if (!p)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002215 rc = -ENOMEM;
2216 return rc;
2217}
Eric Dumazet4bc2f182010-07-09 21:22:10 +00002218EXPORT_SYMBOL(tcp_proc_register);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002219
Daniel Lezcano6f8b13b2008-03-21 04:14:45 -07002220void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002221{
Gao fengece31ff2013-02-18 01:34:56 +00002222 remove_proc_entry(afinfo->name, net->proc_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002223}
Eric Dumazet4bc2f182010-07-09 21:22:10 +00002224EXPORT_SYMBOL(tcp_proc_unregister);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002225
Eric Dumazetcf533ea2011-10-21 05:22:42 -04002226static void get_openreq4(const struct sock *sk, const struct request_sock *req,
Tetsuo Handa652586d2013-11-14 14:31:57 -08002227 struct seq_file *f, int i, kuid_t uid)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002228{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002229 const struct inet_request_sock *ireq = inet_rsk(req);
Eric Dumazeta399a802012-08-08 21:13:53 +00002230 long delta = req->expires - jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002231
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002232 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
Tetsuo Handa652586d2013-11-14 14:31:57 -08002233 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
Linus Torvalds1da177e2005-04-16 15:20:36 -07002234 i,
Eric Dumazet634fb9792013-10-09 15:21:29 -07002235 ireq->ir_loc_addr,
Eric Dumazetc720c7e2009-10-15 06:30:45 +00002236 ntohs(inet_sk(sk)->inet_sport),
Eric Dumazet634fb9792013-10-09 15:21:29 -07002237 ireq->ir_rmt_addr,
2238 ntohs(ireq->ir_rmt_port),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002239 TCP_SYN_RECV,
2240 0, 0, /* could print option size, but that is af dependent. */
2241 1, /* timers active (only the expire timer) */
Eric Dumazeta399a802012-08-08 21:13:53 +00002242 jiffies_delta_to_clock_t(delta),
Eric Dumazete6c022a2012-10-27 23:16:46 +00002243 req->num_timeout,
Eric W. Biedermana7cb5a42012-05-24 01:10:10 -06002244 from_kuid_munged(seq_user_ns(f), uid),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002245 0, /* non standard timer */
2246 0, /* open_requests have no inode */
2247 atomic_read(&sk->sk_refcnt),
Tetsuo Handa652586d2013-11-14 14:31:57 -08002248 req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002249}
2250
Tetsuo Handa652586d2013-11-14 14:31:57 -08002251static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002252{
2253 int timer_active;
2254 unsigned long timer_expires;
Eric Dumazetcf533ea2011-10-21 05:22:42 -04002255 const struct tcp_sock *tp = tcp_sk(sk);
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002256 const struct inet_connection_sock *icsk = inet_csk(sk);
Eric Dumazetcf533ea2011-10-21 05:22:42 -04002257 const struct inet_sock *inet = inet_sk(sk);
Jerry Chu168a8f52012-08-31 12:29:13 +00002258 struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
Eric Dumazetc720c7e2009-10-15 06:30:45 +00002259 __be32 dest = inet->inet_daddr;
2260 __be32 src = inet->inet_rcv_saddr;
2261 __u16 destp = ntohs(inet->inet_dport);
2262 __u16 srcp = ntohs(inet->inet_sport);
Eric Dumazet49d09002009-12-03 16:06:13 -08002263 int rx_queue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002264
Nandita Dukkipati6ba8a3b2013-03-11 10:00:43 +00002265 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2266 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2267 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002268 timer_active = 1;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002269 timer_expires = icsk->icsk_timeout;
2270 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002271 timer_active = 4;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002272 timer_expires = icsk->icsk_timeout;
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002273 } else if (timer_pending(&sk->sk_timer)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002274 timer_active = 2;
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002275 timer_expires = sk->sk_timer.expires;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002276 } else {
2277 timer_active = 0;
2278 timer_expires = jiffies;
2279 }
2280
Eric Dumazet49d09002009-12-03 16:06:13 -08002281 if (sk->sk_state == TCP_LISTEN)
2282 rx_queue = sk->sk_ack_backlog;
2283 else
2284 /*
2285 * because we dont lock socket, we might find a transient negative value
2286 */
2287 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2288
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002289 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
Tetsuo Handa652586d2013-11-14 14:31:57 -08002290 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002291 i, src, srcp, dest, destp, sk->sk_state,
Sridhar Samudrala47da8ee2006-06-27 13:29:00 -07002292 tp->write_seq - tp->snd_una,
Eric Dumazet49d09002009-12-03 16:06:13 -08002293 rx_queue,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002294 timer_active,
Eric Dumazeta399a802012-08-08 21:13:53 +00002295 jiffies_delta_to_clock_t(timer_expires - jiffies),
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002296 icsk->icsk_retransmits,
Eric W. Biedermana7cb5a42012-05-24 01:10:10 -06002297 from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03002298 icsk->icsk_probes_out,
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002299 sock_i_ino(sk),
2300 atomic_read(&sk->sk_refcnt), sk,
Stephen Hemminger7be87352008-06-27 20:00:19 -07002301 jiffies_to_clock_t(icsk->icsk_rto),
2302 jiffies_to_clock_t(icsk->icsk_ack.ato),
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002303 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002304 tp->snd_cwnd,
Jerry Chu168a8f52012-08-31 12:29:13 +00002305 sk->sk_state == TCP_LISTEN ?
2306 (fastopenq ? fastopenq->max_qlen : 0) :
Tetsuo Handa652586d2013-11-14 14:31:57 -08002307 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002308}
2309
Eric Dumazetcf533ea2011-10-21 05:22:42 -04002310static void get_timewait4_sock(const struct inet_timewait_sock *tw,
Tetsuo Handa652586d2013-11-14 14:31:57 -08002311 struct seq_file *f, int i)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002312{
Al Viro23f33c22006-09-27 18:43:50 -07002313 __be32 dest, src;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002314 __u16 destp, srcp;
Eric Dumazete2a1d3e2014-03-27 07:19:19 -07002315 s32 delta = tw->tw_ttd - inet_tw_time_stamp();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002316
2317 dest = tw->tw_daddr;
2318 src = tw->tw_rcv_saddr;
2319 destp = ntohs(tw->tw_dport);
2320 srcp = ntohs(tw->tw_sport);
2321
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002322 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
Tetsuo Handa652586d2013-11-14 14:31:57 -08002323 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
Linus Torvalds1da177e2005-04-16 15:20:36 -07002324 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
Eric Dumazeta399a802012-08-08 21:13:53 +00002325 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
Tetsuo Handa652586d2013-11-14 14:31:57 -08002326 atomic_read(&tw->tw_refcnt), tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002327}
2328
/* Fixed width of every /proc/net/tcp line (padded by seq_pad() below). */
#define TMPSZ 150

/*
 * seq_file ->show(): print the header for SEQ_START_TOKEN, otherwise
 * dispatch on the iterator state to the right per-entry formatter.
 * In the LISTENING/ESTABLISHED states @v is a struct sock (possibly a
 * timewait socket); in OPENREQ it is a struct request_sock.
 */
static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	seq_setwidth(seq, TMPSZ - 1);
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		if (sk->sk_state == TCP_TIME_WAIT)
			get_timewait4_sock(v, seq, st->num);
		else
			get_tcp4_sock(v, seq, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid);
		break;
	}
out:
	seq_pad(seq, '\n');
	return 0;
}
2361
Arjan van de Ven73cb88e2011-10-30 06:46:30 +00002362static const struct file_operations tcp_afinfo_seq_fops = {
2363 .owner = THIS_MODULE,
2364 .open = tcp_seq_open,
2365 .read = seq_read,
2366 .llseek = seq_lseek,
2367 .release = seq_release_net
2368};
2369
Linus Torvalds1da177e2005-04-16 15:20:36 -07002370static struct tcp_seq_afinfo tcp4_seq_afinfo = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002371 .name = "tcp",
2372 .family = AF_INET,
Arjan van de Ven73cb88e2011-10-30 06:46:30 +00002373 .seq_fops = &tcp_afinfo_seq_fops,
Denis V. Lunev9427c4b2008-04-13 22:12:13 -07002374 .seq_ops = {
2375 .show = tcp4_seq_show,
2376 },
Linus Torvalds1da177e2005-04-16 15:20:36 -07002377};
2378
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002379static int __net_init tcp4_proc_init_net(struct net *net)
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002380{
2381 return tcp_proc_register(net, &tcp4_seq_afinfo);
2382}
2383
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002384static void __net_exit tcp4_proc_exit_net(struct net *net)
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002385{
2386 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2387}
2388
/* Per-network-namespace lifecycle hooks for the /proc/net/tcp file. */
static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};
2393
Linus Torvalds1da177e2005-04-16 15:20:36 -07002394int __init tcp4_proc_init(void)
2395{
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002396 return register_pernet_subsys(&tcp4_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002397}
2398
/* Tear down the /proc/net/tcp pernet hooks. */
void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
2403#endif /* CONFIG_PROC_FS */
2404
/*
 * The AF_INET TCP protocol descriptor: maps the generic socket layer's
 * operations onto the TCP/IPv4 implementations in this file and in
 * tcp.c / inet_connection_sock.c.  Exported for use by tcp_ipv6 and
 * other in-tree users.
 */
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	/* sockets are freed via RCU; see SLAB_DESTROY_BY_RCU rules */
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
#ifdef CONFIG_MEMCG_KMEM
	.init_cgroup		= tcp_init_cgroup,
	.destroy_cgroup		= tcp_destroy_cgroup,
	.proto_cgroup		= tcp_proto_cgroup,
#endif
};
EXPORT_SYMBOL(tcp_prot);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002453
Denis V. Lunev046ee902008-04-03 14:31:33 -07002454static int __net_init tcp_sk_init(struct net *net)
2455{
Hannes Frederic Sowa5d134f12013-01-05 16:10:48 +00002456 net->ipv4.sysctl_tcp_ecn = 2;
Eric Dumazetbe9f4a42012-07-19 07:34:03 +00002457 return 0;
Denis V. Lunev046ee902008-04-03 14:31:33 -07002458}
2459
/* pernet exit: nothing to undo per-namespace; cleanup is batched below. */
static void __net_exit tcp_sk_exit(struct net *net)
{
}
2463
/* Batched pernet exit: flush timewait sockets of all dying namespaces. */
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
}
2468
/* Per-network-namespace lifecycle hooks for TCP itself. */
static struct pernet_operations __net_initdata tcp_sk_ops = {
       .init	   = tcp_sk_init,
       .exit	   = tcp_sk_exit,
       .exit_batch = tcp_sk_exit_batch,
};
2474
Denis V. Lunev9b0f9762008-02-29 11:13:15 -08002475void __init tcp_v4_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002476{
Eric Dumazet5caea4e2008-11-20 00:40:07 -08002477 inet_hashinfo_init(&tcp_hashinfo);
Eric W. Biederman6a1b3052009-02-22 00:10:18 -08002478 if (register_pernet_subsys(&tcp_sk_ops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002479 panic("Failed to create the TCP control socket.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002480}