blob: 3f9bc3f0bba0f8b7e7fab8df084667384fba1732 [file] [log] [blame]
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *	     				Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *		Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:		ip_dynaddr bits
 *		Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */
52
Joe Perchesafd465032012-03-12 07:03:32 +000053#define pr_fmt(fmt) "TCP: " fmt
Linus Torvalds1da177e2005-04-16 15:20:36 -070054
Herbert Xueb4dea52008-12-29 23:04:08 -080055#include <linux/bottom_half.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070056#include <linux/types.h>
57#include <linux/fcntl.h>
58#include <linux/module.h>
59#include <linux/random.h>
60#include <linux/cache.h>
61#include <linux/jhash.h>
62#include <linux/init.h>
63#include <linux/times.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090064#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070065
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020066#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070067#include <net/icmp.h>
Arnaldo Carvalho de Melo304a1612005-08-09 19:59:20 -070068#include <net/inet_hashtables.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070069#include <net/tcp.h>
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -030070#include <net/transp_v6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070071#include <net/ipv6.h>
72#include <net/inet_common.h>
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -080073#include <net/timewait_sock.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070074#include <net/xfrm.h>
Chris Leech1a2449a2006-05-23 18:05:53 -070075#include <net/netdma.h>
David S. Miller6e5714e2011-08-03 20:50:44 -070076#include <net/secure_seq.h>
Glauber Costad1a4c0b2011-12-11 21:47:04 +000077#include <net/tcp_memcontrol.h>
Eliezer Tamir076bb0c2013-07-10 17:13:17 +030078#include <net/busy_poll.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070079
80#include <linux/inet.h>
81#include <linux/ipv6.h>
82#include <linux/stddef.h>
83#include <linux/proc_fs.h>
84#include <linux/seq_file.h>
85
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -080086#include <linux/crypto.h>
87#include <linux/scatterlist.h>
88
Brian Haleyab32ea52006-09-22 14:15:41 -070089int sysctl_tcp_tw_reuse __read_mostly;
90int sysctl_tcp_low_latency __read_mostly;
Eric Dumazet4bc2f182010-07-09 21:22:10 +000091EXPORT_SYMBOL(sysctl_tcp_low_latency);
Linus Torvalds1da177e2005-04-16 15:20:36 -070092
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -080093#ifdef CONFIG_TCP_MD5SIG
Eric Dumazeta915da9b2012-01-31 05:18:33 +000094static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
Eric Dumazet318cf7a2011-10-24 02:46:04 -040095 __be32 daddr, __be32 saddr, const struct tcphdr *th);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -080096#endif
97
Eric Dumazet5caea4e2008-11-20 00:40:07 -080098struct inet_hashinfo tcp_hashinfo;
Eric Dumazet4bc2f182010-07-09 21:22:10 +000099EXPORT_SYMBOL(tcp_hashinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100
Octavian Purdila936b8bd2014-06-25 17:09:57 +0300101static __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700102{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700103 return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
104 ip_hdr(skb)->saddr,
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700105 tcp_hdr(skb)->dest,
106 tcp_hdr(skb)->source);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700107}
108
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -0800109int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
110{
111 const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
112 struct tcp_sock *tp = tcp_sk(sk);
113
114 /* With PAWS, it is safe from the viewpoint
115 of data integrity. Even without PAWS it is safe provided sequence
116 spaces do not overlap i.e. at data rates <= 80Mbit/sec.
117
118 Actually, the idea is close to VJ's one, only timestamp cache is
119 held not per host, but per port pair and TW bucket is used as state
120 holder.
121
122 If TW bucket has been already destroyed we fall back to VJ's scheme
123 and use initial timestamp retrieved from peer table.
124 */
125 if (tcptw->tw_ts_recent_stamp &&
126 (twp == NULL || (sysctl_tcp_tw_reuse &&
James Morris9d729f72007-03-04 16:12:44 -0800127 get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -0800128 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
129 if (tp->write_seq == 0)
130 tp->write_seq = 1;
131 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
132 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
133 sock_hold(sktw);
134 return 1;
135 }
136
137 return 0;
138}
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -0800139EXPORT_SYMBOL_GPL(tcp_twsk_unique);
140
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141/* This will initiate an outgoing connection. */
142int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
143{
David S. Miller2d7192d2011-04-26 13:28:44 -0700144 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700145 struct inet_sock *inet = inet_sk(sk);
146 struct tcp_sock *tp = tcp_sk(sk);
David S. Millerdca8b082011-02-24 13:38:12 -0800147 __be16 orig_sport, orig_dport;
Al Virobada8ad2006-09-26 21:27:15 -0700148 __be32 daddr, nexthop;
David S. Millerda905bd2011-05-06 16:11:19 -0700149 struct flowi4 *fl4;
David S. Miller2d7192d2011-04-26 13:28:44 -0700150 struct rtable *rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700151 int err;
Eric Dumazetf6d8bd02011-04-21 09:45:37 +0000152 struct ip_options_rcu *inet_opt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700153
154 if (addr_len < sizeof(struct sockaddr_in))
155 return -EINVAL;
156
157 if (usin->sin_family != AF_INET)
158 return -EAFNOSUPPORT;
159
160 nexthop = daddr = usin->sin_addr.s_addr;
Eric Dumazetf6d8bd02011-04-21 09:45:37 +0000161 inet_opt = rcu_dereference_protected(inet->inet_opt,
162 sock_owned_by_user(sk));
163 if (inet_opt && inet_opt->opt.srr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164 if (!daddr)
165 return -EINVAL;
Eric Dumazetf6d8bd02011-04-21 09:45:37 +0000166 nexthop = inet_opt->opt.faddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167 }
168
David S. Millerdca8b082011-02-24 13:38:12 -0800169 orig_sport = inet->inet_sport;
170 orig_dport = usin->sin_port;
David S. Millerda905bd2011-05-06 16:11:19 -0700171 fl4 = &inet->cork.fl.u.ip4;
172 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
David S. Millerb23dd4f2011-03-02 14:31:35 -0800173 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
174 IPPROTO_TCP,
Steffen Klassert0e0d44a2013-08-28 08:04:14 +0200175 orig_sport, orig_dport, sk);
David S. Millerb23dd4f2011-03-02 14:31:35 -0800176 if (IS_ERR(rt)) {
177 err = PTR_ERR(rt);
178 if (err == -ENETUNREACH)
Eric Dumazetf1d8cba2013-11-28 09:51:22 -0800179 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
David S. Millerb23dd4f2011-03-02 14:31:35 -0800180 return err;
Wei Dong584bdf82007-05-31 22:49:28 -0700181 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182
183 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
184 ip_rt_put(rt);
185 return -ENETUNREACH;
186 }
187
Eric Dumazetf6d8bd02011-04-21 09:45:37 +0000188 if (!inet_opt || !inet_opt->opt.srr)
David S. Millerda905bd2011-05-06 16:11:19 -0700189 daddr = fl4->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700190
Eric Dumazetc720c7e82009-10-15 06:30:45 +0000191 if (!inet->inet_saddr)
David S. Millerda905bd2011-05-06 16:11:19 -0700192 inet->inet_saddr = fl4->saddr;
Eric Dumazetc720c7e82009-10-15 06:30:45 +0000193 inet->inet_rcv_saddr = inet->inet_saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194
Eric Dumazetc720c7e82009-10-15 06:30:45 +0000195 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700196 /* Reset inherited state */
197 tp->rx_opt.ts_recent = 0;
198 tp->rx_opt.ts_recent_stamp = 0;
Pavel Emelyanovee995282012-04-19 03:40:39 +0000199 if (likely(!tp->repair))
200 tp->write_seq = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201 }
202
Arnaldo Carvalho de Melo295ff7e2005-08-09 20:44:40 -0700203 if (tcp_death_row.sysctl_tw_recycle &&
David S. Miller81166dd2012-07-10 03:14:24 -0700204 !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
205 tcp_fetch_timewait_stamp(sk, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206
Eric Dumazetc720c7e82009-10-15 06:30:45 +0000207 inet->inet_dport = usin->sin_port;
208 inet->inet_daddr = daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700209
Tom Herbertb73c3d02014-07-01 21:32:17 -0700210 inet_set_txhash(sk);
211
Arnaldo Carvalho de Melod83d8462005-12-13 23:26:10 -0800212 inet_csk(sk)->icsk_ext_hdr_len = 0;
Eric Dumazetf6d8bd02011-04-21 09:45:37 +0000213 if (inet_opt)
214 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215
William Allen Simpsonbee7ca92009-11-10 09:51:18 +0000216 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700217
218 /* Socket identity is still unknown (sport may be zero).
219 * However we set state to SYN-SENT and not releasing socket
220 * lock select source port, enter ourselves into the hash tables and
221 * complete initialization after this.
222 */
223 tcp_set_state(sk, TCP_SYN_SENT);
Arnaldo Carvalho de Meloa7f5e7f2005-12-13 23:25:31 -0800224 err = inet_hash_connect(&tcp_death_row, sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700225 if (err)
226 goto failure;
227
David S. Millerda905bd2011-05-06 16:11:19 -0700228 rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
David S. Millerb23dd4f2011-03-02 14:31:35 -0800229 inet->inet_sport, inet->inet_dport, sk);
230 if (IS_ERR(rt)) {
231 err = PTR_ERR(rt);
232 rt = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700233 goto failure;
David S. Millerb23dd4f2011-03-02 14:31:35 -0800234 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700235 /* OK, now commit destination to socket. */
Herbert Xubcd76112006-06-30 13:36:35 -0700236 sk->sk_gso_type = SKB_GSO_TCPV4;
Changli Gaod8d1f302010-06-10 23:31:35 -0700237 sk_setup_caps(sk, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700238
Pavel Emelyanovee995282012-04-19 03:40:39 +0000239 if (!tp->write_seq && likely(!tp->repair))
Eric Dumazetc720c7e82009-10-15 06:30:45 +0000240 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
241 inet->inet_daddr,
242 inet->inet_sport,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243 usin->sin_port);
244
Eric Dumazetc720c7e82009-10-15 06:30:45 +0000245 inet->inet_id = tp->write_seq ^ jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246
Andrey Vagin2b916472012-11-22 01:13:58 +0000247 err = tcp_connect(sk);
Pavel Emelyanovee995282012-04-19 03:40:39 +0000248
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249 rt = NULL;
250 if (err)
251 goto failure;
252
253 return 0;
254
255failure:
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200256 /*
257 * This unhashes the socket and releases the local port,
258 * if necessary.
259 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260 tcp_set_state(sk, TCP_CLOSE);
261 ip_rt_put(rt);
262 sk->sk_route_caps = 0;
Eric Dumazetc720c7e82009-10-15 06:30:45 +0000263 inet->inet_dport = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700264 return err;
265}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000266EXPORT_SYMBOL(tcp_v4_connect);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268/*
Eric Dumazet563d34d2012-07-23 09:48:52 +0200269 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
270 * It can be called through tcp_release_cb() if socket was owned by user
271 * at the time tcp_v4_err() was called to handle ICMP message.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272 */
Neal Cardwell4fab9072014-08-14 12:40:05 -0400273void tcp_v4_mtu_reduced(struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700274{
275 struct dst_entry *dst;
276 struct inet_sock *inet = inet_sk(sk);
Eric Dumazet563d34d2012-07-23 09:48:52 +0200277 u32 mtu = tcp_sk(sk)->mtu_info;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278
David S. Miller80d0a692012-07-16 03:28:06 -0700279 dst = inet_csk_update_pmtu(sk, mtu);
280 if (!dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281 return;
282
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283 /* Something is about to be wrong... Remember soft error
284 * for the case, if this connection will not able to recover.
285 */
286 if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
287 sk->sk_err_soft = EMSGSIZE;
288
289 mtu = dst_mtu(dst);
290
291 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
Hannes Frederic Sowa482fc602013-11-05 02:24:17 +0100292 ip_sk_accept_pmtu(sk) &&
Arnaldo Carvalho de Melod83d8462005-12-13 23:26:10 -0800293 inet_csk(sk)->icsk_pmtu_cookie > mtu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294 tcp_sync_mss(sk, mtu);
295
296 /* Resend the TCP packet because it's
297 * clear that the old packet has been
298 * dropped. This is the new "fast" path mtu
299 * discovery.
300 */
301 tcp_simple_retransmit(sk);
302 } /* else let the usual retransmit timer handle it */
303}
Neal Cardwell4fab9072014-08-14 12:40:05 -0400304EXPORT_SYMBOL(tcp_v4_mtu_reduced);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305
David S. Miller55be7a92012-07-11 21:27:49 -0700306static void do_redirect(struct sk_buff *skb, struct sock *sk)
307{
308 struct dst_entry *dst = __sk_dst_check(sk, 0);
309
David S. Miller1ed5c482012-07-12 00:41:25 -0700310 if (dst)
David S. Miller6700c272012-07-17 03:29:28 -0700311 dst->ops->redirect(dst, sk, skb);
David S. Miller55be7a92012-07-11 21:27:49 -0700312}
313
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314/*
315 * This routine is called by the ICMP module when it gets some
316 * sort of error condition. If err < 0 then the socket should
317 * be closed and the error returned to the user. If err > 0
318 * it's just the icmp type << 8 | icmp code. After adjustment
319 * header points to the first 8 bytes of the tcp header. We need
320 * to find the appropriate port.
321 *
322 * The locking strategy used here is very "optimistic". When
323 * someone else accesses the socket the ICMP is just dropped
324 * and for some paths there is no check at all.
325 * A more general error queue to queue errors for later handling
326 * is probably better.
327 *
328 */
329
Damian Lukowski4d1a2d92009-08-26 00:16:27 +0000330void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331{
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000332 const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
Damian Lukowski4d1a2d92009-08-26 00:16:27 +0000333 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000334 struct inet_connection_sock *icsk;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700335 struct tcp_sock *tp;
336 struct inet_sock *inet;
Damian Lukowski4d1a2d92009-08-26 00:16:27 +0000337 const int type = icmp_hdr(icmp_skb)->type;
338 const int code = icmp_hdr(icmp_skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700339 struct sock *sk;
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000340 struct sk_buff *skb;
Yuchung Cheng0a672f72014-05-11 20:22:12 -0700341 struct request_sock *fastopen;
342 __u32 seq, snd_una;
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000343 __u32 remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700344 int err;
Damian Lukowski4d1a2d92009-08-26 00:16:27 +0000345 struct net *net = dev_net(icmp_skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700346
Pavel Emelyanovfd54d712008-07-14 23:01:40 -0700347 sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
Damian Lukowski4d1a2d92009-08-26 00:16:27 +0000348 iph->saddr, th->source, inet_iif(icmp_skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349 if (!sk) {
Pavel Emelyanovdcfc23c2008-07-14 23:03:00 -0700350 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700351 return;
352 }
353 if (sk->sk_state == TCP_TIME_WAIT) {
YOSHIFUJI Hideaki9469c7b2006-10-10 19:41:46 -0700354 inet_twsk_put(inet_twsk(sk));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700355 return;
356 }
357
358 bh_lock_sock(sk);
359 /* If too many ICMPs get dropped on busy
360 * servers this needs to be solved differently.
Eric Dumazet563d34d2012-07-23 09:48:52 +0200361 * We do take care of PMTU discovery (RFC1191) special case :
362 * we can receive locally generated ICMP messages while socket is held.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363 */
Eric Dumazetb74aa932013-01-19 16:10:37 +0000364 if (sock_owned_by_user(sk)) {
365 if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
366 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
367 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700368 if (sk->sk_state == TCP_CLOSE)
369 goto out;
370
stephen hemminger97e3ecd12010-03-18 11:27:32 +0000371 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
372 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
373 goto out;
374 }
375
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000376 icsk = inet_csk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700377 tp = tcp_sk(sk);
378 seq = ntohl(th->seq);
Yuchung Cheng0a672f72014-05-11 20:22:12 -0700379 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
380 fastopen = tp->fastopen_rsk;
381 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700382 if (sk->sk_state != TCP_LISTEN &&
Yuchung Cheng0a672f72014-05-11 20:22:12 -0700383 !between(seq, snd_una, tp->snd_nxt)) {
Pavel Emelyanovde0744a2008-07-16 20:31:16 -0700384 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700385 goto out;
386 }
387
388 switch (type) {
David S. Miller55be7a92012-07-11 21:27:49 -0700389 case ICMP_REDIRECT:
390 do_redirect(icmp_skb, sk);
391 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700392 case ICMP_SOURCE_QUENCH:
393 /* Just silently ignore these. */
394 goto out;
395 case ICMP_PARAMETERPROB:
396 err = EPROTO;
397 break;
398 case ICMP_DEST_UNREACH:
399 if (code > NR_ICMP_UNREACH)
400 goto out;
401
402 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
Eric Dumazet0d4f0602013-03-18 07:01:28 +0000403 /* We are not interested in TCP_LISTEN and open_requests
404 * (SYN-ACKs send out by Linux are always <576bytes so
405 * they should go through unfragmented).
406 */
407 if (sk->sk_state == TCP_LISTEN)
408 goto out;
409
Eric Dumazet563d34d2012-07-23 09:48:52 +0200410 tp->mtu_info = info;
Eric Dumazet144d56e2012-08-20 00:22:46 +0000411 if (!sock_owned_by_user(sk)) {
Eric Dumazet563d34d2012-07-23 09:48:52 +0200412 tcp_v4_mtu_reduced(sk);
Eric Dumazet144d56e2012-08-20 00:22:46 +0000413 } else {
414 if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
415 sock_hold(sk);
416 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417 goto out;
418 }
419
420 err = icmp_err_convert[code].errno;
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000421 /* check if icmp_skb allows revert of backoff
422 * (see draft-zimmermann-tcp-lcd) */
423 if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
424 break;
425 if (seq != tp->snd_una || !icsk->icsk_retransmits ||
Yuchung Cheng0a672f72014-05-11 20:22:12 -0700426 !icsk->icsk_backoff || fastopen)
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000427 break;
428
David S. Miller8f49c272010-11-12 13:35:00 -0800429 if (sock_owned_by_user(sk))
430 break;
431
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000432 icsk->icsk_backoff--;
Eric Dumazet740b0f12014-02-26 14:02:48 -0800433 inet_csk(sk)->icsk_rto = (tp->srtt_us ? __tcp_set_rto(tp) :
Jerry Chu9ad7c042011-06-08 11:08:38 +0000434 TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000435 tcp_bound_rto(sk);
436
437 skb = tcp_write_queue_head(sk);
438 BUG_ON(!skb);
439
Eric Dumazet7faee5c2014-09-05 15:33:33 -0700440 remaining = icsk->icsk_rto -
441 min(icsk->icsk_rto,
442 tcp_time_stamp - tcp_skb_timestamp(skb));
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000443
444 if (remaining) {
445 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
446 remaining, TCP_RTO_MAX);
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000447 } else {
448 /* RTO revert clocked out retransmission.
449 * Will retransmit now */
450 tcp_retransmit_timer(sk);
451 }
452
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453 break;
454 case ICMP_TIME_EXCEEDED:
455 err = EHOSTUNREACH;
456 break;
457 default:
458 goto out;
459 }
460
461 switch (sk->sk_state) {
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700462 struct request_sock *req, **prev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463 case TCP_LISTEN:
464 if (sock_owned_by_user(sk))
465 goto out;
466
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -0700467 req = inet_csk_search_req(sk, &prev, th->dest,
468 iph->daddr, iph->saddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700469 if (!req)
470 goto out;
471
472 /* ICMPs are not backlogged, hence we cannot get
473 an established socket here.
474 */
Ilpo Järvinen547b7922008-07-25 21:43:18 -0700475 WARN_ON(req->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700477 if (seq != tcp_rsk(req)->snt_isn) {
Pavel Emelyanovde0744a2008-07-16 20:31:16 -0700478 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700479 goto out;
480 }
481
482 /*
483 * Still in SYN_RECV, just remove it silently.
484 * There is no good way to pass the error to the newly
485 * created socket, and POSIX does not want network
486 * errors returned from accept().
487 */
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -0700488 inet_csk_reqsk_queue_drop(sk, req, prev);
Vijay Subramanian848bf152013-01-31 08:24:06 +0000489 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700490 goto out;
491
492 case TCP_SYN_SENT:
Yuchung Cheng0a672f72014-05-11 20:22:12 -0700493 case TCP_SYN_RECV:
494 /* Only in fast or simultaneous open. If a fast open socket is
495 * is already accepted it is treated as a connected one below.
496 */
497 if (fastopen && fastopen->sk == NULL)
498 break;
499
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500 if (!sock_owned_by_user(sk)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700501 sk->sk_err = err;
502
503 sk->sk_error_report(sk);
504
505 tcp_done(sk);
506 } else {
507 sk->sk_err_soft = err;
508 }
509 goto out;
510 }
511
512 /* If we've already connected we will keep trying
513 * until we time out, or the user gives up.
514 *
515 * rfc1122 4.2.3.9 allows to consider as hard errors
516 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
517 * but it is obsoleted by pmtu discovery).
518 *
519 * Note, that in modern internet, where routing is unreliable
520 * and in each dark corner broken firewalls sit, sending random
521 * errors ordered by their masters even this two messages finally lose
522 * their original sense (even Linux sends invalid PORT_UNREACHs)
523 *
524 * Now we are in compliance with RFCs.
525 * --ANK (980905)
526 */
527
528 inet = inet_sk(sk);
529 if (!sock_owned_by_user(sk) && inet->recverr) {
530 sk->sk_err = err;
531 sk->sk_error_report(sk);
532 } else { /* Only an error on timeout */
533 sk->sk_err_soft = err;
534 }
535
536out:
537 bh_unlock_sock(sk);
538 sock_put(sk);
539}
540
Daniel Borkmann28850dc2013-06-07 05:11:46 +0000541void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700542{
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700543 struct tcphdr *th = tcp_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544
Patrick McHardy84fa7932006-08-29 16:44:56 -0700545 if (skb->ip_summed == CHECKSUM_PARTIAL) {
Herbert Xu419f9f82010-04-11 02:15:53 +0000546 th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
Herbert Xu663ead32007-04-09 11:59:07 -0700547 skb->csum_start = skb_transport_header(skb) - skb->head;
Al Viroff1dcad2006-11-20 18:07:29 -0800548 skb->csum_offset = offsetof(struct tcphdr, check);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549 } else {
Herbert Xu419f9f82010-04-11 02:15:53 +0000550 th->check = tcp_v4_check(skb->len, saddr, daddr,
Joe Perches07f07572008-11-19 15:44:53 -0800551 csum_partial(th,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552 th->doff << 2,
553 skb->csum));
554 }
555}
556
Herbert Xu419f9f82010-04-11 02:15:53 +0000557/* This routine computes an IPv4 TCP checksum. */
Herbert Xubb296242010-04-11 02:15:55 +0000558void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
Herbert Xu419f9f82010-04-11 02:15:53 +0000559{
Eric Dumazetcf533ea2011-10-21 05:22:42 -0400560 const struct inet_sock *inet = inet_sk(sk);
Herbert Xu419f9f82010-04-11 02:15:53 +0000561
562 __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
563}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000564EXPORT_SYMBOL(tcp_v4_send_check);
Herbert Xu419f9f82010-04-11 02:15:53 +0000565
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566/*
567 * This routine will send an RST to the other tcp.
568 *
569 * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
570 * for reset.
571 * Answer: if a packet caused RST, it is not for a socket
572 * existing in our system, if it is matched to a socket,
573 * it is just duplicate segment or bug in other side's TCP.
574 * So that we build reply only basing on parameters
575 * arrived with segment.
576 * Exception: precedence violation. We do not implement it in any case.
577 */
578
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800579static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700580{
Eric Dumazetcf533ea2011-10-21 05:22:42 -0400581 const struct tcphdr *th = tcp_hdr(skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800582 struct {
583 struct tcphdr th;
584#ifdef CONFIG_TCP_MD5SIG
Al Viro714e85b2006-11-14 20:51:49 -0800585 __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800586#endif
587 } rep;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588 struct ip_reply_arg arg;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800589#ifdef CONFIG_TCP_MD5SIG
590 struct tcp_md5sig_key *key;
Shawn Lu658ddaa2012-01-31 22:35:48 +0000591 const __u8 *hash_location = NULL;
592 unsigned char newhash[16];
593 int genhash;
594 struct sock *sk1 = NULL;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800595#endif
Pavel Emelyanova86b1e32008-07-16 20:20:58 -0700596 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597
598 /* Never send a reset in response to a reset. */
599 if (th->rst)
600 return;
601
Eric Dumazet511c3f92009-06-02 05:14:27 +0000602 if (skb_rtable(skb)->rt_type != RTN_LOCAL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603 return;
604
605 /* Swap the send and the receive. */
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800606 memset(&rep, 0, sizeof(rep));
607 rep.th.dest = th->source;
608 rep.th.source = th->dest;
609 rep.th.doff = sizeof(struct tcphdr) / 4;
610 rep.th.rst = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700611
612 if (th->ack) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800613 rep.th.seq = th->ack_seq;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700614 } else {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800615 rep.th.ack = 1;
616 rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
617 skb->len - (th->doff << 2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700618 }
619
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200620 memset(&arg, 0, sizeof(arg));
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800621 arg.iov[0].iov_base = (unsigned char *)&rep;
622 arg.iov[0].iov_len = sizeof(rep.th);
623
624#ifdef CONFIG_TCP_MD5SIG
Shawn Lu658ddaa2012-01-31 22:35:48 +0000625 hash_location = tcp_parse_md5sig_option(th);
626 if (!sk && hash_location) {
627 /*
628 * active side is lost. Try to find listening socket through
629 * source port, and then find md5 key through listening socket.
630 * we are not loose security here:
631 * Incoming packet is checked with md5 hash with finding key,
632 * no RST generated if md5 hash doesn't match.
633 */
634 sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
Tom Herbertda5e3632013-01-22 09:50:24 +0000635 &tcp_hashinfo, ip_hdr(skb)->saddr,
636 th->source, ip_hdr(skb)->daddr,
Shawn Lu658ddaa2012-01-31 22:35:48 +0000637 ntohs(th->source), inet_iif(skb));
638 /* don't send rst if it can't find key */
639 if (!sk1)
640 return;
641 rcu_read_lock();
642 key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
643 &ip_hdr(skb)->saddr, AF_INET);
644 if (!key)
645 goto release_sk1;
646
647 genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
648 if (genhash || memcmp(hash_location, newhash, 16) != 0)
649 goto release_sk1;
650 } else {
651 key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
652 &ip_hdr(skb)->saddr,
653 AF_INET) : NULL;
654 }
655
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800656 if (key) {
657 rep.opt[0] = htonl((TCPOPT_NOP << 24) |
658 (TCPOPT_NOP << 16) |
659 (TCPOPT_MD5SIG << 8) |
660 TCPOLEN_MD5SIG);
661 /* Update length and the length the header thinks exists */
662 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
663 rep.th.doff = arg.iov[0].iov_len / 4;
664
Adam Langley49a72df2008-07-19 00:01:42 -0700665 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
Ilpo Järvinen78e645cb2008-10-09 14:37:47 -0700666 key, ip_hdr(skb)->saddr,
667 ip_hdr(skb)->daddr, &rep.th);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800668 }
669#endif
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700670 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
671 ip_hdr(skb)->saddr, /* XXX */
Ilpo Järvinen52cd5752008-10-08 11:34:06 -0700672 arg.iov[0].iov_len, IPPROTO_TCP, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700673 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
KOVACS Krisztian88ef4a52008-10-01 07:41:00 -0700674 arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
Shawn Lue2446ea2012-02-04 12:38:09 +0000675 /* When socket is gone, all binding information is lost.
Alexey Kuznetsov4c675252012-10-12 04:34:17 +0000676 * routing might fail in this case. No choice here, if we choose to force
677 * input interface, we will misroute in case of asymmetric route.
Shawn Lue2446ea2012-02-04 12:38:09 +0000678 */
Alexey Kuznetsov4c675252012-10-12 04:34:17 +0000679 if (sk)
680 arg.bound_dev_if = sk->sk_bound_dev_if;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681
Eric Dumazetadf30902009-06-02 05:19:30 +0000682 net = dev_net(skb_dst(skb)->dev);
Eric Dumazet66b13d92011-10-24 03:06:21 -0400683 arg.tos = ip_hdr(skb)->tos;
Eric Dumazetbe9f4a42012-07-19 07:34:03 +0000684 ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
David S. Miller70e73412012-06-28 03:21:41 -0700685 ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686
Pavel Emelyanov63231bd2008-07-16 20:22:25 -0700687 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
688 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
Shawn Lu658ddaa2012-01-31 22:35:48 +0000689
690#ifdef CONFIG_TCP_MD5SIG
691release_sk1:
692 if (sk1) {
693 rcu_read_unlock();
694 sock_put(sk1);
695 }
696#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697}
698
/* The code below, which sends ACKs in the SYN-RECV and TIME-WAIT states
 * outside of full socket context, is admittedly ugly — but there is no
 * clean alternative when no complete socket exists.
 */
702
/* Build and transmit a bare ACK for a connection we do not own a full
 * socket for (TIME-WAIT and SYN-RECV replies).  The TCP header plus
 * option words are assembled on the stack, addressing is mirrored from
 * the incoming @skb, and the result is sent via ip_send_unicast_reply().
 *
 * @seq, @ack, @win: sequence number, acknowledgment number and window
 *                   to advertise in the reply.
 * @tsval, @tsecr:   TCP timestamp option values; the option is emitted
 *                   only when @tsecr is non-zero.
 * @oif:             if non-zero, bind the reply to this output interface.
 * @key:             RFC 2385 MD5 signature key, or NULL for none.
 * @reply_flags:     ip_reply_arg flags (e.g. IP_REPLY_ARG_NOSRCCHECK).
 * @tos:             IP TOS byte to use on the reply.
 */
static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	/* On-stack reply: header plus room for timestamp (and MD5) options */
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
	struct net *net = dev_net(skb_dst(skb)->dev);

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len = sizeof(rep.th);
	if (tsecr) {
		/* NOP:NOP:TIMESTAMP, padded to a 4-byte boundary */
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tsval);
		rep.opt[2] = htonl(tsecr);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest = th->source;
	rep.th.source = th->dest;
	rep.th.doff = arg.iov[0].iov_len / 4;
	rep.th.seq = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack = 1;
	rep.th.window = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		/* MD5 option goes right after the timestamp option (if any) */
		int offset = (tsecr) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		/* Sign the header now that all fields/options are final */
		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	/* Partial checksum over the pseudo-header; the rest is folded in
	 * at transmit time (csumoffset points at tcphdr.check).
	 */
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}
772
773static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
774{
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700775 struct inet_timewait_sock *tw = inet_twsk(sk);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800776 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700777
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900778 tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200779 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
Andrey Vaginee684b62013-02-11 05:50:19 +0000780 tcp_time_stamp + tcptw->tw_ts_offset,
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900781 tcptw->tw_ts_recent,
782 tw->tw_bound_dev_if,
KOVACS Krisztian88ef4a52008-10-01 07:41:00 -0700783 tcp_twsk_md5_key(tcptw),
Eric Dumazet66b13d92011-10-24 03:06:21 -0400784 tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
785 tw->tw_tos
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900786 );
Linus Torvalds1da177e2005-04-16 15:20:36 -0700787
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700788 inet_twsk_put(tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700789}
790
Gui Jianfeng6edafaa2008-08-06 23:50:04 -0700791static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200792 struct request_sock *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700793{
Jerry Chu168a8f52012-08-31 12:29:13 +0000794 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
795 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
796 */
797 tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
798 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
799 tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
Andrey Vaginee684b62013-02-11 05:50:19 +0000800 tcp_time_stamp,
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900801 req->ts_recent,
802 0,
Eric Dumazeta915da9b2012-01-31 05:18:33 +0000803 tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
804 AF_INET),
Eric Dumazet66b13d92011-10-24 03:06:21 -0400805 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
806 ip_hdr(skb)->tos);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807}
808
/*
 * Send a SYN-ACK after having received a SYN.
 * This still operates on a request_sock only, not on a big
 * socket.
 *
 * @dst may be supplied by the caller; otherwise a route is looked up
 * here.  @fl is unused in the IPv4 variant; it is present to match the
 * af-independent send_synack signature (see tcp_request_sock_ipv4_ops).
 * Returns 0 on success, negative on failure to route or transmit.
 */
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      u16 queue_mapping,
			      struct tcp_fastopen_cookie *foc)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, foc);

	if (skb) {
		/* Fill in the TCP checksum before handing off to IP */
		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);

		skb_set_queue_mapping(skb, queue_mapping);
		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
					    ireq->ir_rmt_addr,
					    ireq->opt);
		/* Treat congestion-notification return codes as success */
		err = net_xmit_eval(err);
	}

	return err;
}
843
844/*
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700845 * IPv4 request_sock destructor.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700846 */
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700847static void tcp_v4_reqsk_destructor(struct request_sock *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700848{
Jesper Juhla51482b2005-11-08 09:41:34 -0800849 kfree(inet_rsk(req)->opt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700850}
851
/*
 * Return true if a syncookie should be sent in response to the SYN
 * carried by @skb.  Also bumps the relevant SNMP counter and logs a
 * one-time "possible SYN flooding" warning for the listener.
 */
bool tcp_syn_flood_action(struct sock *sk,
			  const struct sk_buff *skb,
			  const char *proto)
{
	const char *msg = "Dropping request";
	bool want_cookie = false;
	struct listen_sock *lopt;

#ifdef CONFIG_SYN_COOKIES
	if (sysctl_tcp_syncookies) {
		msg = "Sending cookies";
		want_cookie = true;
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
	} else
#endif
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);

	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
	/* Warn at most once per listener; suppress entirely when
	 * syncookies are forced on unconditionally (sysctl value 2).
	 */
	if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
		lopt->synflood_warned = 1;
		pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
			proto, ntohs(tcp_hdr(skb)->dest), msg);
	}
	return want_cookie;
}
EXPORT_SYMBOL(tcp_syn_flood_action);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700881
882/*
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700883 * Save and compile IPv4 options into the request_sock if needed.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884 */
Christoph Paasch5dff7472012-09-26 11:59:09 +0000885static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700886{
Eric Dumazetf6d8bd02011-04-21 09:45:37 +0000887 const struct ip_options *opt = &(IPCB(skb)->opt);
888 struct ip_options_rcu *dopt = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700889
890 if (opt && opt->optlen) {
Eric Dumazetf6d8bd02011-04-21 09:45:37 +0000891 int opt_size = sizeof(*dopt) + opt->optlen;
892
Linus Torvalds1da177e2005-04-16 15:20:36 -0700893 dopt = kmalloc(opt_size, GFP_ATOMIC);
894 if (dopt) {
Eric Dumazetf6d8bd02011-04-21 09:45:37 +0000895 if (ip_options_echo(&dopt->opt, skb)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700896 kfree(dopt);
897 dopt = NULL;
898 }
899 }
900 }
901 return dopt;
902}
903
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800904#ifdef CONFIG_TCP_MD5SIG
905/*
906 * RFC2385 MD5 checksumming requires a mapping of
907 * IP address->MD5 Key.
908 * We need to maintain these in the sk structure.
909 */
910
/* Find the Key structure for an address.  The caller must hold either
 * the socket lock or rcu_read_lock(); the returned key is only stable
 * for as long as that protection is held.  Returns NULL if no key is
 * configured for @addr/@family.
 */
struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
					 const union tcp_md5_addr *addr,
					 int family)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	unsigned int size = sizeof(struct in_addr);
	struct tcp_md5sig_info *md5sig;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       sock_owned_by_user(sk) ||
				       lockdep_is_held(&sk->sk_lock.slock));
	if (!md5sig)
		return NULL;
#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		size = sizeof(struct in6_addr);
#endif
	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;
		/* Compare only the address bytes relevant to this family */
		if (!memcmp(&key->addr, addr, size))
			return key;
	}
	return NULL;
}
EXPORT_SYMBOL(tcp_md5_do_lookup);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800940
941struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
942 struct sock *addr_sk)
943{
Eric Dumazeta915da9b2012-01-31 05:18:33 +0000944 union tcp_md5_addr *addr;
945
946 addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
947 return tcp_md5_do_lookup(sk, addr, AF_INET);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800948}
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800949EXPORT_SYMBOL(tcp_v4_md5_lookup);
950
Adrian Bunkf5b99bc2006-11-30 17:22:29 -0800951static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
952 struct request_sock *req)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800953{
Eric Dumazeta915da9b2012-01-31 05:18:33 +0000954 union tcp_md5_addr *addr;
955
Eric Dumazet634fb9792013-10-09 15:21:29 -0700956 addr = (union tcp_md5_addr *)&inet_rsk(req)->ir_rmt_addr;
Eric Dumazeta915da9b2012-01-31 05:18:33 +0000957 return tcp_md5_do_lookup(sk, addr, AF_INET);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800958}
959
/* This can be called on a newly created socket, from other files */
/* Install (or replace) the MD5 signature key for @addr/@family on @sk.
 * Caller must hold the socket lock.  Returns 0 on success or -ENOMEM.
 */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (key) {
		/* Pre-existing entry - just update that one. */
		memcpy(key->key, newkey, newkeylen);
		key->keylen = newkeylen;
		return 0;
	}

	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   sock_owned_by_user(sk));
	if (!md5sig) {
		/* First key on this socket: allocate the list head and
		 * publish it to RCU readers with rcu_assign_pointer().
		 */
		md5sig = kmalloc(sizeof(*md5sig), gfp);
		if (!md5sig)
			return -ENOMEM;

		/* MD5-signed segments cannot be offloaded/GSO'd */
		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		INIT_HLIST_HEAD(&md5sig->head);
		rcu_assign_pointer(tp->md5sig_info, md5sig);
	}

	key = sock_kmalloc(sk, sizeof(*key), gfp);
	if (!key)
		return -ENOMEM;
	if (!tcp_alloc_md5sig_pool()) {
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}

	memcpy(key->key, newkey, newkeylen);
	key->keylen = newkeylen;
	key->family = family;
	memcpy(&key->addr, addr,
	       (family == AF_INET6) ? sizeof(struct in6_addr) :
				      sizeof(struct in_addr));
	/* Fully initialised before being made visible to RCU readers */
	hlist_add_head_rcu(&key->node, &md5sig->head);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_add);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001007
/* Remove the MD5 key for @addr/@family from @sk.  Caller must hold the
 * socket lock.  Returns 0, or -ENOENT when no such key exists.
 */
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
{
	struct tcp_md5sig_key *key;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (!key)
		return -ENOENT;
	hlist_del_rcu(&key->node);
	/* Uncharge socket optmem now; the key itself is freed only after
	 * an RCU grace period so concurrent lockless readers stay safe.
	 */
	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
	kfree_rcu(key, rcu);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);
1021
/* Release every MD5 key attached to @sk.  The "1" passed to
 * rcu_dereference_protected() suppresses the lockdep check: the caller
 * is expected to have exclusive access to the socket.
 */
static void tcp_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *n;
	struct tcp_md5sig_info *md5sig;

	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);

	/* _safe variant: entries are unlinked while iterating */
	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
		hlist_del_rcu(&key->node);
		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
		kfree_rcu(key, rcu);
	}
}
1037
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02001038static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1039 int optlen)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001040{
1041 struct tcp_md5sig cmd;
1042 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001043
1044 if (optlen < sizeof(cmd))
1045 return -EINVAL;
1046
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02001047 if (copy_from_user(&cmd, optval, sizeof(cmd)))
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001048 return -EFAULT;
1049
1050 if (sin->sin_family != AF_INET)
1051 return -EINVAL;
1052
Dmitry Popov64a124e2014-08-03 22:45:19 +04001053 if (!cmd.tcpm_keylen)
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001054 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1055 AF_INET);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001056
1057 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1058 return -EINVAL;
1059
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001060 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1061 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1062 GFP_KERNEL);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001063}
1064
Adam Langley49a72df2008-07-19 00:01:42 -07001065static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1066 __be32 daddr, __be32 saddr, int nbytes)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001067{
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001068 struct tcp4_pseudohdr *bp;
Adam Langley49a72df2008-07-19 00:01:42 -07001069 struct scatterlist sg;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001070
1071 bp = &hp->md5_blk.ip4;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001072
1073 /*
Adam Langley49a72df2008-07-19 00:01:42 -07001074 * 1. the TCP pseudo-header (in the order: source IP address,
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001075 * destination IP address, zero-padded protocol number, and
1076 * segment length)
1077 */
1078 bp->saddr = saddr;
1079 bp->daddr = daddr;
1080 bp->pad = 0;
YOSHIFUJI Hideaki076fb722008-04-17 12:48:12 +09001081 bp->protocol = IPPROTO_TCP;
Adam Langley49a72df2008-07-19 00:01:42 -07001082 bp->len = cpu_to_be16(nbytes);
David S. Millerc7da57a2007-10-26 00:41:21 -07001083
Adam Langley49a72df2008-07-19 00:01:42 -07001084 sg_init_one(&sg, bp, sizeof(*bp));
1085 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1086}
1087
Eric Dumazeta915da9b2012-01-31 05:18:33 +00001088static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
Eric Dumazet318cf7a2011-10-24 02:46:04 -04001089 __be32 daddr, __be32 saddr, const struct tcphdr *th)
Adam Langley49a72df2008-07-19 00:01:42 -07001090{
1091 struct tcp_md5sig_pool *hp;
1092 struct hash_desc *desc;
1093
1094 hp = tcp_get_md5sig_pool();
1095 if (!hp)
1096 goto clear_hash_noput;
1097 desc = &hp->md5_desc;
1098
1099 if (crypto_hash_init(desc))
1100 goto clear_hash;
1101 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1102 goto clear_hash;
1103 if (tcp_md5_hash_header(hp, th))
1104 goto clear_hash;
1105 if (tcp_md5_hash_key(hp, key))
1106 goto clear_hash;
1107 if (crypto_hash_final(desc, md5_hash))
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001108 goto clear_hash;
1109
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001110 tcp_put_md5sig_pool();
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001111 return 0;
Adam Langley49a72df2008-07-19 00:01:42 -07001112
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001113clear_hash:
1114 tcp_put_md5sig_pool();
1115clear_hash_noput:
1116 memset(md5_hash, 0, 16);
Adam Langley49a72df2008-07-19 00:01:42 -07001117 return 1;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001118}
1119
/* Compute the RFC 2385 MD5 signature of the whole TCP segment in @skb
 * into @md5_hash (16 bytes).  Pseudo-header addresses come from the
 * full socket (@sk), else the request socket (@req), else the IP
 * header of @skb itself.  Returns 0 on success; on failure zeroes
 * @md5_hash and returns 1.
 */
int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			const struct sock *sk, const struct request_sock *req,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) {
		saddr = inet_sk(sk)->inet_saddr;
		daddr = inet_sk(sk)->inet_daddr;
	} else if (req) {
		saddr = inet_rsk(req)->ir_loc_addr;
		daddr = inet_rsk(req)->ir_rmt_addr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	/* Hash pseudo-header, TCP header, payload, then the key itself */
	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001170
/* Verify the MD5 signature option of an inbound segment against the key
 * configured for the sending peer, if any.  Returns true when the
 * segment must be dropped.  Must run under rcu_read_lock() (taken by
 * the tcp_v4_inbound_md5_hash() wrapper).
 */
static bool __tcp_v4_inbound_md5_hash(struct sock *sk,
				      const struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and its wrong.
	 */
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
					  AF_INET);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		/* Mismatch (or failure to compute): log rate-limited */
		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
				     &iph->saddr, ntohs(th->source),
				     &iph->daddr, ntohs(th->dest),
				     genhash ? " tcp_v4_calc_md5_hash failed"
				     : "");
		return true;
	}
	return false;
}
1224
Dmitry Popov9ea88a12014-08-07 02:38:22 +04001225static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
1226{
1227 bool ret;
1228
1229 rcu_read_lock();
1230 ret = __tcp_v4_inbound_md5_hash(sk, skb);
1231 rcu_read_unlock();
1232
1233 return ret;
1234}
1235
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001236#endif
1237
Octavian Purdila16bea702014-06-25 17:09:53 +03001238static void tcp_v4_init_req(struct request_sock *req, struct sock *sk,
1239 struct sk_buff *skb)
1240{
1241 struct inet_request_sock *ireq = inet_rsk(req);
1242
1243 ireq->ir_loc_addr = ip_hdr(skb)->daddr;
1244 ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
1245 ireq->no_srccheck = inet_sk(sk)->transparent;
1246 ireq->opt = tcp_v4_save_options(skb);
1247}
1248
Octavian Purdilad94e0412014-06-25 17:09:55 +03001249static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl,
1250 const struct request_sock *req,
1251 bool *strict)
1252{
1253 struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
1254
1255 if (strict) {
1256 if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
1257 *strict = true;
1258 else
1259 *strict = false;
1260 }
1261
1262 return dst;
1263}
1264
/* Generic (af-independent) request_sock operations for IPv4 TCP:
 * wired to the IPv4-specific send/ack/reset/destructor handlers above.
 */
struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};
1274
/* AF-specific hooks consumed by the protocol-independent
 * tcp_conn_request() path for IPv4 (MD5, syncookies, routing,
 * sequence-number generation and SYN-ACK transmission).
 */
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.mss_clamp	=	TCP_MSS_DEFAULT,
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
#endif
	.init_req	=	tcp_v4_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v4_init_sequence,
#endif
	.route_req	=	tcp_v4_route_req,
	.init_seq	=	tcp_v4_init_sequence,
	.send_synack	=	tcp_v4_send_synack,
	.queue_hash_add	=	inet_csk_reqsk_queue_hash_add,
};
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001290
Linus Torvalds1da177e2005-04-16 15:20:36 -07001291int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1292{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001293 /* Never answer to SYNs send to broadcast or multicast */
Eric Dumazet511c3f92009-06-02 05:14:27 +00001294 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001295 goto drop;
1296
Octavian Purdila1fb6f152014-06-25 17:10:02 +03001297 return tcp_conn_request(&tcp_request_sock_ops,
1298 &tcp_request_sock_ipv4_ops, sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001299
Linus Torvalds1da177e2005-04-16 15:20:36 -07001300drop:
Vijay Subramanian848bf152013-01-31 08:24:06 +00001301 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001302 return 0;
1303}
Eric Dumazet4bc2f182010-07-09 21:22:10 +00001304EXPORT_SYMBOL(tcp_v4_conn_request);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001305
1306
/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 *
 * @sk:  the listening socket
 * @req: the request_sock that matched the completing ACK
 * @dst: optional precomputed route (non-NULL in the syncookie path);
 *       when NULL a child route is looked up here.
 *
 * Returns the new child socket, or NULL on failure (accept queue full,
 * allocation failure, routing failure, or port inheritance failure).
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct ip_options_rcu *inet_opt;

	/* Refuse the child if the listener's accept backlog is full. */
	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	inet_sk_rx_dst_set(newsk, skb);

	/* Copy addressing and per-packet IP metadata from the request /
	 * the completing ACK into the child socket.
	 */
	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	newinet->inet_daddr   = ireq->ir_rmt_addr;
	newinet->inet_rcv_saddr = ireq->ir_loc_addr;
	newinet->inet_saddr	      = ireq->ir_loc_addr;
	/* Transfer ownership of the received IP options to the child;
	 * published with rcu_assign_pointer, then cleared on the request
	 * so the request destructor won't free them.
	 */
	inet_opt	      = ireq->opt;
	rcu_assign_pointer(newinet->inet_opt, inet_opt);
	ireq->opt	      = NULL;
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
	newinet->rcv_tos      = ip_hdr(skb)->tos;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	inet_set_txhash(newsk);
	if (inet_opt)
		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	/* Seed the IP ID counter for this connection. */
	newinet->inet_id = newtp->write_seq ^ jiffies;

	if (!dst) {
		dst = inet_csk_route_child_sock(sk, newsk, req);
		if (!dst)
			goto put_and_exit;
	} else {
		/* syncookie case : see end of cookie_v4_check() */
	}
	sk_setup_caps(newsk, dst);

	/* Derive MSS/advmss from the route, clamped by any user-set MSS
	 * inherited from the listener.
	 */
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric_advmss(dst);
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
				AF_INET);
	if (key != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
		/* MD5 signing is incompatible with GSO on this socket. */
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	/* Inherit the listener's local port binding, then make the child
	 * visible in the established hash.
	 */
	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	__inet_hash_nolisten(newsk, NULL);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return NULL;
put_and_exit:
	/* Child was created but cannot be used: tear it down safely. */
	inet_csk_prepare_forced_close(newsk);
	tcp_done(newsk);
	goto exit;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001405
/* Resolve which socket should handle a segment received on listening
 * socket @sk.  In order of preference:
 *  - a pending connection request (SYN_RECV) matching the 4-tuple:
 *    delegate to tcp_check_req();
 *  - an established socket for the 4-tuple: returned with its
 *    bh-lock held (NULL if that match is actually in TIME_WAIT);
 *  - otherwise @sk itself, possibly replaced by a syncookie-validated
 *    child when CONFIG_SYN_COOKIES is enabled and this is not a SYN.
 */
static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	const struct iphdr *iph = ip_hdr(skb);
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
						       iph->saddr, iph->daddr);
	if (req)
		return tcp_check_req(sk, skb, req, prev, false);

	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
			th->source, iph->daddr, th->dest, inet_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		/* TIME_WAIT match: drop the lookup reference, caller
		 * discards the segment.
		 */
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->syn)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}
1436
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 *
 * Returns 0; on failure a RST has been sent and the skb freed.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		/* Invalidate the cached input route if the packet arrived
		 * on a different interface or the dst no longer checks out.
		 */
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    dst->ops->check(dst, 0) == NULL) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}
		tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
		return 0;
	}

	/* Slow path: header length / checksum must be verified here. */
	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/* A child socket (new or established) was found: feed the
		 * segment to it instead of the listener.
		 */
		if (nsk != sk) {
			sock_rps_save_rxhash(nsk, skb);
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
		rsk = sk;
		goto reset;
	}
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001506
/* Early demultiplex, run before the routing decision and without any
 * socket lock: try to match the incoming segment to an established
 * socket.  On a hit, attach the socket to the skb (released via
 * sock_edemux) and, if the socket's cached input route is still valid
 * for the arrival interface, attach it as a noref dst so the input
 * path can skip the route lookup.
 */
void tcp_v4_early_demux(struct sk_buff *skb)
{
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	/* Need at least a minimal TCP header in the linear area. */
	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
				       iph->saddr, th->source,
				       iph->daddr, ntohs(th->dest),
				       skb->skb_iif);
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk->sk_state != TCP_TIME_WAIT) {
			struct dst_entry *dst = sk->sk_rx_dst;

			if (dst)
				dst = dst_check(dst, 0);
			/* Only reuse the cached route if it was learned on
			 * the same interface this packet arrived on.
			 */
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}
1543
/* Packet is added to VJ-style prequeue for processing in process
 * context, if a reader task is waiting. Apparently, this exciting
 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
 * failed somewhere. Latency? Burstiness? Well, at least now we will
 * see, why it failed. 8)8) --ANK
 *
 * Returns true if the skb was consumed (queued or flushed through the
 * backlog receive handler); false means the caller must process the
 * segment itself.
 */
bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Prequeueing is disabled by sysctl, or no reader is waiting. */
	if (sysctl_tcp_low_latency || !tp->ucopy.task)
		return false;

	/* Don't start a prequeue with a pure (dataless) segment. */
	if (skb->len <= tcp_hdrlen(skb) &&
	    skb_queue_len(&tp->ucopy.prequeue) == 0)
		return false;

	skb_dst_force(skb);
	__skb_queue_tail(&tp->ucopy.prequeue, skb);
	tp->ucopy.memory += skb->truesize;
	if (tp->ucopy.memory > sk->sk_rcvbuf) {
		/* Prequeue overflowed the receive buffer: drain it inline
		 * through the backlog receive handler.
		 */
		struct sk_buff *skb1;

		BUG_ON(sock_owned_by_user(sk));

		while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
			sk_backlog_rcv(sk, skb1);
			NET_INC_STATS_BH(sock_net(sk),
					 LINUX_MIB_TCPPREQUEUEDROPPED);
		}

		tp->ucopy.memory = 0;
	} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
		/* First segment queued: wake the reader and make sure an
		 * ACK will go out even if it never processes the queue.
		 */
		wake_up_interruptible_sync_poll(sk_sleep(sk),
					   POLLIN | POLLRDNORM | POLLRDBAND);
		if (!inet_csk_ack_scheduled(sk))
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  (3 * tcp_rto_min(sk)) / 4,
						  TCP_RTO_MAX);
	}
	return true;
}
EXPORT_SYMBOL(tcp_prequeue);
1588
/*
 * From tcp_input.c
 *
 * Main IPv4 TCP receive entry point (registered as the protocol
 * handler).  Validates the header and checksum, fills in the TCP
 * control block, looks up the owning socket and dispatches the
 * segment; TIME_WAIT sockets get their own state machine at the
 * do_time_wait label.
 */

int tcp_v4_rcv(struct sk_buff *skb)
{
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = tcp_hdr(skb);

	/* doff is in 32-bit words; reject headers shorter than minimal. */
	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided case of th->doff==0 is eliminated.
	 * So, we defer the checks. */

	if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
		goto csum_error;

	/* Re-read headers: pskb_may_pull may have reallocated the head. */
	th = tcp_hdr(skb);
	iph = ip_hdr(skb);
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
	TCP_SKB_CB(skb)->sacked	 = 0;

	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	/* Generalized TTL security mechanism (per-socket minimum TTL). */
	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *     o We're expecting an MD5'd packet and this is no MD5 tcp option
	 *     o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard_and_relse;
#endif

	nf_reset(skb);

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	sk_mark_napi_id(sk, skb);
	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		/* Socket not owned by a process: process now (optionally
		 * via the prequeue or a DMA channel).
		 */
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = net_dma_find_channel();
		if (tp->ucopy.dma_chan)
			ret = tcp_v4_do_rcv(sk, skb);
		else
#endif
		{
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v4_do_rcv(sk, skb);
		}
	} else if (unlikely(sk_add_backlog(sk, skb,
					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
		/* Owner busy and backlog full: drop. */
		bh_unlock_sock(sk);
		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
csum_error:
		TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
bad_packet:
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
	} else {
		/* Valid segment for a nonexistent connection: reset it. */
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff << 2)) {
		inet_twsk_put(inet_twsk(sk));
		goto bad_packet;
	}
	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		/* Acceptable new SYN: if a listener exists, retire the
		 * TIME_WAIT socket and restart processing on the listener.
		 */
		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
							&tcp_hashinfo,
							iph->saddr, th->source,
							iph->daddr, th->dest,
							inet_iif(skb));
		if (sk2) {
			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
			inet_twsk_put(inet_twsk(sk));
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
1757
David S. Millerccb7c412010-12-01 18:09:13 -08001758static struct timewait_sock_ops tcp_timewait_sock_ops = {
1759 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1760 .twsk_unique = tcp_twsk_unique,
1761 .twsk_destructor= tcp_twsk_destructor,
David S. Millerccb7c412010-12-01 18:09:13 -08001762};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001763
Eric Dumazet63d02d12012-08-09 14:11:00 +00001764void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
Eric Dumazet5d299f32012-08-06 05:09:33 +00001765{
1766 struct dst_entry *dst = skb_dst(skb);
1767
1768 dst_hold(dst);
1769 sk->sk_rx_dst = dst;
1770 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1771}
Eric Dumazet63d02d12012-08-09 14:11:00 +00001772EXPORT_SYMBOL(inet_sk_rx_dst_set);
Eric Dumazet5d299f32012-08-06 05:09:33 +00001773
Stephen Hemminger3b401a82009-09-01 19:25:04 +00001774const struct inet_connection_sock_af_ops ipv4_specific = {
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08001775 .queue_xmit = ip_queue_xmit,
1776 .send_check = tcp_v4_send_check,
1777 .rebuild_header = inet_sk_rebuild_header,
Eric Dumazet5d299f32012-08-06 05:09:33 +00001778 .sk_rx_dst_set = inet_sk_rx_dst_set,
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08001779 .conn_request = tcp_v4_conn_request,
1780 .syn_recv_sock = tcp_v4_syn_recv_sock,
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08001781 .net_header_len = sizeof(struct iphdr),
1782 .setsockopt = ip_setsockopt,
1783 .getsockopt = ip_getsockopt,
1784 .addr2sockaddr = inet_csk_addr2sockaddr,
1785 .sockaddr_len = sizeof(struct sockaddr_in),
Arnaldo Carvalho de Meloab1e0a12008-02-03 04:06:04 -08001786 .bind_conflict = inet_csk_bind_conflict,
Dmitry Mishin3fdadf72006-03-20 22:45:21 -08001787#ifdef CONFIG_COMPAT
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08001788 .compat_setsockopt = compat_ip_setsockopt,
1789 .compat_getsockopt = compat_ip_getsockopt,
Dmitry Mishin3fdadf72006-03-20 22:45:21 -08001790#endif
Neal Cardwell4fab9072014-08-14 12:40:05 -04001791 .mtu_reduced = tcp_v4_mtu_reduced,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001792};
Eric Dumazet4bc2f182010-07-09 21:22:10 +00001793EXPORT_SYMBOL(ipv4_specific);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001794
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001795#ifdef CONFIG_TCP_MD5SIG
Stephen Hemmingerb2e4b3d2009-09-01 19:25:03 +00001796static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001797 .md5_lookup = tcp_v4_md5_lookup,
Adam Langley49a72df2008-07-19 00:01:42 -07001798 .calc_md5_hash = tcp_v4_md5_hash_skb,
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001799 .md5_parse = tcp_v4_parse_md5_keys,
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001800};
Andrew Mortonb6332e62006-11-30 19:16:28 -08001801#endif
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001802
Linus Torvalds1da177e2005-04-16 15:20:36 -07001803/* NOTE: A lot of things set to zero explicitly by call to
1804 * sk_alloc() so need not be done here.
1805 */
1806static int tcp_v4_init_sock(struct sock *sk)
1807{
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03001808 struct inet_connection_sock *icsk = inet_csk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001809
Neal Cardwell900f65d2012-04-19 09:55:21 +00001810 tcp_init_sock(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001811
Arnaldo Carvalho de Melo8292a172005-12-13 23:15:52 -08001812 icsk->icsk_af_ops = &ipv4_specific;
Neal Cardwell900f65d2012-04-19 09:55:21 +00001813
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001814#ifdef CONFIG_TCP_MD5SIG
David S. Millerac807fa2012-04-23 03:21:58 -04001815 tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001816#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001817
Linus Torvalds1da177e2005-04-16 15:20:36 -07001818 return 0;
1819}
1820
/* Final teardown of a TCP socket: stop timers, release queued data and
 * per-socket state (congestion control, MD5 keys, bind bucket,
 * fastopen request) before the socket memory is freed.
 */
void tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	/* Clean up the write buffer. */
	tcp_write_queue_purge(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	__skb_queue_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		tcp_clear_md5_list(sk);
		/* Free the info struct after a grace period: readers may
		 * still be traversing it under RCU.
		 */
		kfree_rcu(tp->md5sig_info, rcu);
		tp->md5sig_info = NULL;
	}
#endif

#ifdef CONFIG_NET_DMA
	/* Cleans up our sk_async_wait_queue */
	__skb_queue_purge(&sk->sk_async_wait_queue);
#endif

	/* Clean prequeue, it must be empty really */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);

	/* A fastopen child must have been detached before destruction. */
	BUG_ON(tp->fastopen_rsk != NULL);

	/* If socket is aborted during connect operation */
	tcp_free_fastopen_req(tp);

	sk_sockets_allocated_dec(sk);
	sock_release_memcg(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);
1865
1866#ifdef CONFIG_PROC_FS
1867/* Proc filesystem TCP sock list dumping. */
1868
Tom Herberta8b690f2010-06-07 00:43:42 -07001869/*
1870 * Get next listener socket follow cur. If cur is NULL, get first socket
1871 * starting from bucket given in st->bucket; when st->bucket is zero the
1872 * very first socket in the hash table is returned.
1873 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001874static void *listening_get_next(struct seq_file *seq, void *cur)
1875{
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001876 struct inet_connection_sock *icsk;
Eric Dumazetc25eb3b2008-11-23 17:22:55 -08001877 struct hlist_nulls_node *node;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001878 struct sock *sk = cur;
Eric Dumazet5caea4e2008-11-20 00:40:07 -08001879 struct inet_listen_hashbucket *ilb;
Jianjun Kong5799de02008-11-03 02:49:10 -08001880 struct tcp_iter_state *st = seq->private;
Denis V. Luneva4146b12008-04-13 22:11:14 -07001881 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001882
1883 if (!sk) {
Tom Herberta8b690f2010-06-07 00:43:42 -07001884 ilb = &tcp_hashinfo.listening_hash[st->bucket];
Eric Dumazet5caea4e2008-11-20 00:40:07 -08001885 spin_lock_bh(&ilb->lock);
Eric Dumazetc25eb3b2008-11-23 17:22:55 -08001886 sk = sk_nulls_head(&ilb->head);
Tom Herberta8b690f2010-06-07 00:43:42 -07001887 st->offset = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001888 goto get_sk;
1889 }
Eric Dumazet5caea4e2008-11-20 00:40:07 -08001890 ilb = &tcp_hashinfo.listening_hash[st->bucket];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001891 ++st->num;
Tom Herberta8b690f2010-06-07 00:43:42 -07001892 ++st->offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001893
1894 if (st->state == TCP_SEQ_STATE_OPENREQ) {
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001895 struct request_sock *req = cur;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001896
Eric Dumazet72a3eff2006-11-16 02:30:37 -08001897 icsk = inet_csk(st->syn_wait_sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001898 req = req->dl_next;
1899 while (1) {
1900 while (req) {
Daniel Lezcanobdccc4c2008-07-19 00:15:13 -07001901 if (req->rsk_ops->family == st->family) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001902 cur = req;
1903 goto out;
1904 }
1905 req = req->dl_next;
1906 }
Eric Dumazet72a3eff2006-11-16 02:30:37 -08001907 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001908 break;
1909get_req:
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001910 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001911 }
Eric Dumazet1bde5ac2010-12-23 09:32:46 -08001912 sk = sk_nulls_next(st->syn_wait_sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001913 st->state = TCP_SEQ_STATE_LISTENING;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001914 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001915 } else {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001916 icsk = inet_csk(sk);
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001917 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1918 if (reqsk_queue_len(&icsk->icsk_accept_queue))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001919 goto start_req;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001920 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
Eric Dumazet1bde5ac2010-12-23 09:32:46 -08001921 sk = sk_nulls_next(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001922 }
1923get_sk:
Eric Dumazetc25eb3b2008-11-23 17:22:55 -08001924 sk_nulls_for_each_from(sk, node) {
Pavel Emelyanov8475ef92010-11-22 03:26:12 +00001925 if (!net_eq(sock_net(sk), net))
1926 continue;
1927 if (sk->sk_family == st->family) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001928 cur = sk;
1929 goto out;
1930 }
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001931 icsk = inet_csk(sk);
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001932 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1933 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001934start_req:
1935 st->uid = sock_i_uid(sk);
1936 st->syn_wait_sk = sk;
1937 st->state = TCP_SEQ_STATE_OPENREQ;
1938 st->sbucket = 0;
1939 goto get_req;
1940 }
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001941 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001942 }
Eric Dumazet5caea4e2008-11-20 00:40:07 -08001943 spin_unlock_bh(&ilb->lock);
Tom Herberta8b690f2010-06-07 00:43:42 -07001944 st->offset = 0;
Arnaldo Carvalho de Melo0f7ff922005-08-09 19:59:44 -07001945 if (++st->bucket < INET_LHTABLE_SIZE) {
Eric Dumazet5caea4e2008-11-20 00:40:07 -08001946 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1947 spin_lock_bh(&ilb->lock);
Eric Dumazetc25eb3b2008-11-23 17:22:55 -08001948 sk = sk_nulls_head(&ilb->head);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001949 goto get_sk;
1950 }
1951 cur = NULL;
1952out:
1953 return cur;
1954}
1955
1956static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
1957{
Tom Herberta8b690f2010-06-07 00:43:42 -07001958 struct tcp_iter_state *st = seq->private;
1959 void *rc;
1960
1961 st->bucket = 0;
1962 st->offset = 0;
1963 rc = listening_get_next(seq, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001964
1965 while (rc && *pos) {
1966 rc = listening_get_next(seq, rc);
1967 --*pos;
1968 }
1969 return rc;
1970}
1971
/* True if the ehash bucket selected by st->bucket holds no sockets.
 * Read locklessly (nulls-list head check) so callers can skip empty
 * buckets without taking the bucket spinlock.
 */
static inline bool empty_bucket(const struct tcp_iter_state *st)
{
	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
}
1976
/*
 * Get first established socket starting from bucket given in st->bucket.
 * If st->bucket is zero, the very first socket in the hash is returned.
 *
 * On success the matching bucket's spinlock is left held; it is released
 * by established_get_next() when crossing buckets or by tcp_seq_stop().
 */
static void *established_get_first(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	void *rc = NULL;

	st->offset = 0;
	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
		struct sock *sk;
		struct hlist_nulls_node *node;
		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);

		/* Lockless fast path for the common case of empty buckets */
		if (empty_bucket(st))
			continue;

		spin_lock_bh(lock);
		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			/* Only report sockets of our family and netns. */
			if (sk->sk_family != st->family ||
			    !net_eq(sock_net(sk), net)) {
				continue;
			}
			/* Found one: return with the bucket lock held. */
			rc = sk;
			goto out;
		}
		spin_unlock_bh(lock);
	}
out:
	return rc;
}
2011
/* Advance to the socket after @cur in the established hash.  Stays within
 * the current (locked) bucket when possible; otherwise drops that bucket's
 * lock and restarts from the next bucket via established_get_first().
 */
static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct hlist_nulls_node *node;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	++st->num;
	++st->offset;

	sk = sk_nulls_next(sk);

	sk_nulls_for_each_from(sk, node) {
		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
			return sk;
	}

	/* Bucket exhausted: release its lock and continue with the next. */
	spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
	++st->bucket;
	return established_get_first(seq);
}
2033
2034static void *established_get_idx(struct seq_file *seq, loff_t pos)
2035{
Tom Herberta8b690f2010-06-07 00:43:42 -07002036 struct tcp_iter_state *st = seq->private;
2037 void *rc;
2038
2039 st->bucket = 0;
2040 rc = established_get_first(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002041
2042 while (rc && pos) {
2043 rc = established_get_next(seq, rc);
2044 --pos;
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02002045 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002046 return rc;
2047}
2048
2049static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2050{
2051 void *rc;
Jianjun Kong5799de02008-11-03 02:49:10 -08002052 struct tcp_iter_state *st = seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002053
Linus Torvalds1da177e2005-04-16 15:20:36 -07002054 st->state = TCP_SEQ_STATE_LISTENING;
2055 rc = listening_get_idx(seq, &pos);
2056
2057 if (!rc) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002058 st->state = TCP_SEQ_STATE_ESTABLISHED;
2059 rc = established_get_idx(seq, pos);
2060 }
2061
2062 return rc;
2063}
2064
/* Fast re-seek to where the previous read() stopped.  st->bucket and
 * st->offset persist between reads, so we only rewalk one bucket instead
 * of the whole table; st->num is restored so line numbering continues.
 */
static void *tcp_seek_last_pos(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	int offset = st->offset;
	int orig_num = st->num;
	void *rc = NULL;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		if (st->bucket >= INET_LHTABLE_SIZE)
			break;
		st->state = TCP_SEQ_STATE_LISTENING;
		rc = listening_get_next(seq, NULL);
		while (offset-- && rc)
			rc = listening_get_next(seq, rc);
		if (rc)
			break;
		/* Listening table exhausted: carry on into the ehash. */
		st->bucket = 0;
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		/* Fallthrough */
	case TCP_SEQ_STATE_ESTABLISHED:
		if (st->bucket > tcp_hashinfo.ehash_mask)
			break;
		rc = established_get_first(seq);
		while (offset-- && rc)
			rc = established_get_next(seq, rc);
	}

	st->num = orig_num;

	return rc;
}
2098
/* seq_file ->start(): resume cheaply at the last recorded position when
 * the reader continues sequentially, otherwise rescan from scratch.
 */
static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	/* Fast path: this read continues exactly where the last one ended. */
	if (*pos && *pos == st->last_pos) {
		rc = tcp_seek_last_pos(seq);
		if (rc)
			goto out;
	}

	/* Slow path: reset the iterator and walk to *pos from the top. */
	st->state = TCP_SEQ_STATE_LISTENING;
	st->num = 0;
	st->bucket = 0;
	st->offset = 0;
	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;

out:
	st->last_pos = *pos;
	return rc;
}
2120
/* seq_file ->next(): advance one socket, switching from the listening
 * table to the established hash once the former is exhausted.
 */
static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc = NULL;

	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			/* Listening sockets done; move on to the ehash. */
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			st->bucket = 0;
			st->offset = 0;
			rc = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	st->last_pos = *pos;
	return rc;
}
2151
/* seq_file ->stop(): drop whatever lock the iterator still holds for
 * its current state.
 */
static void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
		if (v) {
			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		}
		/* Fallthrough: the listening bucket lock is held as well. */
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		break;
	}
}
2172
/* ->open() for the per-family /proc/net/tcp* entries: set up a net-aware
 * seq_file iterator and record the address family from the proc data.
 */
int tcp_seq_open(struct inode *inode, struct file *file)
{
	struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
	struct tcp_iter_state *s;
	int err;

	err = seq_open_net(inode, file, &afinfo->seq_ops,
			  sizeof(struct tcp_iter_state));
	if (err < 0)
		return err;

	s = ((struct seq_file *)file->private_data)->private;
	s->family		= afinfo->family;
	s->last_pos		= 0;
	return 0;
}
EXPORT_SYMBOL(tcp_seq_open);
Daniel Lezcanof40c8172008-03-21 04:13:54 -07002190
Daniel Lezcano6f8b13b2008-03-21 04:14:45 -07002191int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002192{
2193 int rc = 0;
2194 struct proc_dir_entry *p;
2195
Denis V. Lunev9427c4b2008-04-13 22:12:13 -07002196 afinfo->seq_ops.start = tcp_seq_start;
2197 afinfo->seq_ops.next = tcp_seq_next;
2198 afinfo->seq_ops.stop = tcp_seq_stop;
2199
Denis V. Lunev84841c32008-05-02 04:10:08 -07002200 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
Arjan van de Ven73cb88e2011-10-30 06:46:30 +00002201 afinfo->seq_fops, afinfo);
Denis V. Lunev84841c32008-05-02 04:10:08 -07002202 if (!p)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002203 rc = -ENOMEM;
2204 return rc;
2205}
Eric Dumazet4bc2f182010-07-09 21:22:10 +00002206EXPORT_SYMBOL(tcp_proc_register);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002207
/* Remove the /proc/net entry created by tcp_proc_register(). */
void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	remove_proc_entry(afinfo->name, net->proc_net);
}
EXPORT_SYMBOL(tcp_proc_unregister);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002213
/* Emit one /proc/net/tcp line for a SYN_RECV request socket @req owned
 * by listener @sk; @i is the running line number, @uid the listener's.
 */
static void get_openreq4(const struct sock *sk, const struct request_sock *req,
			 struct seq_file *f, int i, kuid_t uid)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	long delta = req->expires - jiffies;

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
		i,
		ireq->ir_loc_addr,
		ntohs(inet_sk(sk)->inet_sport),
		ireq->ir_rmt_addr,
		ntohs(ireq->ir_rmt_port),
		TCP_SYN_RECV,
		0, 0, /* could print option size, but that is af dependent. */
		1,    /* timers active (only the expire timer) */
		jiffies_delta_to_clock_t(delta),
		req->num_timeout,
		from_kuid_munged(seq_user_ns(f), uid),
		0,  /* non standard timer */
		0, /* open_requests have no inode */
		atomic_read(&sk->sk_refcnt),
		req);
}
2238
/* Emit one /proc/net/tcp line for a full socket (listening or
 * established).  Runs without the socket lock, so transiently
 * inconsistent values are tolerated rather than prevented.
 */
static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
{
	int timer_active;
	unsigned long timer_expires;
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	const struct inet_sock *inet = inet_sk(sk);
	struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
	__be32 dest = inet->inet_daddr;
	__be32 src = inet->inet_rcv_saddr;
	__u16 destp = ntohs(inet->inet_dport);
	__u16 srcp = ntohs(inet->inet_sport);
	int rx_queue;

	/* Map the pending connection-sock timer to the legacy "tr" codes:
	 * 1 = retransmit/probe, 4 = zero-window probe, 2 = keepalive.
	 */
	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sk->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sk->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	/* Listeners report the accept backlog in the rx_queue column. */
	if (sk->sk_state == TCP_LISTEN)
		rx_queue = sk->sk_ack_backlog;
	else
		/*
		 * because we dont lock socket, we might find a transient negative value
		 */
		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
		i, src, srcp, dest, destp, sk->sk_state,
		tp->write_seq - tp->snd_una,
		rx_queue,
		timer_active,
		jiffies_delta_to_clock_t(timer_expires - jiffies),
		icsk->icsk_retransmits,
		from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
		icsk->icsk_probes_out,
		sock_i_ino(sk),
		atomic_read(&sk->sk_refcnt), sk,
		jiffies_to_clock_t(icsk->icsk_rto),
		jiffies_to_clock_t(icsk->icsk_ack.ato),
		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		tp->snd_cwnd,
		sk->sk_state == TCP_LISTEN ?
		    (fastopenq ? fastopenq->max_qlen : 0) :
		    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
}
2297
/* Emit one /proc/net/tcp line for a TIME_WAIT socket; most columns are
 * fixed (timer code 3 = timewait expiry, no uid/inode).
 */
static void get_timewait4_sock(const struct inet_timewait_sock *tw,
			       struct seq_file *f, int i)
{
	__be32 dest, src;
	__u16 destp, srcp;
	s32 delta = tw->tw_ttd - inet_tw_time_stamp();

	dest = tw->tw_daddr;
	src = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp = ntohs(tw->tw_sport);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		atomic_read(&tw->tw_refcnt), tw);
}
2316
/* Width every /proc/net/tcp record is padded to (minus the newline). */
#define TMPSZ 150

/* seq_file ->show(): format the current iterator position, dispatching
 * on socket flavour (full, timewait, or pending open request).
 */
static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	seq_setwidth(seq, TMPSZ - 1);
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, " sl local_address rem_address st tx_queue "
			   "rx_queue tr tm->when retrnsmt uid timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		/* Timewait minisocks live in the same hash as full ones. */
		if (sk->sk_state == TCP_TIME_WAIT)
			get_timewait4_sock(v, seq, st->num);
		else
			get_tcp4_sock(v, seq, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid);
		break;
	}
out:
	seq_pad(seq, '\n');
	return 0;
}
2349
/* File operations shared by every address family's tcp proc entry. */
static const struct file_operations tcp_afinfo_seq_fops = {
	.owner   = THIS_MODULE,
	.open    = tcp_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net
};
2357
/* AF_INET descriptor for /proc/net/tcp.  The seq_ops start/next/stop
 * slots are filled in by tcp_proc_register().
 */
static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.name		= "tcp",
	.family		= AF_INET,
	.seq_fops	= &tcp_afinfo_seq_fops,
	.seq_ops	= {
		.show		= tcp4_seq_show,
	},
};
2366
/* Per-netns setup: create /proc/net/tcp for @net. */
static int __net_init tcp4_proc_init_net(struct net *net)
{
	return tcp_proc_register(net, &tcp4_seq_afinfo);
}
2371
/* Per-netns teardown: remove /proc/net/tcp from @net. */
static void __net_exit tcp4_proc_exit_net(struct net *net)
{
	tcp_proc_unregister(net, &tcp4_seq_afinfo);
}
2376
/* pernet hooks so each network namespace gets its own /proc/net/tcp. */
static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};
2381
/* Boot-time registration of the /proc/net/tcp pernet hooks. */
int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}
2386
/* Unwind tcp4_proc_init() (error/teardown path). */
void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
2391#endif /* CONFIG_PROC_FS */
2392
/* The TCP protocol descriptor hooked into the AF_INET socket layer. */
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	/* Socket slabs are freed under RCU; lockless lookups must recheck keys. */
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
#ifdef CONFIG_MEMCG_KMEM
	.init_cgroup		= tcp_init_cgroup,
	.destroy_cgroup		= tcp_destroy_cgroup,
	.proto_cgroup		= tcp_proto_cgroup,
#endif
};
EXPORT_SYMBOL(tcp_prot);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002441
/* Per-netns TCP init.  tcp_ecn defaults to 2 (use ECN when the peer
 * requests it; see Documentation/networking/ip-sysctl.txt).
 */
static int __net_init tcp_sk_init(struct net *net)
{
	net->ipv4.sysctl_tcp_ecn = 2;
	return 0;
}
2447
/* Per-netns exit: nothing to do here; TIME_WAIT cleanup is batched in
 * tcp_sk_exit_batch().
 */
static void __net_exit tcp_sk_exit(struct net *net)
{
}
2451
/* Batched netns teardown: purge TIME_WAIT sockets belonging to the
 * dying namespaces in a single pass over the hash.
 */
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
}
2456
/* Network-namespace lifetime hooks for TCP itself. */
static struct pernet_operations __net_initdata tcp_sk_ops = {
	.init	    = tcp_sk_init,
	.exit	    = tcp_sk_exit,
	.exit_batch = tcp_sk_exit_batch,
};
2462
/* Boot-time TCP/IPv4 initialisation: set up the connection hash tables
 * and register the pernet hooks.  Failure here is fatal.
 */
void __init tcp_v4_init(void)
{
	inet_hashinfo_init(&tcp_hashinfo);
	if (register_pernet_subsys(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002468}