blob: 310454c2f4d1352bdd35cc42421f41cff3bff8ab [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Implementation of the Transmission Control Protocol(TCP).
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * IPv4 specific functions
9 *
10 *
11 * code split from:
12 * linux/ipv4/tcp.c
13 * linux/ipv4/tcp_input.c
14 * linux/ipv4/tcp_output.c
15 *
16 * See tcp.c for author information
17 *
18 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License
20 * as published by the Free Software Foundation; either version
21 * 2 of the License, or (at your option) any later version.
22 */
23
24/*
25 * Changes:
26 * David S. Miller : New socket lookup architecture.
27 * This code is dedicated to John Dyson.
28 * David S. Miller : Change semantics of established hash,
29 * half is devoted to TIME_WAIT sockets
30 * and the rest go in the other half.
31 * Andi Kleen : Add support for syncookies and fixed
32 * some bugs: ip options weren't passed to
33 * the TCP layer, missed a check for an
34 * ACK bit.
35 * Andi Kleen : Implemented fast path mtu discovery.
36 * Fixed many serious bugs in the
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -070037 * request_sock handling and moved
Linus Torvalds1da177e2005-04-16 15:20:36 -070038 * most of it into the af independent code.
39 * Added tail drop and some other bugfixes.
Stephen Hemmingercaa20d9a2005-11-10 17:13:47 -080040 * Added new listen semantics.
Linus Torvalds1da177e2005-04-16 15:20:36 -070041 * Mike McLagan : Routing by source
42 * Juan Jose Ciarlante: ip_dynaddr bits
43 * Andi Kleen: various fixes.
44 * Vitaly E. Lavrov : Transparent proxy revived after year
45 * coma.
46 * Andi Kleen : Fix new listen.
47 * Andi Kleen : Fix accept error reporting.
48 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
49 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
50 * a single port at the same time.
51 */
52
Linus Torvalds1da177e2005-04-16 15:20:36 -070053
Herbert Xueb4dea52008-12-29 23:04:08 -080054#include <linux/bottom_half.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070055#include <linux/types.h>
56#include <linux/fcntl.h>
57#include <linux/module.h>
58#include <linux/random.h>
59#include <linux/cache.h>
60#include <linux/jhash.h>
61#include <linux/init.h>
62#include <linux/times.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090063#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070064
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020065#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070066#include <net/icmp.h>
Arnaldo Carvalho de Melo304a1612005-08-09 19:59:20 -070067#include <net/inet_hashtables.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070068#include <net/tcp.h>
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -030069#include <net/transp_v6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070070#include <net/ipv6.h>
71#include <net/inet_common.h>
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -080072#include <net/timewait_sock.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070073#include <net/xfrm.h>
Chris Leech1a2449a2006-05-23 18:05:53 -070074#include <net/netdma.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070075
76#include <linux/inet.h>
77#include <linux/ipv6.h>
78#include <linux/stddef.h>
79#include <linux/proc_fs.h>
80#include <linux/seq_file.h>
81
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -080082#include <linux/crypto.h>
83#include <linux/scatterlist.h>
84
/*
 * File-scope tunables.  sysctl_tcp_tw_reuse is read by tcp_twsk_unique()
 * to decide whether a TIME-WAIT entry may be reused when a non-NULL twp
 * is supplied.  sysctl_tcp_low_latency is defined and exported here; it is
 * not referenced in the part of the file visible to this review.
 */
Brian Haleyab32ea52006-09-22 14:15:41 -070085int sysctl_tcp_tw_reuse __read_mostly;
86int sysctl_tcp_low_latency __read_mostly;
Eric Dumazet4bc2f182010-07-09 21:22:10 +000087EXPORT_SYMBOL(sysctl_tcp_low_latency);
Linus Torvalds1da177e2005-04-16 15:20:36 -070088
Linus Torvalds1da177e2005-04-16 15:20:36 -070089
/*
 * MD5 signature helpers.  With CONFIG_TCP_MD5SIG these are forward
 * declarations of functions defined later in the file; without it,
 * tcp_v4_md5_do_lookup() is stubbed to always return NULL so callers
 * need no #ifdef of their own.
 */
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -080090#ifdef CONFIG_TCP_MD5SIG
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -020091static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
92 __be32 addr);
Adam Langley49a72df2008-07-19 00:01:42 -070093static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
94 __be32 daddr, __be32 saddr, struct tcphdr *th);
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +090095#else
96static inline
97struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
98{
99 return NULL;
100}
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800101#endif
102
/*
 * Socket lookup tables for TCP; passed to inet_lookup() by tcp_v4_err().
 * Exported for use outside this file.
 */
Eric Dumazet5caea4e2008-11-20 00:40:07 -0800103struct inet_hashinfo tcp_hashinfo;
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000104EXPORT_SYMBOL(tcp_hashinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700105
Gerrit Renkera94f7232006-11-10 14:06:49 -0800106static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700107{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700108 return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
109 ip_hdr(skb)->saddr,
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700110 tcp_hdr(skb)->dest,
111 tcp_hdr(skb)->source);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112}
113
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -0800114int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
115{
116 const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
117 struct tcp_sock *tp = tcp_sk(sk);
118
119 /* With PAWS, it is safe from the viewpoint
120 of data integrity. Even without PAWS it is safe provided sequence
121 spaces do not overlap i.e. at data rates <= 80Mbit/sec.
122
123 Actually, the idea is close to VJ's one, only timestamp cache is
124 held not per host, but per port pair and TW bucket is used as state
125 holder.
126
127 If TW bucket has been already destroyed we fall back to VJ's scheme
128 and use initial timestamp retrieved from peer table.
129 */
130 if (tcptw->tw_ts_recent_stamp &&
131 (twp == NULL || (sysctl_tcp_tw_reuse &&
James Morris9d729f72007-03-04 16:12:44 -0800132 get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -0800133 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
134 if (tp->write_seq == 0)
135 tp->write_seq = 1;
136 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
137 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
138 sock_hold(sktw);
139 return 1;
140 }
141
142 return 0;
143}
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -0800144EXPORT_SYMBOL_GPL(tcp_twsk_unique);
145
Linus Torvalds1da177e2005-04-16 15:20:36 -0700146/* This will initiate an outgoing connection. */
/*
 * tcp_v4_connect - begin an active open to the IPv4 address in @uaddr.
 * @sk:       socket to connect
 * @uaddr:    destination, interpreted as a struct sockaddr_in
 * @addr_len: size of @uaddr in bytes
 *
 * Returns 0 once tcp_connect() has succeeded.  Returns -EINVAL when
 * @addr_len is smaller than a sockaddr_in or when a source-route option
 * is set and the destination address is zero, -EAFNOSUPPORT when the
 * family is not AF_INET, -ENETUNREACH when the route is multicast or
 * broadcast, and otherwise whatever ip_route_connect(),
 * inet_hash_connect(), ip_route_newports() or tcp_connect() reported.
 *
 * Failures after the socket entered SYN-SENT go through the "failure"
 * label, which moves the socket back to TCP_CLOSE, drops the route and
 * clears sk_route_caps and inet_dport.
 */
147int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
148{
David S. Miller2d7192d2011-04-26 13:28:44 -0700149 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700150 struct inet_sock *inet = inet_sk(sk);
151 struct tcp_sock *tp = tcp_sk(sk);
David S. Millerdca8b082011-02-24 13:38:12 -0800152 __be16 orig_sport, orig_dport;
Al Virobada8ad2006-09-26 21:27:15 -0700153 __be32 daddr, nexthop;
David S. Miller2d7192d2011-04-26 13:28:44 -0700154 struct flowi4 fl4;
155 struct rtable *rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700156 int err;
157
158 if (addr_len < sizeof(struct sockaddr_in))
159 return -EINVAL;
160
161 if (usin->sin_family != AF_INET)
162 return -EAFNOSUPPORT;
163
164 nexthop = daddr = usin->sin_addr.s_addr;
/* With a source-route option the route lookup targets the option's
 * faddr instead of the final destination. */
165 if (inet->opt && inet->opt->srr) {
166 if (!daddr)
167 return -EINVAL;
168 nexthop = inet->opt->faddr;
169 }
170
/* Remember the ports used for this lookup; they are handed to
 * ip_route_newports() below once the final source port is known. */
David S. Millerdca8b082011-02-24 13:38:12 -0800171 orig_sport = inet->inet_sport;
172 orig_dport = usin->sin_port;
David S. Miller2d7192d2011-04-26 13:28:44 -0700173 rt = ip_route_connect(&fl4, nexthop, inet->inet_saddr,
David S. Millerb23dd4f2011-03-02 14:31:35 -0800174 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
175 IPPROTO_TCP,
176 orig_sport, orig_dport, sk, true);
177 if (IS_ERR(rt)) {
178 err = PTR_ERR(rt);
179 if (err == -ENETUNREACH)
Pavel Emelyanov7c73a6f2008-07-16 20:20:11 -0700180 IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
David S. Millerb23dd4f2011-03-02 14:31:35 -0800181 return err;
Wei Dong584bdf82007-05-31 22:49:28 -0700182 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183
184 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
185 ip_rt_put(rt);
186 return -ENETUNREACH;
187 }
188
/* Without source routing, use the destination the route resolved to. */
189 if (!inet->opt || !inet->opt->srr)
190 daddr = rt->rt_dst;
191
/* No source address set yet: adopt the one chosen by the route. */
Eric Dumazetc720c7e2009-10-15 06:30:45 +0000192 if (!inet->inet_saddr)
193 inet->inet_saddr = rt->rt_src;
194 inet->inet_rcv_saddr = inet->inet_saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700195
Eric Dumazetc720c7e2009-10-15 06:30:45 +0000196 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197 /* Reset inherited state */
198 tp->rx_opt.ts_recent = 0;
199 tp->rx_opt.ts_recent_stamp = 0;
200 tp->write_seq = 0;
201 }
202
Arnaldo Carvalho de Melo295ff7e2005-08-09 20:44:40 -0700203 if (tcp_death_row.sysctl_tw_recycle &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204 !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
205 struct inet_peer *peer = rt_get_peer(rt);
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200206 /*
207 * VJ's idea. We save last timestamp seen from
208 * the destination in peer table, when entering state
209 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
210 * when trying new connection.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211 */
Eric Dumazet317fe0e2010-06-16 04:52:13 +0000212 if (peer) {
213 inet_peer_refcheck(peer);
214 if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
215 tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
216 tp->rx_opt.ts_recent = peer->tcp_ts;
217 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700218 }
219 }
220
Eric Dumazetc720c7e2009-10-15 06:30:45 +0000221 inet->inet_dport = usin->sin_port;
222 inet->inet_daddr = daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223
/* Extension header length is the IP option length, or zero if the
 * socket has no options. */
Arnaldo Carvalho de Melod83d8462005-12-13 23:26:10 -0800224 inet_csk(sk)->icsk_ext_hdr_len = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700225 if (inet->opt)
Arnaldo Carvalho de Melod83d8462005-12-13 23:26:10 -0800226 inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700227
William Allen Simpsonbee7ca92009-11-10 09:51:18 +0000228 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700229
230 /* Socket identity is still unknown (sport may be zero).
231 * However we set state to SYN-SENT and not releasing socket
232 * lock select source port, enter ourselves into the hash tables and
233 * complete initialization after this.
234 */
235 tcp_set_state(sk, TCP_SYN_SENT);
Arnaldo Carvalho de Meloa7f5e7f2005-12-13 23:25:31 -0800236 err = inet_hash_connect(&tcp_death_row, sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700237 if (err)
238 goto failure;
239
/* Re-resolve the route with the ports now stored in the socket.  On
 * error rt is set to NULL before jumping to the failure path, which
 * calls ip_rt_put(rt). */
David S. Miller2d7192d2011-04-26 13:28:44 -0700240 rt = ip_route_newports(&fl4, rt, orig_sport, orig_dport,
David S. Millerb23dd4f2011-03-02 14:31:35 -0800241 inet->inet_sport, inet->inet_dport, sk);
242 if (IS_ERR(rt)) {
243 err = PTR_ERR(rt);
244 rt = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245 goto failure;
David S. Millerb23dd4f2011-03-02 14:31:35 -0800246 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247 /* OK, now commit destination to socket. */
Herbert Xubcd76112006-06-30 13:36:35 -0700248 sk->sk_gso_type = SKB_GSO_TCPV4;
Changli Gaod8d1f302010-06-10 23:31:35 -0700249 sk_setup_caps(sk, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250
/* write_seq may already be non-zero (tcp_twsk_unique() seeds it when a
 * TIME-WAIT entry is reused); only pick a fresh one when it is zero. */
251 if (!tp->write_seq)
Eric Dumazetc720c7e2009-10-15 06:30:45 +0000252 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
253 inet->inet_daddr,
254 inet->inet_sport,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 usin->sin_port);
256
Eric Dumazetc720c7e2009-10-15 06:30:45 +0000257 inet->inet_id = tp->write_seq ^ jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258
259 err = tcp_connect(sk);
/* NOTE(review): rt is cleared before the error check so the failure
 * path's ip_rt_put() does not touch it - presumably because the route
 * now belongs to the socket after sk_setup_caps(); confirm. */
260 rt = NULL;
261 if (err)
262 goto failure;
263
264 return 0;
265
266failure:
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200267 /*
268 * This unhashes the socket and releases the local port,
269 * if necessary.
270 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271 tcp_set_state(sk, TCP_CLOSE);
272 ip_rt_put(rt);
273 sk->sk_route_caps = 0;
Eric Dumazetc720c7e2009-10-15 06:30:45 +0000274 inet->inet_dport = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275 return err;
276}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000277EXPORT_SYMBOL(tcp_v4_connect);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278
Linus Torvalds1da177e2005-04-16 15:20:36 -0700279/*
280 * This routine does path mtu discovery as defined in RFC1191.
281 */
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000282static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283{
284 struct dst_entry *dst;
285 struct inet_sock *inet = inet_sk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700286
287 /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
288 * send out by Linux are always <576bytes so they should go through
289 * unfragmented).
290 */
291 if (sk->sk_state == TCP_LISTEN)
292 return;
293
294 /* We don't check in the destentry if pmtu discovery is forbidden
295 * on this route. We just assume that no packet_to_big packets
296 * are send back when pmtu discovery is not active.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900297 * There is a small race when the user changes this flag in the
Linus Torvalds1da177e2005-04-16 15:20:36 -0700298 * route, but I think that's acceptable.
299 */
300 if ((dst = __sk_dst_check(sk, 0)) == NULL)
301 return;
302
303 dst->ops->update_pmtu(dst, mtu);
304
305 /* Something is about to be wrong... Remember soft error
306 * for the case, if this connection will not able to recover.
307 */
308 if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
309 sk->sk_err_soft = EMSGSIZE;
310
311 mtu = dst_mtu(dst);
312
313 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
Arnaldo Carvalho de Melod83d8462005-12-13 23:26:10 -0800314 inet_csk(sk)->icsk_pmtu_cookie > mtu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315 tcp_sync_mss(sk, mtu);
316
317 /* Resend the TCP packet because it's
318 * clear that the old packet has been
319 * dropped. This is the new "fast" path mtu
320 * discovery.
321 */
322 tcp_simple_retransmit(sk);
323 } /* else let the usual retransmit timer handle it */
324}
325
326/*
327 * This routine is called by the ICMP module when it gets some
328 * sort of error condition. If err < 0 then the socket should
329 * be closed and the error returned to the user. If err > 0
330 * it's just the icmp type << 8 | icmp code. After adjustment
331 * header points to the first 8 bytes of the tcp header. We need
332 * to find the appropriate port.
333 *
334 * The locking strategy used here is very "optimistic". When
335 * someone else accesses the socket the ICMP is just dropped
336 * and for some paths there is no check at all.
337 * A more general error queue to queue errors for later handling
338 * is probably better.
339 *
340 */
341
/*
 * tcp_v4_err - handle an ICMP error that quotes one of our TCP segments.
 * @icmp_skb: the ICMP packet; ->data is read as the quoted IP header,
 *            followed (after ihl << 2 bytes) by the quoted TCP header
 * @info:     forwarded to do_pmtu_discovery() as the MTU for
 *            ICMP_FRAG_NEEDED
 *
 * The socket is found with inet_lookup() using the quoted addresses and
 * ports.  After validating the quoted sequence number the function,
 * depending on ICMP type/code and socket state, either runs PMTU
 * discovery, reverts one step of RTO backoff, drops a pending connection
 * request, or stores the error in sk_err (hard) or sk_err_soft (soft).
 * Every path that reaches the "out" label unlocks the socket and drops
 * one reference with sock_put().
 */
Damian Lukowski4d1a2d92009-08-26 00:16:27 +0000342void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700343{
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000344 const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
Damian Lukowski4d1a2d92009-08-26 00:16:27 +0000345 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000346 struct inet_connection_sock *icsk;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700347 struct tcp_sock *tp;
348 struct inet_sock *inet;
Damian Lukowski4d1a2d92009-08-26 00:16:27 +0000349 const int type = icmp_hdr(icmp_skb)->type;
350 const int code = icmp_hdr(icmp_skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700351 struct sock *sk;
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000352 struct sk_buff *skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353 __u32 seq;
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000354 __u32 remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700355 int err;
Damian Lukowski4d1a2d92009-08-26 00:16:27 +0000356 struct net *net = dev_net(icmp_skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700357
/* The quote must hold the IP header plus the first 8 bytes of the TCP
 * header (ports and sequence number), which is all that is read below. */
Damian Lukowski4d1a2d92009-08-26 00:16:27 +0000358 if (icmp_skb->len < (iph->ihl << 2) + 8) {
Pavel Emelyanovdcfc23c2008-07-14 23:03:00 -0700359 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700360 return;
361 }
362
Pavel Emelyanovfd54d712008-07-14 23:01:40 -0700363 sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
Damian Lukowski4d1a2d92009-08-26 00:16:27 +0000364 iph->saddr, th->source, inet_iif(icmp_skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365 if (!sk) {
Pavel Emelyanovdcfc23c2008-07-14 23:03:00 -0700366 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367 return;
368 }
/* Errors for TIME-WAIT sockets are ignored; just release the entry. */
369 if (sk->sk_state == TCP_TIME_WAIT) {
YOSHIFUJI Hideaki9469c7b2006-10-10 19:41:46 -0700370 inet_twsk_put(inet_twsk(sk));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700371 return;
372 }
373
374 bh_lock_sock(sk);
375 /* If too many ICMPs get dropped on busy
376 * servers this needs to be solved differently.
377 */
/* Note: this only bumps the counter; processing continues, and each
 * path below re-checks sock_owned_by_user() before modifying state. */
378 if (sock_owned_by_user(sk))
Pavel Emelyanovde0744a2008-07-16 20:31:16 -0700379 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700380
381 if (sk->sk_state == TCP_CLOSE)
382 goto out;
383
/* Drop errors whose quoted TTL is below the socket's configured minimum. */
stephen hemminger97e3ecd12010-03-18 11:27:32 +0000384 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
385 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
386 goto out;
387 }
388
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000389 icsk = inet_csk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390 tp = tcp_sk(sk);
391 seq = ntohl(th->seq);
/* Except for listeners, the quoted sequence number must pass
 * between(seq, snd_una, snd_nxt), otherwise the ICMP is counted as
 * out-of-window and ignored. */
392 if (sk->sk_state != TCP_LISTEN &&
393 !between(seq, tp->snd_una, tp->snd_nxt)) {
Pavel Emelyanovde0744a2008-07-16 20:31:16 -0700394 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395 goto out;
396 }
397
/* First switch: map the ICMP type/code to an errno in err, or handle
 * it completely (PMTU discovery, ignored types) and leave via "out". */
398 switch (type) {
399 case ICMP_SOURCE_QUENCH:
400 /* Just silently ignore these. */
401 goto out;
402 case ICMP_PARAMETERPROB:
403 err = EPROTO;
404 break;
405 case ICMP_DEST_UNREACH:
406 if (code > NR_ICMP_UNREACH)
407 goto out;
408
409 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
410 if (!sock_owned_by_user(sk))
411 do_pmtu_discovery(sk, iph, info);
412 goto out;
413 }
414
415 err = icmp_err_convert[code].errno;
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000416 /* check if icmp_skb allows revert of backoff
417 * (see draft-zimmermann-tcp-lcd) */
418 if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
419 break;
/* Revert only when the ICMP quotes the segment at snd_una and the
 * socket has both retransmitted and backed off. */
420 if (seq != tp->snd_una || !icsk->icsk_retransmits ||
421 !icsk->icsk_backoff)
422 break;
423
David S. Miller8f49c272010-11-12 13:35:00 -0800424 if (sock_owned_by_user(sk))
425 break;
426
/* Undo one backoff step and recompute the RTO from it. */
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000427 icsk->icsk_backoff--;
428 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
429 icsk->icsk_backoff;
430 tcp_bound_rto(sk);
431
432 skb = tcp_write_queue_head(sk);
433 BUG_ON(!skb);
434
/* Part of the new RTO still left after subtracting the time elapsed
 * since TCP_SKB_CB(skb)->when; the min() clamps the result at zero. */
435 remaining = icsk->icsk_rto - min(icsk->icsk_rto,
436 tcp_time_stamp - TCP_SKB_CB(skb)->when);
437
438 if (remaining) {
439 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
440 remaining, TCP_RTO_MAX);
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000441 } else {
442 /* RTO revert clocked out retransmission.
443 * Will retransmit now */
444 tcp_retransmit_timer(sk);
445 }
446
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447 break;
448 case ICMP_TIME_EXCEEDED:
449 err = EHOSTUNREACH;
450 break;
451 default:
452 goto out;
453 }
454
/* Second switch: states that need special treatment.  All other states
 * fall out of the switch to the generic reporting code below. */
455 switch (sk->sk_state) {
/* Declaration only; scoped to the switch body and used by TCP_LISTEN. */
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700456 struct request_sock *req, **prev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700457 case TCP_LISTEN:
458 if (sock_owned_by_user(sk))
459 goto out;
460
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -0700461 req = inet_csk_search_req(sk, &prev, th->dest,
462 iph->daddr, iph->saddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463 if (!req)
464 goto out;
465
466 /* ICMPs are not backlogged, hence we cannot get
467 an established socket here.
468 */
Ilpo Järvinen547b7922008-07-25 21:43:18 -0700469 WARN_ON(req->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470
/* The quote must carry exactly the ISN we sent for this request. */
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700471 if (seq != tcp_rsk(req)->snt_isn) {
Pavel Emelyanovde0744a2008-07-16 20:31:16 -0700472 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700473 goto out;
474 }
475
476 /*
477 * Still in SYN_RECV, just remove it silently.
478 * There is no good way to pass the error to the newly
479 * created socket, and POSIX does not want network
480 * errors returned from accept().
481 */
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -0700482 inet_csk_reqsk_queue_drop(sk, req, prev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700483 goto out;
484
485 case TCP_SYN_SENT:
486 case TCP_SYN_RECV: /* Cannot happen.
487 It can f.e. if SYNs crossed.
488 */
489 if (!sock_owned_by_user(sk)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700490 sk->sk_err = err;
491
492 sk->sk_error_report(sk);
493
494 tcp_done(sk);
495 } else {
496 sk->sk_err_soft = err;
497 }
498 goto out;
499 }
500
501 /* If we've already connected we will keep trying
502 * until we time out, or the user gives up.
503 *
504 * rfc1122 4.2.3.9 allows to consider as hard errors
505 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
506 * but it is obsoleted by pmtu discovery).
507 *
508 * Note, that in modern internet, where routing is unreliable
509 * and in each dark corner broken firewalls sit, sending random
510 * errors ordered by their masters even this two messages finally lose
511 * their original sense (even Linux sends invalid PORT_UNREACHs)
512 *
513 * Now we are in compliance with RFCs.
514 * --ANK (980905)
515 */
516
/* Report the error right away only when the socket is not locked by the
 * user and inet->recverr is set; otherwise keep it as a soft error. */
517 inet = inet_sk(sk);
518 if (!sock_owned_by_user(sk) && inet->recverr) {
519 sk->sk_err = err;
520 sk->sk_error_report(sk);
521 } else { /* Only an error on timeout */
522 sk->sk_err_soft = err;
523 }
524
525out:
526 bh_unlock_sock(sk);
527 sock_put(sk);
528}
529
Herbert Xu419f9f82010-04-11 02:15:53 +0000530static void __tcp_v4_send_check(struct sk_buff *skb,
531 __be32 saddr, __be32 daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532{
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700533 struct tcphdr *th = tcp_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700534
Patrick McHardy84fa7932006-08-29 16:44:56 -0700535 if (skb->ip_summed == CHECKSUM_PARTIAL) {
Herbert Xu419f9f82010-04-11 02:15:53 +0000536 th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
Herbert Xu663ead32007-04-09 11:59:07 -0700537 skb->csum_start = skb_transport_header(skb) - skb->head;
Al Viroff1dcad2006-11-20 18:07:29 -0800538 skb->csum_offset = offsetof(struct tcphdr, check);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700539 } else {
Herbert Xu419f9f82010-04-11 02:15:53 +0000540 th->check = tcp_v4_check(skb->len, saddr, daddr,
Joe Perches07f07572008-11-19 15:44:53 -0800541 csum_partial(th,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700542 th->doff << 2,
543 skb->csum));
544 }
545}
546
Herbert Xu419f9f82010-04-11 02:15:53 +0000547/* This routine computes an IPv4 TCP checksum. */
Herbert Xubb296242010-04-11 02:15:55 +0000548void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
Herbert Xu419f9f82010-04-11 02:15:53 +0000549{
550 struct inet_sock *inet = inet_sk(sk);
551
552 __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
553}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000554EXPORT_SYMBOL(tcp_v4_send_check);
Herbert Xu419f9f82010-04-11 02:15:53 +0000555
Herbert Xua430a432006-07-08 13:34:56 -0700556int tcp_v4_gso_send_check(struct sk_buff *skb)
557{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700558 const struct iphdr *iph;
Herbert Xua430a432006-07-08 13:34:56 -0700559 struct tcphdr *th;
560
561 if (!pskb_may_pull(skb, sizeof(*th)))
562 return -EINVAL;
563
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700564 iph = ip_hdr(skb);
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700565 th = tcp_hdr(skb);
Herbert Xua430a432006-07-08 13:34:56 -0700566
567 th->check = 0;
Patrick McHardy84fa7932006-08-29 16:44:56 -0700568 skb->ip_summed = CHECKSUM_PARTIAL;
Herbert Xu419f9f82010-04-11 02:15:53 +0000569 __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
Herbert Xua430a432006-07-08 13:34:56 -0700570 return 0;
571}
572
Linus Torvalds1da177e2005-04-16 15:20:36 -0700573/*
574 * This routine will send an RST to the other tcp.
575 *
576 * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
577 * for reset.
578 * Answer: if a packet caused RST, it is not for a socket
579 * existing in our system, if it is matched to a socket,
580 * it is just duplicate segment or bug in other side's TCP.
581 * So that we build reply only basing on parameters
582 * arrived with segment.
583 * Exception: precedence violation. We do not implement it in any case.
584 */
585
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800586static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700587{
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700588 struct tcphdr *th = tcp_hdr(skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800589 struct {
590 struct tcphdr th;
591#ifdef CONFIG_TCP_MD5SIG
Al Viro714e85b2006-11-14 20:51:49 -0800592 __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800593#endif
594 } rep;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595 struct ip_reply_arg arg;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800596#ifdef CONFIG_TCP_MD5SIG
597 struct tcp_md5sig_key *key;
598#endif
Pavel Emelyanova86b1e32008-07-16 20:20:58 -0700599 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600
601 /* Never send a reset in response to a reset. */
602 if (th->rst)
603 return;
604
Eric Dumazet511c3f92009-06-02 05:14:27 +0000605 if (skb_rtable(skb)->rt_type != RTN_LOCAL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700606 return;
607
608 /* Swap the send and the receive. */
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800609 memset(&rep, 0, sizeof(rep));
610 rep.th.dest = th->source;
611 rep.th.source = th->dest;
612 rep.th.doff = sizeof(struct tcphdr) / 4;
613 rep.th.rst = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700614
615 if (th->ack) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800616 rep.th.seq = th->ack_seq;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700617 } else {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800618 rep.th.ack = 1;
619 rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
620 skb->len - (th->doff << 2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700621 }
622
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200623 memset(&arg, 0, sizeof(arg));
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800624 arg.iov[0].iov_base = (unsigned char *)&rep;
625 arg.iov[0].iov_len = sizeof(rep.th);
626
627#ifdef CONFIG_TCP_MD5SIG
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700628 key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800629 if (key) {
630 rep.opt[0] = htonl((TCPOPT_NOP << 24) |
631 (TCPOPT_NOP << 16) |
632 (TCPOPT_MD5SIG << 8) |
633 TCPOLEN_MD5SIG);
634 /* Update length and the length the header thinks exists */
635 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
636 rep.th.doff = arg.iov[0].iov_len / 4;
637
Adam Langley49a72df2008-07-19 00:01:42 -0700638 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
Ilpo Järvinen78e645cb2008-10-09 14:37:47 -0700639 key, ip_hdr(skb)->saddr,
640 ip_hdr(skb)->daddr, &rep.th);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800641 }
642#endif
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700643 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
644 ip_hdr(skb)->saddr, /* XXX */
Ilpo Järvinen52cd5752008-10-08 11:34:06 -0700645 arg.iov[0].iov_len, IPPROTO_TCP, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
KOVACS Krisztian88ef4a52008-10-01 07:41:00 -0700647 arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700648
Eric Dumazetadf30902009-06-02 05:19:30 +0000649 net = dev_net(skb_dst(skb)->dev);
Pavel Emelyanova86b1e32008-07-16 20:20:58 -0700650 ip_send_reply(net->ipv4.tcp_sock, skb,
Denis V. Lunev7feb49c2008-04-03 14:32:00 -0700651 &arg, arg.iov[0].iov_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700652
Pavel Emelyanov63231bd2008-07-16 20:22:25 -0700653 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
654 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700655}
656
/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
   outside of socket context, is ugly, certainly. What can I do?
 */
660
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900661static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
662 u32 win, u32 ts, int oif,
KOVACS Krisztian88ef4a52008-10-01 07:41:00 -0700663 struct tcp_md5sig_key *key,
664 int reply_flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665{
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700666 struct tcphdr *th = tcp_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700667 struct {
668 struct tcphdr th;
Al Viro714e85b2006-11-14 20:51:49 -0800669 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800670#ifdef CONFIG_TCP_MD5SIG
Al Viro714e85b2006-11-14 20:51:49 -0800671 + (TCPOLEN_MD5SIG_ALIGNED >> 2)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800672#endif
673 ];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674 } rep;
675 struct ip_reply_arg arg;
Eric Dumazetadf30902009-06-02 05:19:30 +0000676 struct net *net = dev_net(skb_dst(skb)->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677
678 memset(&rep.th, 0, sizeof(struct tcphdr));
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200679 memset(&arg, 0, sizeof(arg));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680
681 arg.iov[0].iov_base = (unsigned char *)&rep;
682 arg.iov[0].iov_len = sizeof(rep.th);
683 if (ts) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800684 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
685 (TCPOPT_TIMESTAMP << 8) |
686 TCPOLEN_TIMESTAMP);
687 rep.opt[1] = htonl(tcp_time_stamp);
688 rep.opt[2] = htonl(ts);
Craig Schlentercb48cfe2007-01-09 00:11:15 -0800689 arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690 }
691
692 /* Swap the send and the receive. */
693 rep.th.dest = th->source;
694 rep.th.source = th->dest;
695 rep.th.doff = arg.iov[0].iov_len / 4;
696 rep.th.seq = htonl(seq);
697 rep.th.ack_seq = htonl(ack);
698 rep.th.ack = 1;
699 rep.th.window = htons(win);
700
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800701#ifdef CONFIG_TCP_MD5SIG
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800702 if (key) {
703 int offset = (ts) ? 3 : 0;
704
705 rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
706 (TCPOPT_NOP << 16) |
707 (TCPOPT_MD5SIG << 8) |
708 TCPOLEN_MD5SIG);
709 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
710 rep.th.doff = arg.iov[0].iov_len/4;
711
Adam Langley49a72df2008-07-19 00:01:42 -0700712 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
Adam Langley90b7e112008-07-31 20:49:48 -0700713 key, ip_hdr(skb)->saddr,
714 ip_hdr(skb)->daddr, &rep.th);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800715 }
716#endif
KOVACS Krisztian88ef4a52008-10-01 07:41:00 -0700717 arg.flags = reply_flags;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700718 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
719 ip_hdr(skb)->saddr, /* XXX */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720 arg.iov[0].iov_len, IPPROTO_TCP, 0);
721 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900722 if (oif)
723 arg.bound_dev_if = oif;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700724
Pavel Emelyanova86b1e32008-07-16 20:20:58 -0700725 ip_send_reply(net->ipv4.tcp_sock, skb,
Denis V. Lunev7feb49c2008-04-03 14:32:00 -0700726 &arg, arg.iov[0].iov_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700727
Pavel Emelyanov63231bd2008-07-16 20:22:25 -0700728 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700729}
730
731static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
732{
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700733 struct inet_timewait_sock *tw = inet_twsk(sk);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800734 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900736 tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200737 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900738 tcptw->tw_ts_recent,
739 tw->tw_bound_dev_if,
KOVACS Krisztian88ef4a52008-10-01 07:41:00 -0700740 tcp_twsk_md5_key(tcptw),
741 tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900742 );
Linus Torvalds1da177e2005-04-16 15:20:36 -0700743
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700744 inet_twsk_put(tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700745}
746
Gui Jianfeng6edafaa2008-08-06 23:50:04 -0700747static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200748 struct request_sock *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700749{
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900750 tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800751 tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900752 req->ts_recent,
753 0,
KOVACS Krisztian88ef4a52008-10-01 07:41:00 -0700754 tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
755 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756}
757
Linus Torvalds1da177e2005-04-16 15:20:36 -0700758/*
Kris Katterjohn9bf1d832008-02-17 22:29:19 -0800759 * Send a SYN-ACK after having received a SYN.
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700760 * This still operates on a request_sock only, not on a big
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761 * socket.
762 */
Octavian Purdila72659ec2010-01-17 19:09:39 -0800763static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
764 struct request_sock *req,
765 struct request_values *rvp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700767 const struct inet_request_sock *ireq = inet_rsk(req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768 int err = -1;
769 struct sk_buff * skb;
770
771 /* First, grab a route. */
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -0700772 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
Denis V. Lunevfd80eb92008-02-29 11:43:03 -0800773 return -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700774
William Allen Simpsone6b4d112009-12-02 18:07:39 +0000775 skb = tcp_make_synack(sk, dst, req, rvp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776
777 if (skb) {
Herbert Xu419f9f82010-04-11 02:15:53 +0000778 __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700779
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700780 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
781 ireq->rmt_addr,
782 ireq->opt);
Gerrit Renkerb9df3cb2006-11-14 11:21:36 -0200783 err = net_xmit_eval(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700784 }
785
Linus Torvalds1da177e2005-04-16 15:20:36 -0700786 dst_release(dst);
787 return err;
788}
789
Octavian Purdila72659ec2010-01-17 19:09:39 -0800790static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
William Allen Simpsone6b4d112009-12-02 18:07:39 +0000791 struct request_values *rvp)
Denis V. Lunevfd80eb92008-02-29 11:43:03 -0800792{
Octavian Purdila72659ec2010-01-17 19:09:39 -0800793 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
794 return tcp_v4_send_synack(sk, NULL, req, rvp);
Denis V. Lunevfd80eb92008-02-29 11:43:03 -0800795}
796
Linus Torvalds1da177e2005-04-16 15:20:36 -0700797/*
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700798 * IPv4 request_sock destructor.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700799 */
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700800static void tcp_v4_reqsk_destructor(struct request_sock *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700801{
Jesper Juhla51482b2005-11-08 09:41:34 -0800802 kfree(inet_rsk(req)->opt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700803}
804
Florian Westphal2a1d4bd2010-06-03 00:43:12 +0000805static void syn_flood_warning(const struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700806{
Florian Westphal2a1d4bd2010-06-03 00:43:12 +0000807 const char *msg;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808
Florian Westphal2a1d4bd2010-06-03 00:43:12 +0000809#ifdef CONFIG_SYN_COOKIES
810 if (sysctl_tcp_syncookies)
811 msg = "Sending cookies";
812 else
Arnaldo Carvalho de Melo80e40da2006-01-04 01:58:06 -0200813#endif
Florian Westphal2a1d4bd2010-06-03 00:43:12 +0000814 msg = "Dropping request";
815
816 pr_info("TCP: Possible SYN flooding on port %d. %s.\n",
817 ntohs(tcp_hdr(skb)->dest), msg);
818}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700819
820/*
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700821 * Save and compile IPv4 options into the request_sock if needed.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822 */
Stephen Hemminger40efc6f2006-01-03 16:03:49 -0800823static struct ip_options *tcp_v4_save_options(struct sock *sk,
824 struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700825{
826 struct ip_options *opt = &(IPCB(skb)->opt);
827 struct ip_options *dopt = NULL;
828
829 if (opt && opt->optlen) {
830 int opt_size = optlength(opt);
831 dopt = kmalloc(opt_size, GFP_ATOMIC);
832 if (dopt) {
833 if (ip_options_echo(dopt, skb)) {
834 kfree(dopt);
835 dopt = NULL;
836 }
837 }
838 }
839 return dopt;
840}
841
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800842#ifdef CONFIG_TCP_MD5SIG
843/*
844 * RFC2385 MD5 checksumming requires a mapping of
845 * IP address->MD5 Key.
846 * We need to maintain these in the sk structure.
847 */
848
849/* Find the Key structure for an address. */
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200850static struct tcp_md5sig_key *
851 tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800852{
853 struct tcp_sock *tp = tcp_sk(sk);
854 int i;
855
856 if (!tp->md5sig_info || !tp->md5sig_info->entries4)
857 return NULL;
858 for (i = 0; i < tp->md5sig_info->entries4; i++) {
859 if (tp->md5sig_info->keys4[i].addr == addr)
David S. Millerf8ab18d2007-09-28 15:18:35 -0700860 return &tp->md5sig_info->keys4[i].base;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800861 }
862 return NULL;
863}
864
865struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
866 struct sock *addr_sk)
867{
Eric Dumazetc720c7e2009-10-15 06:30:45 +0000868 return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800869}
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800870EXPORT_SYMBOL(tcp_v4_md5_lookup);
871
Adrian Bunkf5b99bc2006-11-30 17:22:29 -0800872static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
873 struct request_sock *req)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800874{
875 return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
876}
877
878/* This can be called on a newly created socket, from other files */
879int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
880 u8 *newkey, u8 newkeylen)
881{
882 /* Add Key to the list */
Matthias M. Dellwegb0a713e2007-10-29 20:55:27 -0700883 struct tcp_md5sig_key *key;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800884 struct tcp_sock *tp = tcp_sk(sk);
885 struct tcp4_md5sig_key *keys;
886
Matthias M. Dellwegb0a713e2007-10-29 20:55:27 -0700887 key = tcp_v4_md5_do_lookup(sk, addr);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800888 if (key) {
889 /* Pre-existing entry - just update that one. */
Matthias M. Dellwegb0a713e2007-10-29 20:55:27 -0700890 kfree(key->key);
891 key->key = newkey;
892 key->keylen = newkeylen;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800893 } else {
Arnaldo Carvalho de Melof6685932006-11-17 11:06:01 -0200894 struct tcp_md5sig_info *md5sig;
895
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800896 if (!tp->md5sig_info) {
Arnaldo Carvalho de Melof6685932006-11-17 11:06:01 -0200897 tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
898 GFP_ATOMIC);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800899 if (!tp->md5sig_info) {
900 kfree(newkey);
901 return -ENOMEM;
902 }
Eric Dumazeta4654192010-05-16 00:36:33 -0700903 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800904 }
Wu Fengguangaa133072009-09-02 23:45:45 -0700905 if (tcp_alloc_md5sig_pool(sk) == NULL) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800906 kfree(newkey);
907 return -ENOMEM;
908 }
Arnaldo Carvalho de Melof6685932006-11-17 11:06:01 -0200909 md5sig = tp->md5sig_info;
910
911 if (md5sig->alloced4 == md5sig->entries4) {
912 keys = kmalloc((sizeof(*keys) *
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900913 (md5sig->entries4 + 1)), GFP_ATOMIC);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800914 if (!keys) {
915 kfree(newkey);
916 tcp_free_md5sig_pool();
917 return -ENOMEM;
918 }
919
Arnaldo Carvalho de Melof6685932006-11-17 11:06:01 -0200920 if (md5sig->entries4)
921 memcpy(keys, md5sig->keys4,
922 sizeof(*keys) * md5sig->entries4);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800923
924 /* Free old key list, and reference new one */
YOSHIFUJI Hideakia80cc202007-11-20 17:30:06 -0800925 kfree(md5sig->keys4);
Arnaldo Carvalho de Melof6685932006-11-17 11:06:01 -0200926 md5sig->keys4 = keys;
927 md5sig->alloced4++;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800928 }
Arnaldo Carvalho de Melof6685932006-11-17 11:06:01 -0200929 md5sig->entries4++;
David S. Millerf8ab18d2007-09-28 15:18:35 -0700930 md5sig->keys4[md5sig->entries4 - 1].addr = addr;
931 md5sig->keys4[md5sig->entries4 - 1].base.key = newkey;
932 md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800933 }
934 return 0;
935}
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800936EXPORT_SYMBOL(tcp_v4_md5_do_add);
937
938static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
939 u8 *newkey, u8 newkeylen)
940{
Eric Dumazetc720c7e2009-10-15 06:30:45 +0000941 return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800942 newkey, newkeylen);
943}
944
945int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
946{
947 struct tcp_sock *tp = tcp_sk(sk);
948 int i;
949
950 for (i = 0; i < tp->md5sig_info->entries4; i++) {
951 if (tp->md5sig_info->keys4[i].addr == addr) {
952 /* Free the key */
David S. Millerf8ab18d2007-09-28 15:18:35 -0700953 kfree(tp->md5sig_info->keys4[i].base.key);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800954 tp->md5sig_info->entries4--;
955
956 if (tp->md5sig_info->entries4 == 0) {
957 kfree(tp->md5sig_info->keys4);
958 tp->md5sig_info->keys4 = NULL;
Leigh Brown8228a18d2006-12-17 17:12:30 -0800959 tp->md5sig_info->alloced4 = 0;
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200960 } else if (tp->md5sig_info->entries4 != i) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800961 /* Need to do some manipulation */
YOSHIFUJI Hideaki354faf02007-11-20 17:30:31 -0800962 memmove(&tp->md5sig_info->keys4[i],
963 &tp->md5sig_info->keys4[i+1],
964 (tp->md5sig_info->entries4 - i) *
965 sizeof(struct tcp4_md5sig_key));
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800966 }
967 tcp_free_md5sig_pool();
968 return 0;
969 }
970 }
971 return -ENOENT;
972}
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800973EXPORT_SYMBOL(tcp_v4_md5_do_del);
974
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200975static void tcp_v4_clear_md5_list(struct sock *sk)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800976{
977 struct tcp_sock *tp = tcp_sk(sk);
978
979 /* Free each key, then the set of key keys,
980 * the crypto element, and then decrement our
981 * hold on the last resort crypto.
982 */
983 if (tp->md5sig_info->entries4) {
984 int i;
985 for (i = 0; i < tp->md5sig_info->entries4; i++)
David S. Millerf8ab18d2007-09-28 15:18:35 -0700986 kfree(tp->md5sig_info->keys4[i].base.key);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800987 tp->md5sig_info->entries4 = 0;
988 tcp_free_md5sig_pool();
989 }
990 if (tp->md5sig_info->keys4) {
991 kfree(tp->md5sig_info->keys4);
992 tp->md5sig_info->keys4 = NULL;
993 tp->md5sig_info->alloced4 = 0;
994 }
995}
996
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200997static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
998 int optlen)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800999{
1000 struct tcp_md5sig cmd;
1001 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1002 u8 *newkey;
1003
1004 if (optlen < sizeof(cmd))
1005 return -EINVAL;
1006
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02001007 if (copy_from_user(&cmd, optval, sizeof(cmd)))
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001008 return -EFAULT;
1009
1010 if (sin->sin_family != AF_INET)
1011 return -EINVAL;
1012
1013 if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
1014 if (!tcp_sk(sk)->md5sig_info)
1015 return -ENOENT;
1016 return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
1017 }
1018
1019 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1020 return -EINVAL;
1021
1022 if (!tcp_sk(sk)->md5sig_info) {
1023 struct tcp_sock *tp = tcp_sk(sk);
Wu Fengguangaa133072009-09-02 23:45:45 -07001024 struct tcp_md5sig_info *p;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001025
Wu Fengguangaa133072009-09-02 23:45:45 -07001026 p = kzalloc(sizeof(*p), sk->sk_allocation);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001027 if (!p)
1028 return -EINVAL;
1029
1030 tp->md5sig_info = p;
Eric Dumazeta4654192010-05-16 00:36:33 -07001031 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001032 }
1033
Wu Fengguangaa133072009-09-02 23:45:45 -07001034 newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001035 if (!newkey)
1036 return -ENOMEM;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001037 return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
1038 newkey, cmd.tcpm_keylen);
1039}
1040
Adam Langley49a72df2008-07-19 00:01:42 -07001041static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1042 __be32 daddr, __be32 saddr, int nbytes)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001043{
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001044 struct tcp4_pseudohdr *bp;
Adam Langley49a72df2008-07-19 00:01:42 -07001045 struct scatterlist sg;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001046
1047 bp = &hp->md5_blk.ip4;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001048
1049 /*
Adam Langley49a72df2008-07-19 00:01:42 -07001050 * 1. the TCP pseudo-header (in the order: source IP address,
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001051 * destination IP address, zero-padded protocol number, and
1052 * segment length)
1053 */
1054 bp->saddr = saddr;
1055 bp->daddr = daddr;
1056 bp->pad = 0;
YOSHIFUJI Hideaki076fb722008-04-17 12:48:12 +09001057 bp->protocol = IPPROTO_TCP;
Adam Langley49a72df2008-07-19 00:01:42 -07001058 bp->len = cpu_to_be16(nbytes);
David S. Millerc7da57a2007-10-26 00:41:21 -07001059
Adam Langley49a72df2008-07-19 00:01:42 -07001060 sg_init_one(&sg, bp, sizeof(*bp));
1061 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1062}
1063
1064static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
1065 __be32 daddr, __be32 saddr, struct tcphdr *th)
1066{
1067 struct tcp_md5sig_pool *hp;
1068 struct hash_desc *desc;
1069
1070 hp = tcp_get_md5sig_pool();
1071 if (!hp)
1072 goto clear_hash_noput;
1073 desc = &hp->md5_desc;
1074
1075 if (crypto_hash_init(desc))
1076 goto clear_hash;
1077 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1078 goto clear_hash;
1079 if (tcp_md5_hash_header(hp, th))
1080 goto clear_hash;
1081 if (tcp_md5_hash_key(hp, key))
1082 goto clear_hash;
1083 if (crypto_hash_final(desc, md5_hash))
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001084 goto clear_hash;
1085
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001086 tcp_put_md5sig_pool();
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001087 return 0;
Adam Langley49a72df2008-07-19 00:01:42 -07001088
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001089clear_hash:
1090 tcp_put_md5sig_pool();
1091clear_hash_noput:
1092 memset(md5_hash, 0, 16);
Adam Langley49a72df2008-07-19 00:01:42 -07001093 return 1;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001094}
1095
Adam Langley49a72df2008-07-19 00:01:42 -07001096int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1097 struct sock *sk, struct request_sock *req,
1098 struct sk_buff *skb)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001099{
Adam Langley49a72df2008-07-19 00:01:42 -07001100 struct tcp_md5sig_pool *hp;
1101 struct hash_desc *desc;
1102 struct tcphdr *th = tcp_hdr(skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001103 __be32 saddr, daddr;
1104
1105 if (sk) {
Eric Dumazetc720c7e2009-10-15 06:30:45 +00001106 saddr = inet_sk(sk)->inet_saddr;
1107 daddr = inet_sk(sk)->inet_daddr;
Adam Langley49a72df2008-07-19 00:01:42 -07001108 } else if (req) {
1109 saddr = inet_rsk(req)->loc_addr;
1110 daddr = inet_rsk(req)->rmt_addr;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001111 } else {
Adam Langley49a72df2008-07-19 00:01:42 -07001112 const struct iphdr *iph = ip_hdr(skb);
1113 saddr = iph->saddr;
1114 daddr = iph->daddr;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001115 }
Adam Langley49a72df2008-07-19 00:01:42 -07001116
1117 hp = tcp_get_md5sig_pool();
1118 if (!hp)
1119 goto clear_hash_noput;
1120 desc = &hp->md5_desc;
1121
1122 if (crypto_hash_init(desc))
1123 goto clear_hash;
1124
1125 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1126 goto clear_hash;
1127 if (tcp_md5_hash_header(hp, th))
1128 goto clear_hash;
1129 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1130 goto clear_hash;
1131 if (tcp_md5_hash_key(hp, key))
1132 goto clear_hash;
1133 if (crypto_hash_final(desc, md5_hash))
1134 goto clear_hash;
1135
1136 tcp_put_md5sig_pool();
1137 return 0;
1138
1139clear_hash:
1140 tcp_put_md5sig_pool();
1141clear_hash_noput:
1142 memset(md5_hash, 0, 16);
1143 return 1;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001144}
Adam Langley49a72df2008-07-19 00:01:42 -07001145EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001146
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02001147static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001148{
1149 /*
1150 * This gets called for each TCP segment that arrives
1151 * so we want to be efficient.
1152 * We have 3 drop cases:
1153 * o No MD5 hash and one expected.
1154 * o MD5 hash and we're not expecting one.
1155 * o MD5 hash and its wrong.
1156 */
1157 __u8 *hash_location = NULL;
1158 struct tcp_md5sig_key *hash_expected;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001159 const struct iphdr *iph = ip_hdr(skb);
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -07001160 struct tcphdr *th = tcp_hdr(skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001161 int genhash;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001162 unsigned char newhash[16];
1163
1164 hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
YOSHIFUJI Hideaki7d5d5522008-04-17 12:29:53 +09001165 hash_location = tcp_parse_md5sig_option(th);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001166
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001167 /* We've parsed the options - do we have a hash? */
1168 if (!hash_expected && !hash_location)
1169 return 0;
1170
1171 if (hash_expected && !hash_location) {
David S. Miller785957d2008-07-30 03:03:15 -07001172 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001173 return 1;
1174 }
1175
1176 if (!hash_expected && hash_location) {
David S. Miller785957d2008-07-30 03:03:15 -07001177 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001178 return 1;
1179 }
1180
1181 /* Okay, so this is hash_expected and hash_location -
1182 * so we need to calculate the checksum.
1183 */
Adam Langley49a72df2008-07-19 00:01:42 -07001184 genhash = tcp_v4_md5_hash_skb(newhash,
1185 hash_expected,
1186 NULL, NULL, skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001187
1188 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1189 if (net_ratelimit()) {
Harvey Harrison673d57e2008-10-31 00:53:57 -07001190 printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1191 &iph->saddr, ntohs(th->source),
1192 &iph->daddr, ntohs(th->dest),
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001193 genhash ? " tcp_v4_calc_md5_hash failed" : "");
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001194 }
1195 return 1;
1196 }
1197 return 0;
1198}
1199
1200#endif
1201
Eric Dumazet72a3eff2006-11-16 02:30:37 -08001202struct request_sock_ops tcp_request_sock_ops __read_mostly = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001203 .family = PF_INET,
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001204 .obj_size = sizeof(struct tcp_request_sock),
Octavian Purdila72659ec2010-01-17 19:09:39 -08001205 .rtx_syn_ack = tcp_v4_rtx_synack,
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001206 .send_ack = tcp_v4_reqsk_send_ack,
1207 .destructor = tcp_v4_reqsk_destructor,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001208 .send_reset = tcp_v4_send_reset,
Octavian Purdila72659ec2010-01-17 19:09:39 -08001209 .syn_ack_timeout = tcp_syn_ack_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001210};
1211
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001212#ifdef CONFIG_TCP_MD5SIG
Stephen Hemmingerb2e4b3d2009-09-01 19:25:03 +00001213static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001214 .md5_lookup = tcp_v4_reqsk_md5_lookup,
John Dykstrae3afe7b2009-07-16 05:04:51 +00001215 .calc_md5_hash = tcp_v4_md5_hash_skb,
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001216};
Andrew Mortonb6332e62006-11-30 19:16:28 -08001217#endif
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001218
/*
 * Process a connection request carried by @skb for socket @sk.
 *
 * A request_sock is allocated with inet_reqsk_alloc(), the TCP options of
 * the segment are parsed into it, an initial sequence number is chosen
 * and a SYN-ACK is sent with tcp_v4_send_synack().  On success the
 * request is added to the socket's request queue with
 * inet_csk_reqsk_queue_hash_add().
 *
 * Every path, including all the drop paths, returns 0.
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001219int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1220{
William Allen Simpson4957faade2009-12-02 18:25:27 +00001221 struct tcp_extend_values tmp_ext;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001222 struct tcp_options_received tmp_opt;
William Allen Simpson4957faade2009-12-02 18:25:27 +00001223 u8 *hash_location;
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001224 struct request_sock *req;
William Allen Simpsone6b4d112009-12-02 18:07:39 +00001225 struct inet_request_sock *ireq;
William Allen Simpson4957faade2009-12-02 18:25:27 +00001226 struct tcp_sock *tp = tcp_sk(sk);
William Allen Simpsone6b4d112009-12-02 18:07:39 +00001227 struct dst_entry *dst = NULL;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001228 __be32 saddr = ip_hdr(skb)->saddr;
1229 __be32 daddr = ip_hdr(skb)->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001230 __u32 isn = TCP_SKB_CB(skb)->when;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001231#ifdef CONFIG_SYN_COOKIES
1232 int want_cookie = 0;
1233#else
1234#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
1235#endif
1236
1237 /* Never answer to SYNs send to broadcast or multicast */
Eric Dumazet511c3f92009-06-02 05:14:27 +00001238 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001239 goto drop;
1240
1241 /* TW buckets are converted to open requests without
1242 * limitations, they conserve resources and peer is
1243 * evidently real one.
1244 */
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001245 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
Florian Westphal2a1d4bd2010-06-03 00:43:12 +00001246 if (net_ratelimit())
1247 syn_flood_warning(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001248#ifdef CONFIG_SYN_COOKIES
1249 if (sysctl_tcp_syncookies) {
1250 want_cookie = 1;
1251 } else
1252#endif
1253 goto drop;
1254 }
1255
1256 /* Accept backlog is full. If we have already queued enough
1257 * of warm entries in syn queue, drop request. It is better than
1258 * clogging syn queue with openreqs with exponentially increasing
1259 * timeout.
1260 */
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001261 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001262 goto drop;
1263
Arnaldo Carvalho de Meloce4a7d02008-06-10 12:39:35 -07001264 req = inet_reqsk_alloc(&tcp_request_sock_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001265 if (!req)
1266 goto drop;
1267
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001268#ifdef CONFIG_TCP_MD5SIG
1269 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1270#endif
1271
Linus Torvalds1da177e2005-04-16 15:20:36 -07001272 tcp_clear_options(&tmp_opt);
William Allen Simpsonbee7ca92009-11-10 09:51:18 +00001273 tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
William Allen Simpson4957faade2009-12-02 18:25:27 +00001274 tmp_opt.user_mss = tp->rx_opt.user_mss;
David S. Millerbb5b7c12009-12-15 20:56:42 -08001275 tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001276
/* Fill in tmp_ext, which is later passed to tcp_v4_send_synack(). */
William Allen Simpson4957faade2009-12-02 18:25:27 +00001277 if (tmp_opt.cookie_plus > 0 &&
1278 tmp_opt.saw_tstamp &&
1279 !tp->rx_opt.cookie_out_never &&
1280 (sysctl_tcp_cookie_size > 0 ||
1281 (tp->cookie_values != NULL &&
1282 tp->cookie_values->cookie_desired > 0))) {
1283 u8 *c;
1284 u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1285 int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1286
1287 if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1288 goto drop_and_release;
1289
1290 /* Secret recipe starts with IP addresses */
Eric Dumazet0eae88f2010-04-20 19:06:52 -07001291 *mess++ ^= (__force u32)daddr;
1292 *mess++ ^= (__force u32)saddr;
William Allen Simpson4957faade2009-12-02 18:25:27 +00001293
1294 /* plus variable length Initiator Cookie */
1295 c = (u8 *)mess;
1296 while (l-- > 0)
1297 *c++ ^= *hash_location++;
1298
1299#ifdef CONFIG_SYN_COOKIES
1300 want_cookie = 0; /* not our kind of cookie */
1301#endif
1302 tmp_ext.cookie_out_never = 0; /* false */
1303 tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1304 } else if (!tp->rx_opt.cookie_in_always) {
1305 /* redundant indications, but ensure initialization. */
1306 tmp_ext.cookie_out_never = 1; /* true */
1307 tmp_ext.cookie_plus = 0;
1308 } else {
1309 goto drop_and_release;
1310 }
1311 tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001312
Florian Westphal4dfc2812008-04-10 03:12:40 -07001313 if (want_cookie && !tmp_opt.saw_tstamp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001314 tcp_clear_options(&tmp_opt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001315
Linus Torvalds1da177e2005-04-16 15:20:36 -07001316 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001317 tcp_openreq_init(req, &tmp_opt, skb);
1318
/* The request's local address is the destination of the received segment. */
David S. Millerbb5b7c12009-12-15 20:56:42 -08001319 ireq = inet_rsk(req);
1320 ireq->loc_addr = daddr;
1321 ireq->rmt_addr = saddr;
1322 ireq->no_srccheck = inet_sk(sk)->transparent;
1323 ireq->opt = tcp_v4_save_options(sk, skb);
1324
Paul Moore284904a2009-03-27 17:10:28 -04001325 if (security_inet_conn_request(sk, skb, req))
David S. Millerbb5b7c12009-12-15 20:56:42 -08001326 goto drop_and_free;
Paul Moore284904a2009-03-27 17:10:28 -04001327
Florian Westphal172d69e2010-06-21 11:48:45 +00001328 if (!want_cookie || tmp_opt.tstamp_ok)
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -07001329 TCP_ECN_create_request(req, tcp_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001330
1331 if (want_cookie) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001332 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
Florian Westphal172d69e2010-06-21 11:48:45 +00001333 req->cookie_ts = tmp_opt.tstamp_ok;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001334 } else if (!isn) {
1335 struct inet_peer *peer = NULL;
1336
1337 /* VJ's idea. We save last timestamp seen
1338 * from the destination in peer table, when entering
1339 * state TIME-WAIT, and check against it before
1340 * accepting new connection request.
1341 *
1342 * If "isn" is not zero, this request hit alive
1343 * timewait bucket, so that all the necessary checks
1344 * are made in the function processing timewait state.
1345 */
1346 if (tmp_opt.saw_tstamp &&
Arnaldo Carvalho de Melo295ff7e2005-08-09 20:44:40 -07001347 tcp_death_row.sysctl_tw_recycle &&
David S. Millerbb5b7c12009-12-15 20:56:42 -08001348 (dst = inet_csk_route_req(sk, req)) != NULL &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001349 (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
David S. Miller7a71ed82011-02-09 14:30:26 -08001350 peer->daddr.addr.a4 == saddr) {
Eric Dumazet317fe0e2010-06-16 04:52:13 +00001351 inet_peer_refcheck(peer);
Eric Dumazet2c1409a2009-11-12 09:33:09 +00001352 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001353 (s32)(peer->tcp_ts - req->ts_recent) >
1354 TCP_PAWS_WINDOW) {
Pavel Emelyanovde0744a2008-07-16 20:31:16 -07001355 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
Denis V. Lunev7cd04fa2008-03-03 11:59:32 -08001356 goto drop_and_release;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001357 }
1358 }
1359 /* Kill the following clause, if you dislike this way. */
1360 else if (!sysctl_tcp_syncookies &&
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001361 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
Linus Torvalds1da177e2005-04-16 15:20:36 -07001362 (sysctl_max_syn_backlog >> 2)) &&
1363 (!peer || !peer->tcp_ts_stamp) &&
1364 (!dst || !dst_metric(dst, RTAX_RTT))) {
1365 /* Without syncookies last quarter of
1366 * backlog is filled with destinations,
1367 * proven to be alive.
1368 * It means that we continue to communicate
1369 * to destinations, already remembered
1370 * to the moment of synflood.
1371 */
Harvey Harrison673d57e2008-10-31 00:53:57 -07001372 LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
1373 &saddr, ntohs(tcp_hdr(skb)->source));
Denis V. Lunev7cd04fa2008-03-03 11:59:32 -08001374 goto drop_and_release;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001375 }
1376
Gerrit Renkera94f7232006-11-10 14:06:49 -08001377 isn = tcp_v4_init_sequence(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001378 }
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001379 tcp_rsk(req)->snt_isn = isn;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380
/*
 * The request is freed instead of queued when sending the SYN-ACK fails
 * and also whenever want_cookie is set.
 */
Octavian Purdila72659ec2010-01-17 19:09:39 -08001381 if (tcp_v4_send_synack(sk, dst, req,
1382 (struct request_values *)&tmp_ext) ||
William Allen Simpson4957faade2009-12-02 18:25:27 +00001383 want_cookie)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001384 goto drop_and_free;
1385
Denis V. Lunev7cd04fa2008-03-03 11:59:32 -08001386 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001387 return 0;
1388
/*
 * Error exits fall through from most to least cleanup: release the
 * route, free the request, return.
 */
Denis V. Lunev7cd04fa2008-03-03 11:59:32 -08001389drop_and_release:
1390 dst_release(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001391drop_and_free:
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001392 reqsk_free(req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001393drop:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001394 return 0;
1395}
Eric Dumazet4bc2f182010-07-09 21:22:10 +00001396EXPORT_SYMBOL(tcp_v4_conn_request);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001397
1398
1399/*
1400 * The three way handshake has completed - we got a valid synack -
1401 * now create the new socket.
1402 */
1403struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001404 struct request_sock *req,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001405 struct dst_entry *dst)
1406{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001407 struct inet_request_sock *ireq;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001408 struct inet_sock *newinet;
1409 struct tcp_sock *newtp;
1410 struct sock *newsk;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001411#ifdef CONFIG_TCP_MD5SIG
1412 struct tcp_md5sig_key *key;
1413#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001414
1415 if (sk_acceptq_is_full(sk))
1416 goto exit_overflow;
1417
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001418 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001419 goto exit;
1420
1421 newsk = tcp_create_openreq_child(sk, req, skb);
1422 if (!newsk)
Balazs Scheidler093d2822010-10-21 13:06:43 +02001423 goto exit_nonewsk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001424
Herbert Xubcd76112006-06-30 13:36:35 -07001425 newsk->sk_gso_type = SKB_GSO_TCPV4;
Arnaldo Carvalho de Melo6cbb0df2005-08-09 19:49:02 -07001426 sk_setup_caps(newsk, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001427
1428 newtp = tcp_sk(newsk);
1429 newinet = inet_sk(newsk);
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001430 ireq = inet_rsk(req);
Eric Dumazetc720c7e2009-10-15 06:30:45 +00001431 newinet->inet_daddr = ireq->rmt_addr;
1432 newinet->inet_rcv_saddr = ireq->loc_addr;
1433 newinet->inet_saddr = ireq->loc_addr;
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001434 newinet->opt = ireq->opt;
1435 ireq->opt = NULL;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001436 newinet->mc_index = inet_iif(skb);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001437 newinet->mc_ttl = ip_hdr(skb)->ttl;
Arnaldo Carvalho de Melod83d8462005-12-13 23:26:10 -08001438 inet_csk(newsk)->icsk_ext_hdr_len = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001439 if (newinet->opt)
Arnaldo Carvalho de Melod83d8462005-12-13 23:26:10 -08001440 inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
Eric Dumazetc720c7e2009-10-15 06:30:45 +00001441 newinet->inet_id = newtp->write_seq ^ jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001442
John Heffner5d424d52006-03-20 17:53:41 -08001443 tcp_mtup_init(newsk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001444 tcp_sync_mss(newsk, dst_mtu(dst));
David S. Miller0dbaee32010-12-13 12:52:14 -08001445 newtp->advmss = dst_metric_advmss(dst);
Tom Quetchenbachf5fff5d2008-09-21 00:21:51 -07001446 if (tcp_sk(sk)->rx_opt.user_mss &&
1447 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1448 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1449
Linus Torvalds1da177e2005-04-16 15:20:36 -07001450 tcp_initialize_rcv_mss(newsk);
1451
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001452#ifdef CONFIG_TCP_MD5SIG
1453 /* Copy over the MD5 key from the original socket */
Eric Dumazetc720c7e2009-10-15 06:30:45 +00001454 key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
1455 if (key != NULL) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001456 /*
1457 * We're using one, so create a matching key
1458 * on the newsk structure. If we fail to get
1459 * memory, then we end up not copying the key
1460 * across. Shucks.
1461 */
Arnaldo Carvalho de Melof6685932006-11-17 11:06:01 -02001462 char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1463 if (newkey != NULL)
Eric Dumazetc720c7e2009-10-15 06:30:45 +00001464 tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001465 newkey, key->keylen);
Eric Dumazeta4654192010-05-16 00:36:33 -07001466 sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001467 }
1468#endif
1469
Balazs Scheidler093d2822010-10-21 13:06:43 +02001470 if (__inet_inherit_port(sk, newsk) < 0) {
1471 sock_put(newsk);
1472 goto exit;
1473 }
Eric Dumazet9327f702009-12-04 03:46:54 +00001474 __inet_hash_nolisten(newsk, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001475
1476 return newsk;
1477
1478exit_overflow:
Pavel Emelyanovde0744a2008-07-16 20:31:16 -07001479 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
Balazs Scheidler093d2822010-10-21 13:06:43 +02001480exit_nonewsk:
1481 dst_release(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001482exit:
Pavel Emelyanovde0744a2008-07-16 20:31:16 -07001483 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001484 return NULL;
1485}
Eric Dumazet4bc2f182010-07-09 21:22:10 +00001486EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001487
1488static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1489{
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -07001490 struct tcphdr *th = tcp_hdr(skb);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001491 const struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001492 struct sock *nsk;
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001493 struct request_sock **prev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001494 /* Find possible connection requests. */
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001495 struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1496 iph->saddr, iph->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001497 if (req)
1498 return tcp_check_req(sk, skb, req, prev);
1499
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001500 nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
Pavel Emelyanovc67499c2008-01-31 05:06:40 -08001501 th->source, iph->daddr, th->dest, inet_iif(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001502
1503 if (nsk) {
1504 if (nsk->sk_state != TCP_TIME_WAIT) {
1505 bh_lock_sock(nsk);
1506 return nsk;
1507 }
YOSHIFUJI Hideaki9469c7b2006-10-10 19:41:46 -07001508 inet_twsk_put(inet_twsk(nsk));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001509 return NULL;
1510 }
1511
1512#ifdef CONFIG_SYN_COOKIES
Florian Westphalaf9b4732010-06-03 00:43:44 +00001513 if (!th->syn)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001514 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1515#endif
1516 return sk;
1517}
1518
Al Virob51655b2006-11-14 21:40:42 -08001519static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001520{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001521 const struct iphdr *iph = ip_hdr(skb);
1522
Patrick McHardy84fa7932006-08-29 16:44:56 -07001523 if (skb->ip_summed == CHECKSUM_COMPLETE) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001524 if (!tcp_v4_check(skb->len, iph->saddr,
1525 iph->daddr, skb->csum)) {
Herbert Xufb286bb2005-11-10 13:01:24 -08001526 skb->ip_summed = CHECKSUM_UNNECESSARY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001527 return 0;
Herbert Xufb286bb2005-11-10 13:01:24 -08001528 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001529 }
Herbert Xufb286bb2005-11-10 13:01:24 -08001530
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001531 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
Herbert Xufb286bb2005-11-10 13:01:24 -08001532 skb->len, IPPROTO_TCP, 0);
1533
Linus Torvalds1da177e2005-04-16 15:20:36 -07001534 if (skb->len <= 76) {
Herbert Xufb286bb2005-11-10 13:01:24 -08001535 return __skb_checksum_complete(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001536 }
1537 return 0;
1538}
1539
1540
1541/* The socket must have it's spinlock held when we get
1542 * here.
1543 *
1544 * We have a potential double-lock case here, so even when
1545 * doing backlog processing we use the BH locking scheme.
1546 * This is because we cannot sleep with the original spinlock
1547 * held.
1548 */
1549int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1550{
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001551 struct sock *rsk;
1552#ifdef CONFIG_TCP_MD5SIG
1553 /*
1554 * We really want to reject the packet as early as possible
1555 * if:
1556 * o We're expecting an MD5'd packet and this is no MD5 tcp option
1557 * o There is an MD5 option and we're not expecting one
1558 */
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02001559 if (tcp_v4_inbound_md5_hash(sk, skb))
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001560 goto discard;
1561#endif
1562
Linus Torvalds1da177e2005-04-16 15:20:36 -07001563 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
Eric Dumazetca551582010-06-03 09:03:58 +00001564 sock_rps_save_rxhash(sk, skb->rxhash);
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -07001565 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001566 rsk = sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001567 goto reset;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001568 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001569 return 0;
1570 }
1571
Arnaldo Carvalho de Meloab6a5bb2007-03-18 17:43:48 -07001572 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001573 goto csum_err;
1574
1575 if (sk->sk_state == TCP_LISTEN) {
1576 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1577 if (!nsk)
1578 goto discard;
1579
1580 if (nsk != sk) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001581 if (tcp_child_process(sk, nsk, skb)) {
1582 rsk = nsk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001583 goto reset;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001584 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001585 return 0;
1586 }
Eric Dumazetca551582010-06-03 09:03:58 +00001587 } else
1588 sock_rps_save_rxhash(sk, skb->rxhash);
1589
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -07001590 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001591 rsk = sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001592 goto reset;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001593 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001594 return 0;
1595
1596reset:
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001597 tcp_v4_send_reset(rsk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001598discard:
1599 kfree_skb(skb);
1600 /* Be careful here. If this function gets more complicated and
1601 * gcc suffers from register pressure on the x86, sk (in %ebx)
1602 * might be destroyed here. This current version compiles correctly,
1603 * but you have been warned.
1604 */
1605 return 0;
1606
1607csum_err:
Pavel Emelyanov63231bd2008-07-16 20:22:25 -07001608 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001609 goto discard;
1610}
Eric Dumazet4bc2f182010-07-09 21:22:10 +00001611EXPORT_SYMBOL(tcp_v4_do_rcv);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612
1613/*
1614 * From tcp_input.c
1615 */
1616
1617int tcp_v4_rcv(struct sk_buff *skb)
1618{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001619 const struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001620 struct tcphdr *th;
1621 struct sock *sk;
1622 int ret;
Pavel Emelyanova86b1e32008-07-16 20:20:58 -07001623 struct net *net = dev_net(skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001624
1625 if (skb->pkt_type != PACKET_HOST)
1626 goto discard_it;
1627
1628 /* Count it even if it's bad */
Pavel Emelyanov63231bd2008-07-16 20:22:25 -07001629 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001630
1631 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1632 goto discard_it;
1633
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -07001634 th = tcp_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635
1636 if (th->doff < sizeof(struct tcphdr) / 4)
1637 goto bad_packet;
1638 if (!pskb_may_pull(skb, th->doff * 4))
1639 goto discard_it;
1640
1641 /* An explanation is required here, I think.
1642 * Packet length and doff are validated by header prediction,
Stephen Hemmingercaa20d9a2005-11-10 17:13:47 -08001643 * provided case of th->doff==0 is eliminated.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001644 * So, we defer the checks. */
Herbert Xu60476372007-04-09 11:59:39 -07001645 if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001646 goto bad_packet;
1647
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -07001648 th = tcp_hdr(skb);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001649 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001650 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1651 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1652 skb->len - th->doff * 4);
1653 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1654 TCP_SKB_CB(skb)->when = 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001655 TCP_SKB_CB(skb)->flags = iph->tos;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001656 TCP_SKB_CB(skb)->sacked = 0;
1657
Arnaldo Carvalho de Melo9a1f27c2008-10-07 11:41:57 -07001658 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001659 if (!sk)
1660 goto no_tcp_socket;
1661
Eric Dumazetbb134d52010-03-09 05:55:56 +00001662process:
1663 if (sk->sk_state == TCP_TIME_WAIT)
1664 goto do_time_wait;
1665
Eric Dumazet6cce09f2010-03-07 23:21:57 +00001666 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1667 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
Stephen Hemmingerd218d112010-01-11 16:28:01 -08001668 goto discard_and_relse;
Eric Dumazet6cce09f2010-03-07 23:21:57 +00001669 }
Stephen Hemmingerd218d112010-01-11 16:28:01 -08001670
Linus Torvalds1da177e2005-04-16 15:20:36 -07001671 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1672 goto discard_and_relse;
Patrick McHardyb59c2702006-01-06 23:06:10 -08001673 nf_reset(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674
Dmitry Mishinfda9ef52006-08-31 15:28:39 -07001675 if (sk_filter(sk, skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676 goto discard_and_relse;
1677
1678 skb->dev = NULL;
1679
Ingo Molnarc6366182006-07-03 00:25:13 -07001680 bh_lock_sock_nested(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001681 ret = 0;
1682 if (!sock_owned_by_user(sk)) {
Chris Leech1a2449a2006-05-23 18:05:53 -07001683#ifdef CONFIG_NET_DMA
1684 struct tcp_sock *tp = tcp_sk(sk);
1685 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
Dan Williamsf67b4592009-01-06 11:38:15 -07001686 tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
Chris Leech1a2449a2006-05-23 18:05:53 -07001687 if (tp->ucopy.dma_chan)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001688 ret = tcp_v4_do_rcv(sk, skb);
Chris Leech1a2449a2006-05-23 18:05:53 -07001689 else
1690#endif
1691 {
1692 if (!tcp_prequeue(sk, skb))
Shan Weiae8d7f82009-05-05 01:01:29 +00001693 ret = tcp_v4_do_rcv(sk, skb);
Chris Leech1a2449a2006-05-23 18:05:53 -07001694 }
Eric Dumazet6cce09f2010-03-07 23:21:57 +00001695 } else if (unlikely(sk_add_backlog(sk, skb))) {
Zhu Yi6b03a532010-03-04 18:01:41 +00001696 bh_unlock_sock(sk);
Eric Dumazet6cce09f2010-03-07 23:21:57 +00001697 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
Zhu Yi6b03a532010-03-04 18:01:41 +00001698 goto discard_and_relse;
1699 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001700 bh_unlock_sock(sk);
1701
1702 sock_put(sk);
1703
1704 return ret;
1705
1706no_tcp_socket:
1707 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1708 goto discard_it;
1709
1710 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1711bad_packet:
Pavel Emelyanov63231bd2008-07-16 20:22:25 -07001712 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001713 } else {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001714 tcp_v4_send_reset(NULL, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001715 }
1716
1717discard_it:
1718 /* Discard frame. */
1719 kfree_skb(skb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001720 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001721
1722discard_and_relse:
1723 sock_put(sk);
1724 goto discard_it;
1725
1726do_time_wait:
1727 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
YOSHIFUJI Hideaki9469c7b2006-10-10 19:41:46 -07001728 inet_twsk_put(inet_twsk(sk));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001729 goto discard_it;
1730 }
1731
1732 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
Pavel Emelyanov63231bd2008-07-16 20:22:25 -07001733 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
YOSHIFUJI Hideaki9469c7b2006-10-10 19:41:46 -07001734 inet_twsk_put(inet_twsk(sk));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001735 goto discard_it;
1736 }
YOSHIFUJI Hideaki9469c7b2006-10-10 19:41:46 -07001737 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001738 case TCP_TW_SYN: {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001739 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
Pavel Emelyanovc67499c2008-01-31 05:06:40 -08001740 &tcp_hashinfo,
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001741 iph->daddr, th->dest,
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001742 inet_iif(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001743 if (sk2) {
YOSHIFUJI Hideaki9469c7b2006-10-10 19:41:46 -07001744 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1745 inet_twsk_put(inet_twsk(sk));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001746 sk = sk2;
1747 goto process;
1748 }
1749 /* Fall through to ACK */
1750 }
1751 case TCP_TW_ACK:
1752 tcp_v4_timewait_ack(sk, skb);
1753 break;
1754 case TCP_TW_RST:
1755 goto no_tcp_socket;
1756 case TCP_TW_SUCCESS:;
1757 }
1758 goto discard_it;
1759}
1760
David S. Miller3f419d22010-11-29 13:37:14 -08001761struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001762{
David S. Miller3f419d22010-11-29 13:37:14 -08001763 struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001764 struct inet_sock *inet = inet_sk(sk);
David S. Miller3f419d22010-11-29 13:37:14 -08001765 struct inet_peer *peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001766
Eric Dumazetc720c7e2009-10-15 06:30:45 +00001767 if (!rt || rt->rt_dst != inet->inet_daddr) {
David S. Millerb534ecf2010-11-30 11:54:19 -08001768 peer = inet_getpeer_v4(inet->inet_daddr, 1);
David S. Miller3f419d22010-11-29 13:37:14 -08001769 *release_it = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001770 } else {
1771 if (!rt->peer)
1772 rt_bind_peer(rt, 1);
1773 peer = rt->peer;
David S. Miller3f419d22010-11-29 13:37:14 -08001774 *release_it = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001775 }
1776
David S. Miller3f419d22010-11-29 13:37:14 -08001777 return peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001778}
David S. Miller3f419d22010-11-29 13:37:14 -08001779EXPORT_SYMBOL(tcp_v4_get_peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001780
David S. Millerccb7c412010-12-01 18:09:13 -08001781void *tcp_v4_tw_get_peer(struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001782{
David S. Millerccb7c412010-12-01 18:09:13 -08001783 struct inet_timewait_sock *tw = inet_twsk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001784
David S. Millerccb7c412010-12-01 18:09:13 -08001785 return inet_getpeer_v4(tw->tw_daddr, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001786}
David S. Millerccb7c412010-12-01 18:09:13 -08001787EXPORT_SYMBOL(tcp_v4_tw_get_peer);
1788
1789static struct timewait_sock_ops tcp_timewait_sock_ops = {
1790 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1791 .twsk_unique = tcp_twsk_unique,
1792 .twsk_destructor= tcp_twsk_destructor,
1793 .twsk_getpeer = tcp_v4_tw_get_peer,
1794};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001795
Stephen Hemminger3b401a82009-09-01 19:25:04 +00001796const struct inet_connection_sock_af_ops ipv4_specific = {
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08001797 .queue_xmit = ip_queue_xmit,
1798 .send_check = tcp_v4_send_check,
1799 .rebuild_header = inet_sk_rebuild_header,
1800 .conn_request = tcp_v4_conn_request,
1801 .syn_recv_sock = tcp_v4_syn_recv_sock,
David S. Miller3f419d22010-11-29 13:37:14 -08001802 .get_peer = tcp_v4_get_peer,
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08001803 .net_header_len = sizeof(struct iphdr),
1804 .setsockopt = ip_setsockopt,
1805 .getsockopt = ip_getsockopt,
1806 .addr2sockaddr = inet_csk_addr2sockaddr,
1807 .sockaddr_len = sizeof(struct sockaddr_in),
Arnaldo Carvalho de Meloab1e0a12008-02-03 04:06:04 -08001808 .bind_conflict = inet_csk_bind_conflict,
Dmitry Mishin3fdadf72006-03-20 22:45:21 -08001809#ifdef CONFIG_COMPAT
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08001810 .compat_setsockopt = compat_ip_setsockopt,
1811 .compat_getsockopt = compat_ip_getsockopt,
Dmitry Mishin3fdadf72006-03-20 22:45:21 -08001812#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001813};
Eric Dumazet4bc2f182010-07-09 21:22:10 +00001814EXPORT_SYMBOL(ipv4_specific);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001815
#ifdef CONFIG_TCP_MD5SIG
/* MD5 signature operations for TCP over IPv4. */
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.md5_add	= tcp_v4_md5_add_func,
	.md5_parse	= tcp_v4_parse_md5_keys,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
};
#endif
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001824
Linus Torvalds1da177e2005-04-16 15:20:36 -07001825/* NOTE: A lot of things set to zero explicitly by call to
1826 * sk_alloc() so need not be done here.
1827 */
1828static int tcp_v4_init_sock(struct sock *sk)
1829{
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03001830 struct inet_connection_sock *icsk = inet_csk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001831 struct tcp_sock *tp = tcp_sk(sk);
1832
1833 skb_queue_head_init(&tp->out_of_order_queue);
1834 tcp_init_xmit_timers(sk);
1835 tcp_prequeue_init(tp);
1836
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03001837 icsk->icsk_rto = TCP_TIMEOUT_INIT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001838 tp->mdev = TCP_TIMEOUT_INIT;
1839
1840 /* So many TCP implementations out there (incorrectly) count the
1841 * initial SYN frame in their delayed-ACK and congestion control
1842 * algorithms that we must have the following bandaid to talk
1843 * efficiently to them. -DaveM
1844 */
1845 tp->snd_cwnd = 2;
1846
1847 /* See draft-stevens-tcpca-spec-01 for discussion of the
1848 * initialization of these values.
1849 */
Ilpo Järvinen0b6a05c2009-09-15 01:30:10 -07001850 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001851 tp->snd_cwnd_clamp = ~0;
William Allen Simpsonbee7ca92009-11-10 09:51:18 +00001852 tp->mss_cache = TCP_MSS_DEFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001853
1854 tp->reordering = sysctl_tcp_reordering;
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03001855 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001856
1857 sk->sk_state = TCP_CLOSE;
1858
1859 sk->sk_write_space = sk_stream_write_space;
1860 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1861
Arnaldo Carvalho de Melo8292a172005-12-13 23:15:52 -08001862 icsk->icsk_af_ops = &ipv4_specific;
Arnaldo Carvalho de Melod83d8462005-12-13 23:26:10 -08001863 icsk->icsk_sync_mss = tcp_sync_mss;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001864#ifdef CONFIG_TCP_MD5SIG
1865 tp->af_specific = &tcp_sock_ipv4_specific;
1866#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001867
William Allen Simpson435cf552009-12-02 18:17:05 +00001868 /* TCP Cookie Transactions */
1869 if (sysctl_tcp_cookie_size > 0) {
1870 /* Default, cookies without s_data_payload. */
1871 tp->cookie_values =
1872 kzalloc(sizeof(*tp->cookie_values),
1873 sk->sk_allocation);
1874 if (tp->cookie_values != NULL)
1875 kref_init(&tp->cookie_values->kref);
1876 }
1877 /* Presumed zeroed, in order of appearance:
1878 * cookie_in_always, cookie_out_never,
1879 * s_data_constant, s_data_in, s_data_out
1880 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001881 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1882 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1883
Herbert Xueb4dea52008-12-29 23:04:08 -08001884 local_bh_disable();
Eric Dumazet17483762008-11-25 21:16:35 -08001885 percpu_counter_inc(&tcp_sockets_allocated);
Herbert Xueb4dea52008-12-29 23:04:08 -08001886 local_bh_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001887
1888 return 0;
1889}
1890
Brian Haley7d06b2e2008-06-14 17:04:49 -07001891void tcp_v4_destroy_sock(struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001892{
1893 struct tcp_sock *tp = tcp_sk(sk);
1894
1895 tcp_clear_xmit_timers(sk);
1896
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03001897 tcp_cleanup_congestion_control(sk);
Stephen Hemminger317a76f2005-06-23 12:19:55 -07001898
Linus Torvalds1da177e2005-04-16 15:20:36 -07001899 /* Cleanup up the write buffer. */
David S. Millerfe067e82007-03-07 12:12:44 -08001900 tcp_write_queue_purge(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001901
1902 /* Cleans up our, hopefully empty, out_of_order_queue. */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001903 __skb_queue_purge(&tp->out_of_order_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001904
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001905#ifdef CONFIG_TCP_MD5SIG
1906 /* Clean up the MD5 key list, if any */
1907 if (tp->md5sig_info) {
1908 tcp_v4_clear_md5_list(sk);
1909 kfree(tp->md5sig_info);
1910 tp->md5sig_info = NULL;
1911 }
1912#endif
1913
Chris Leech1a2449a2006-05-23 18:05:53 -07001914#ifdef CONFIG_NET_DMA
1915 /* Cleans up our sk_async_wait_queue */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001916 __skb_queue_purge(&sk->sk_async_wait_queue);
Chris Leech1a2449a2006-05-23 18:05:53 -07001917#endif
1918
Linus Torvalds1da177e2005-04-16 15:20:36 -07001919 /* Clean prequeue, it must be empty really */
1920 __skb_queue_purge(&tp->ucopy.prequeue);
1921
1922 /* Clean up a referenced TCP bind bucket. */
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001923 if (inet_csk(sk)->icsk_bind_hash)
Arnaldo Carvalho de Meloab1e0a12008-02-03 04:06:04 -08001924 inet_put_port(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001925
1926 /*
1927 * If sendmsg cached page exists, toss it.
1928 */
1929 if (sk->sk_sndmsg_page) {
1930 __free_page(sk->sk_sndmsg_page);
1931 sk->sk_sndmsg_page = NULL;
1932 }
1933
William Allen Simpson435cf552009-12-02 18:17:05 +00001934 /* TCP Cookie Transactions */
1935 if (tp->cookie_values != NULL) {
1936 kref_put(&tp->cookie_values->kref,
1937 tcp_cookie_values_release);
1938 tp->cookie_values = NULL;
1939 }
1940
Eric Dumazet17483762008-11-25 21:16:35 -08001941 percpu_counter_dec(&tcp_sockets_allocated);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001942}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001943EXPORT_SYMBOL(tcp_v4_destroy_sock);
1944
1945#ifdef CONFIG_PROC_FS
1946/* Proc filesystem TCP sock list dumping. */
1947
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08001948static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001949{
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08001950 return hlist_nulls_empty(head) ? NULL :
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001951 list_entry(head->first, struct inet_timewait_sock, tw_node);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001952}
1953
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001954static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001955{
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08001956 return !is_a_nulls(tw->tw_node.next) ?
1957 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001958}
1959
Tom Herberta8b690f2010-06-07 00:43:42 -07001960/*
1961 * Get next listener socket following cur. If cur is NULL, get first socket
1962 * starting from bucket given in st->bucket; when st->bucket is zero the
1963 * very first socket in the hash table is returned.
1964 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001965static void *listening_get_next(struct seq_file *seq, void *cur)
1966{
/*
 * Walks tcp_hashinfo.listening_hash bucket by bucket and, for listeners
 * with a non-empty request queue, their syn_table as well.
 *
 * Returns either a listening socket or a request_sock whose family equals
 * st->family, or NULL once st->bucket reaches INET_LHTABLE_SIZE.
 *
 * Locking as visible in this function: when a listening socket is
 * returned, the lock of bucket st->bucket is still held; when a request
 * is returned, the owning listener's syn_wait_lock is read-held as well.
 * When NULL is returned the last bucket lock has been dropped.
 * NOTE(review): for cur != NULL the bucket lock is not taken here, so it
 * is presumably still held from the previous call - confirm with callers.
 */
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001967 struct inet_connection_sock *icsk;
Eric Dumazetc25eb3b2008-11-23 17:22:55 -08001968 struct hlist_nulls_node *node;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001969 struct sock *sk = cur;
Eric Dumazet5caea4e2008-11-20 00:40:07 -08001970 struct inet_listen_hashbucket *ilb;
Jianjun Kong5799de02008-11-03 02:49:10 -08001971 struct tcp_iter_state *st = seq->private;
Denis V. Luneva4146b12008-04-13 22:11:14 -07001972 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001973
/* No cursor: lock bucket st->bucket and start from its head. */
1974 if (!sk) {
Tom Herberta8b690f2010-06-07 00:43:42 -07001975 ilb = &tcp_hashinfo.listening_hash[st->bucket];
Eric Dumazet5caea4e2008-11-20 00:40:07 -08001976 spin_lock_bh(&ilb->lock);
Eric Dumazetc25eb3b2008-11-23 17:22:55 -08001977 sk = sk_nulls_head(&ilb->head);
Tom Herberta8b690f2010-06-07 00:43:42 -07001978 st->offset = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001979 goto get_sk;
1980 }
/* Continuing after cur: advance the overall and per-bucket counters. */
Eric Dumazet5caea4e2008-11-20 00:40:07 -08001981 ilb = &tcp_hashinfo.listening_hash[st->bucket];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001982 ++st->num;
Tom Herberta8b690f2010-06-07 00:43:42 -07001983 ++st->offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001984
/* cur is a request_sock of st->syn_wait_sk: continue in its syn_table. */
1985 if (st->state == TCP_SEQ_STATE_OPENREQ) {
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001986 struct request_sock *req = cur;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001987
Eric Dumazet72a3eff2006-11-16 02:30:37 -08001988 icsk = inet_csk(st->syn_wait_sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001989 req = req->dl_next;
/* Outer loop steps through syn_table slots, inner loop through one chain. */
1990 while (1) {
1991 while (req) {
Daniel Lezcanobdccc4c2008-07-19 00:15:13 -07001992 if (req->rsk_ops->family == st->family) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001993 cur = req;
1994 goto out;
1995 }
1996 req = req->dl_next;
1997 }
Eric Dumazet72a3eff2006-11-16 02:30:37 -08001998 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001999 break;
/* Also entered from start_req below, with st->sbucket reset to 0. */
2000get_req:
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002001 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002002 }
/* syn_table exhausted: resume with the socket after that listener. */
Eric Dumazet1bde5ac2010-12-23 09:32:46 -08002003 sk = sk_nulls_next(st->syn_wait_sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002004 st->state = TCP_SEQ_STATE_LISTENING;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002005 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002006 } else {
/* cur is a listening socket: visit its pending requests first, if any. */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002007 icsk = inet_csk(sk);
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002008 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2009 if (reqsk_queue_len(&icsk->icsk_accept_queue))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002010 goto start_req;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002011 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
Eric Dumazet1bde5ac2010-12-23 09:32:46 -08002012 sk = sk_nulls_next(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002013 }
/* Scan the rest of the current bucket, starting at sk. */
2014get_sk:
Eric Dumazetc25eb3b2008-11-23 17:22:55 -08002015 sk_nulls_for_each_from(sk, node) {
/* Sockets belonging to another network namespace are skipped. */
Pavel Emelyanov8475ef92010-11-22 03:26:12 +00002016 if (!net_eq(sock_net(sk), net))
2017 continue;
2018 if (sk->sk_family == st->family) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002019 cur = sk;
2020 goto out;
2021 }
/* Listener of a different family: its request queue is still examined. */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002022 icsk = inet_csk(sk);
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002023 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2024 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
/* Reached with syn_wait_lock read-held; it stays held while requests are walked. */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002025start_req:
2026 st->uid = sock_i_uid(sk);
2027 st->syn_wait_sk = sk;
2028 st->state = TCP_SEQ_STATE_OPENREQ;
2029 st->sbucket = 0;
2030 goto get_req;
2031 }
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002032 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002033 }
/* Bucket exhausted: unlock it and move on to the next one, if any. */
Eric Dumazet5caea4e2008-11-20 00:40:07 -08002034 spin_unlock_bh(&ilb->lock);
Tom Herberta8b690f2010-06-07 00:43:42 -07002035 st->offset = 0;
Arnaldo Carvalho de Melo0f7ff922005-08-09 19:59:44 -07002036 if (++st->bucket < INET_LHTABLE_SIZE) {
Eric Dumazet5caea4e2008-11-20 00:40:07 -08002037 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2038 spin_lock_bh(&ilb->lock);
Eric Dumazetc25eb3b2008-11-23 17:22:55 -08002039 sk = sk_nulls_head(&ilb->head);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002040 goto get_sk;
2041 }
/* All buckets visited. */
2042 cur = NULL;
2043out:
2044 return cur;
2045}
2046
2047static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2048{
Tom Herberta8b690f2010-06-07 00:43:42 -07002049 struct tcp_iter_state *st = seq->private;
2050 void *rc;
2051
2052 st->bucket = 0;
2053 st->offset = 0;
2054 rc = listening_get_next(seq, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002055
2056 while (rc && *pos) {
2057 rc = listening_get_next(seq, rc);
2058 --*pos;
2059 }
2060 return rc;
2061}
2062
Andi Kleen6eac5602008-08-28 01:08:02 -07002063static inline int empty_bucket(struct tcp_iter_state *st)
2064{
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002065 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2066 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
Andi Kleen6eac5602008-08-28 01:08:02 -07002067}
2068
Tom Herberta8b690f2010-06-07 00:43:42 -07002069/*
2070 * Get first established socket starting from bucket given in st->bucket.
2071 * If st->bucket is zero, the very first socket in the hash is returned.
2072 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002073static void *established_get_first(struct seq_file *seq)
2074{
Jianjun Kong5799de02008-11-03 02:49:10 -08002075 struct tcp_iter_state *st = seq->private;
Denis V. Luneva4146b12008-04-13 22:11:14 -07002076 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002077 void *rc = NULL;
2078
Tom Herberta8b690f2010-06-07 00:43:42 -07002079 st->offset = 0;
2080 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002081 struct sock *sk;
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002082 struct hlist_nulls_node *node;
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07002083 struct inet_timewait_sock *tw;
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002084 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002085
Andi Kleen6eac5602008-08-28 01:08:02 -07002086 /* Lockless fast path for the common case of empty buckets */
2087 if (empty_bucket(st))
2088 continue;
2089
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002090 spin_lock_bh(lock);
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002091 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
Daniel Lezcanof40c8172008-03-21 04:13:54 -07002092 if (sk->sk_family != st->family ||
YOSHIFUJI Hideaki878628f2008-03-26 03:57:35 +09002093 !net_eq(sock_net(sk), net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002094 continue;
2095 }
2096 rc = sk;
2097 goto out;
2098 }
2099 st->state = TCP_SEQ_STATE_TIME_WAIT;
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07002100 inet_twsk_for_each(tw, node,
Eric Dumazetdbca9b2752007-02-08 14:16:46 -08002101 &tcp_hashinfo.ehash[st->bucket].twchain) {
Pavel Emelyanov28518fc2008-03-21 15:52:00 -07002102 if (tw->tw_family != st->family ||
YOSHIFUJI Hideaki878628f2008-03-26 03:57:35 +09002103 !net_eq(twsk_net(tw), net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002104 continue;
2105 }
2106 rc = tw;
2107 goto out;
2108 }
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002109 spin_unlock_bh(lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002110 st->state = TCP_SEQ_STATE_ESTABLISHED;
2111 }
2112out:
2113 return rc;
2114}
2115
2116static void *established_get_next(struct seq_file *seq, void *cur)
2117{
2118 struct sock *sk = cur;
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07002119 struct inet_timewait_sock *tw;
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002120 struct hlist_nulls_node *node;
Jianjun Kong5799de02008-11-03 02:49:10 -08002121 struct tcp_iter_state *st = seq->private;
Denis V. Luneva4146b12008-04-13 22:11:14 -07002122 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002123
2124 ++st->num;
Tom Herberta8b690f2010-06-07 00:43:42 -07002125 ++st->offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002126
2127 if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2128 tw = cur;
2129 tw = tw_next(tw);
2130get_tw:
YOSHIFUJI Hideaki878628f2008-03-26 03:57:35 +09002131 while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002132 tw = tw_next(tw);
2133 }
2134 if (tw) {
2135 cur = tw;
2136 goto out;
2137 }
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002138 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002139 st->state = TCP_SEQ_STATE_ESTABLISHED;
2140
Andi Kleen6eac5602008-08-28 01:08:02 -07002141 /* Look for next non empty bucket */
Tom Herberta8b690f2010-06-07 00:43:42 -07002142 st->offset = 0;
Eric Dumazetf373b532009-10-09 00:16:19 +00002143 while (++st->bucket <= tcp_hashinfo.ehash_mask &&
Andi Kleen6eac5602008-08-28 01:08:02 -07002144 empty_bucket(st))
2145 ;
Eric Dumazetf373b532009-10-09 00:16:19 +00002146 if (st->bucket > tcp_hashinfo.ehash_mask)
Andi Kleen6eac5602008-08-28 01:08:02 -07002147 return NULL;
2148
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002149 spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002150 sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002151 } else
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002152 sk = sk_nulls_next(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002153
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002154 sk_nulls_for_each_from(sk, node) {
YOSHIFUJI Hideaki878628f2008-03-26 03:57:35 +09002155 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002156 goto found;
2157 }
2158
2159 st->state = TCP_SEQ_STATE_TIME_WAIT;
Eric Dumazetdbca9b2752007-02-08 14:16:46 -08002160 tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002161 goto get_tw;
2162found:
2163 cur = sk;
2164out:
2165 return cur;
2166}
2167
2168static void *established_get_idx(struct seq_file *seq, loff_t pos)
2169{
Tom Herberta8b690f2010-06-07 00:43:42 -07002170 struct tcp_iter_state *st = seq->private;
2171 void *rc;
2172
2173 st->bucket = 0;
2174 rc = established_get_first(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002175
2176 while (rc && pos) {
2177 rc = established_get_next(seq, rc);
2178 --pos;
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02002179 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002180 return rc;
2181}
2182
2183static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2184{
2185 void *rc;
Jianjun Kong5799de02008-11-03 02:49:10 -08002186 struct tcp_iter_state *st = seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002187
Linus Torvalds1da177e2005-04-16 15:20:36 -07002188 st->state = TCP_SEQ_STATE_LISTENING;
2189 rc = listening_get_idx(seq, &pos);
2190
2191 if (!rc) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002192 st->state = TCP_SEQ_STATE_ESTABLISHED;
2193 rc = established_get_idx(seq, pos);
2194 }
2195
2196 return rc;
2197}
2198
Tom Herberta8b690f2010-06-07 00:43:42 -07002199static void *tcp_seek_last_pos(struct seq_file *seq)
2200{
2201 struct tcp_iter_state *st = seq->private;
2202 int offset = st->offset;
2203 int orig_num = st->num;
2204 void *rc = NULL;
2205
2206 switch (st->state) {
2207 case TCP_SEQ_STATE_OPENREQ:
2208 case TCP_SEQ_STATE_LISTENING:
2209 if (st->bucket >= INET_LHTABLE_SIZE)
2210 break;
2211 st->state = TCP_SEQ_STATE_LISTENING;
2212 rc = listening_get_next(seq, NULL);
2213 while (offset-- && rc)
2214 rc = listening_get_next(seq, rc);
2215 if (rc)
2216 break;
2217 st->bucket = 0;
2218 /* Fallthrough */
2219 case TCP_SEQ_STATE_ESTABLISHED:
2220 case TCP_SEQ_STATE_TIME_WAIT:
2221 st->state = TCP_SEQ_STATE_ESTABLISHED;
2222 if (st->bucket > tcp_hashinfo.ehash_mask)
2223 break;
2224 rc = established_get_first(seq);
2225 while (offset-- && rc)
2226 rc = established_get_next(seq, rc);
2227 }
2228
2229 st->num = orig_num;
2230
2231 return rc;
2232}
2233
Linus Torvalds1da177e2005-04-16 15:20:36 -07002234static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2235{
Jianjun Kong5799de02008-11-03 02:49:10 -08002236 struct tcp_iter_state *st = seq->private;
Tom Herberta8b690f2010-06-07 00:43:42 -07002237 void *rc;
2238
2239 if (*pos && *pos == st->last_pos) {
2240 rc = tcp_seek_last_pos(seq);
2241 if (rc)
2242 goto out;
2243 }
2244
Linus Torvalds1da177e2005-04-16 15:20:36 -07002245 st->state = TCP_SEQ_STATE_LISTENING;
2246 st->num = 0;
Tom Herberta8b690f2010-06-07 00:43:42 -07002247 st->bucket = 0;
2248 st->offset = 0;
2249 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2250
2251out:
2252 st->last_pos = *pos;
2253 return rc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002254}
2255
2256static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2257{
Tom Herberta8b690f2010-06-07 00:43:42 -07002258 struct tcp_iter_state *st = seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002259 void *rc = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002260
2261 if (v == SEQ_START_TOKEN) {
2262 rc = tcp_get_idx(seq, 0);
2263 goto out;
2264 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002265
2266 switch (st->state) {
2267 case TCP_SEQ_STATE_OPENREQ:
2268 case TCP_SEQ_STATE_LISTENING:
2269 rc = listening_get_next(seq, v);
2270 if (!rc) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002271 st->state = TCP_SEQ_STATE_ESTABLISHED;
Tom Herberta8b690f2010-06-07 00:43:42 -07002272 st->bucket = 0;
2273 st->offset = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002274 rc = established_get_first(seq);
2275 }
2276 break;
2277 case TCP_SEQ_STATE_ESTABLISHED:
2278 case TCP_SEQ_STATE_TIME_WAIT:
2279 rc = established_get_next(seq, v);
2280 break;
2281 }
2282out:
2283 ++*pos;
Tom Herberta8b690f2010-06-07 00:43:42 -07002284 st->last_pos = *pos;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002285 return rc;
2286}
2287
2288static void tcp_seq_stop(struct seq_file *seq, void *v)
2289{
Jianjun Kong5799de02008-11-03 02:49:10 -08002290 struct tcp_iter_state *st = seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002291
2292 switch (st->state) {
2293 case TCP_SEQ_STATE_OPENREQ:
2294 if (v) {
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002295 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2296 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002297 }
2298 case TCP_SEQ_STATE_LISTENING:
2299 if (v != SEQ_START_TOKEN)
Eric Dumazet5caea4e2008-11-20 00:40:07 -08002300 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002301 break;
2302 case TCP_SEQ_STATE_TIME_WAIT:
2303 case TCP_SEQ_STATE_ESTABLISHED:
2304 if (v)
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002305 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002306 break;
2307 }
2308}
2309
2310static int tcp_seq_open(struct inode *inode, struct file *file)
2311{
2312 struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002313 struct tcp_iter_state *s;
Denis V. Lunev52d6f3f2008-04-13 22:12:41 -07002314 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002315
Denis V. Lunev52d6f3f2008-04-13 22:12:41 -07002316 err = seq_open_net(inode, file, &afinfo->seq_ops,
2317 sizeof(struct tcp_iter_state));
2318 if (err < 0)
2319 return err;
Daniel Lezcanof40c8172008-03-21 04:13:54 -07002320
Denis V. Lunev52d6f3f2008-04-13 22:12:41 -07002321 s = ((struct seq_file *)file->private_data)->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002322 s->family = afinfo->family;
Tom Herberta8b690f2010-06-07 00:43:42 -07002323 s->last_pos = 0;
Daniel Lezcanof40c8172008-03-21 04:13:54 -07002324 return 0;
2325}
2326
Daniel Lezcano6f8b13b2008-03-21 04:14:45 -07002327int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002328{
2329 int rc = 0;
2330 struct proc_dir_entry *p;
2331
Denis V. Lunev68fcadd2008-04-13 22:13:30 -07002332 afinfo->seq_fops.open = tcp_seq_open;
2333 afinfo->seq_fops.read = seq_read;
2334 afinfo->seq_fops.llseek = seq_lseek;
2335 afinfo->seq_fops.release = seq_release_net;
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02002336
Denis V. Lunev9427c4b2008-04-13 22:12:13 -07002337 afinfo->seq_ops.start = tcp_seq_start;
2338 afinfo->seq_ops.next = tcp_seq_next;
2339 afinfo->seq_ops.stop = tcp_seq_stop;
2340
Denis V. Lunev84841c32008-05-02 04:10:08 -07002341 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2342 &afinfo->seq_fops, afinfo);
2343 if (!p)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002344 rc = -ENOMEM;
2345 return rc;
2346}
Eric Dumazet4bc2f182010-07-09 21:22:10 +00002347EXPORT_SYMBOL(tcp_proc_register);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002348
Daniel Lezcano6f8b13b2008-03-21 04:14:45 -07002349void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002350{
Daniel Lezcano6f8b13b2008-03-21 04:14:45 -07002351 proc_net_remove(net, afinfo->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002352}
Eric Dumazet4bc2f182010-07-09 21:22:10 +00002353EXPORT_SYMBOL(tcp_proc_unregister);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002354
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07002355static void get_openreq4(struct sock *sk, struct request_sock *req,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002356 struct seq_file *f, int i, int uid, int *len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002357{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002358 const struct inet_request_sock *ireq = inet_rsk(req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002359 int ttd = req->expires - jiffies;
2360
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002361 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2362 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
Linus Torvalds1da177e2005-04-16 15:20:36 -07002363 i,
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002364 ireq->loc_addr,
Eric Dumazetc720c7e2009-10-15 06:30:45 +00002365 ntohs(inet_sk(sk)->inet_sport),
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002366 ireq->rmt_addr,
2367 ntohs(ireq->rmt_port),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002368 TCP_SYN_RECV,
2369 0, 0, /* could print option size, but that is af dependent. */
2370 1, /* timers active (only the expire timer) */
2371 jiffies_to_clock_t(ttd),
2372 req->retrans,
2373 uid,
2374 0, /* non standard timer */
2375 0, /* open_requests have no inode */
2376 atomic_read(&sk->sk_refcnt),
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002377 req,
2378 len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002379}
2380
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002381static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002382{
2383 int timer_active;
2384 unsigned long timer_expires;
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002385 struct tcp_sock *tp = tcp_sk(sk);
2386 const struct inet_connection_sock *icsk = inet_csk(sk);
2387 struct inet_sock *inet = inet_sk(sk);
Eric Dumazetc720c7e2009-10-15 06:30:45 +00002388 __be32 dest = inet->inet_daddr;
2389 __be32 src = inet->inet_rcv_saddr;
2390 __u16 destp = ntohs(inet->inet_dport);
2391 __u16 srcp = ntohs(inet->inet_sport);
Eric Dumazet49d09002009-12-03 16:06:13 -08002392 int rx_queue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002393
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002394 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002395 timer_active = 1;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002396 timer_expires = icsk->icsk_timeout;
2397 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002398 timer_active = 4;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002399 timer_expires = icsk->icsk_timeout;
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002400 } else if (timer_pending(&sk->sk_timer)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002401 timer_active = 2;
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002402 timer_expires = sk->sk_timer.expires;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002403 } else {
2404 timer_active = 0;
2405 timer_expires = jiffies;
2406 }
2407
Eric Dumazet49d09002009-12-03 16:06:13 -08002408 if (sk->sk_state == TCP_LISTEN)
2409 rx_queue = sk->sk_ack_backlog;
2410 else
2411 /*
2412 * because we dont lock socket, we might find a transient negative value
2413 */
2414 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2415
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002416 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
Stephen Hemminger7be87352008-06-27 20:00:19 -07002417 "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002418 i, src, srcp, dest, destp, sk->sk_state,
Sridhar Samudrala47da8ee2006-06-27 13:29:00 -07002419 tp->write_seq - tp->snd_una,
Eric Dumazet49d09002009-12-03 16:06:13 -08002420 rx_queue,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002421 timer_active,
2422 jiffies_to_clock_t(timer_expires - jiffies),
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002423 icsk->icsk_retransmits,
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002424 sock_i_uid(sk),
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03002425 icsk->icsk_probes_out,
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002426 sock_i_ino(sk),
2427 atomic_read(&sk->sk_refcnt), sk,
Stephen Hemminger7be87352008-06-27 20:00:19 -07002428 jiffies_to_clock_t(icsk->icsk_rto),
2429 jiffies_to_clock_t(icsk->icsk_ack.ato),
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002430 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002431 tp->snd_cwnd,
Ilpo Järvinen0b6a05c2009-09-15 01:30:10 -07002432 tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002433 len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002434}
2435
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02002436static void get_timewait4_sock(struct inet_timewait_sock *tw,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002437 struct seq_file *f, int i, int *len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002438{
Al Viro23f33c22006-09-27 18:43:50 -07002439 __be32 dest, src;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002440 __u16 destp, srcp;
2441 int ttd = tw->tw_ttd - jiffies;
2442
2443 if (ttd < 0)
2444 ttd = 0;
2445
2446 dest = tw->tw_daddr;
2447 src = tw->tw_rcv_saddr;
2448 destp = ntohs(tw->tw_dport);
2449 srcp = ntohs(tw->tw_sport);
2450
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002451 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2452 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n",
Linus Torvalds1da177e2005-04-16 15:20:36 -07002453 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2454 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002455 atomic_read(&tw->tw_refcnt), tw, len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002456}
2457
2458#define TMPSZ 150
2459
2460static int tcp4_seq_show(struct seq_file *seq, void *v)
2461{
Jianjun Kong5799de02008-11-03 02:49:10 -08002462 struct tcp_iter_state *st;
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002463 int len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002464
2465 if (v == SEQ_START_TOKEN) {
2466 seq_printf(seq, "%-*s\n", TMPSZ - 1,
2467 " sl local_address rem_address st tx_queue "
2468 "rx_queue tr tm->when retrnsmt uid timeout "
2469 "inode");
2470 goto out;
2471 }
2472 st = seq->private;
2473
2474 switch (st->state) {
2475 case TCP_SEQ_STATE_LISTENING:
2476 case TCP_SEQ_STATE_ESTABLISHED:
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002477 get_tcp4_sock(v, seq, st->num, &len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002478 break;
2479 case TCP_SEQ_STATE_OPENREQ:
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002480 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002481 break;
2482 case TCP_SEQ_STATE_TIME_WAIT:
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002483 get_timewait4_sock(v, seq, st->num, &len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002484 break;
2485 }
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002486 seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002487out:
2488 return 0;
2489}
2490
Linus Torvalds1da177e2005-04-16 15:20:36 -07002491static struct tcp_seq_afinfo tcp4_seq_afinfo = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002492 .name = "tcp",
2493 .family = AF_INET,
Denis V. Lunev5f4472c2008-04-13 22:13:53 -07002494 .seq_fops = {
2495 .owner = THIS_MODULE,
2496 },
Denis V. Lunev9427c4b2008-04-13 22:12:13 -07002497 .seq_ops = {
2498 .show = tcp4_seq_show,
2499 },
Linus Torvalds1da177e2005-04-16 15:20:36 -07002500};
2501
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002502static int __net_init tcp4_proc_init_net(struct net *net)
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002503{
2504 return tcp_proc_register(net, &tcp4_seq_afinfo);
2505}
2506
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002507static void __net_exit tcp4_proc_exit_net(struct net *net)
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002508{
2509 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2510}
2511
2512static struct pernet_operations tcp4_net_ops = {
2513 .init = tcp4_proc_init_net,
2514 .exit = tcp4_proc_exit_net,
2515};
2516
Linus Torvalds1da177e2005-04-16 15:20:36 -07002517int __init tcp4_proc_init(void)
2518{
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002519 return register_pernet_subsys(&tcp4_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002520}
2521
2522void tcp4_proc_exit(void)
2523{
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002524 unregister_pernet_subsys(&tcp4_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002525}
2526#endif /* CONFIG_PROC_FS */
2527
Herbert Xubf296b12008-12-15 23:43:36 -08002528struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2529{
Eric Dumazetb71d1d42011-04-22 04:53:02 +00002530 const struct iphdr *iph = skb_gro_network_header(skb);
Herbert Xubf296b12008-12-15 23:43:36 -08002531
2532 switch (skb->ip_summed) {
2533 case CHECKSUM_COMPLETE:
Herbert Xu86911732009-01-29 14:19:50 +00002534 if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
Herbert Xubf296b12008-12-15 23:43:36 -08002535 skb->csum)) {
2536 skb->ip_summed = CHECKSUM_UNNECESSARY;
2537 break;
2538 }
2539
2540 /* fall through */
2541 case CHECKSUM_NONE:
2542 NAPI_GRO_CB(skb)->flush = 1;
2543 return NULL;
2544 }
2545
2546 return tcp_gro_receive(head, skb);
2547}
Herbert Xubf296b12008-12-15 23:43:36 -08002548
2549int tcp4_gro_complete(struct sk_buff *skb)
2550{
Eric Dumazetb71d1d42011-04-22 04:53:02 +00002551 const struct iphdr *iph = ip_hdr(skb);
Herbert Xubf296b12008-12-15 23:43:36 -08002552 struct tcphdr *th = tcp_hdr(skb);
2553
2554 th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2555 iph->saddr, iph->daddr, 0);
2556 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2557
2558 return tcp_gro_complete(skb);
2559}
Herbert Xubf296b12008-12-15 23:43:36 -08002560
Linus Torvalds1da177e2005-04-16 15:20:36 -07002561struct proto tcp_prot = {
2562 .name = "TCP",
2563 .owner = THIS_MODULE,
2564 .close = tcp_close,
2565 .connect = tcp_v4_connect,
2566 .disconnect = tcp_disconnect,
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002567 .accept = inet_csk_accept,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002568 .ioctl = tcp_ioctl,
2569 .init = tcp_v4_init_sock,
2570 .destroy = tcp_v4_destroy_sock,
2571 .shutdown = tcp_shutdown,
2572 .setsockopt = tcp_setsockopt,
2573 .getsockopt = tcp_getsockopt,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002574 .recvmsg = tcp_recvmsg,
Changli Gao7ba42912010-07-10 20:41:55 +00002575 .sendmsg = tcp_sendmsg,
2576 .sendpage = tcp_sendpage,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002577 .backlog_rcv = tcp_v4_do_rcv,
Arnaldo Carvalho de Meloab1e0a12008-02-03 04:06:04 -08002578 .hash = inet_hash,
2579 .unhash = inet_unhash,
2580 .get_port = inet_csk_get_port,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002581 .enter_memory_pressure = tcp_enter_memory_pressure,
2582 .sockets_allocated = &tcp_sockets_allocated,
Arnaldo Carvalho de Melo0a5578c2005-08-09 20:11:41 -07002583 .orphan_count = &tcp_orphan_count,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002584 .memory_allocated = &tcp_memory_allocated,
2585 .memory_pressure = &tcp_memory_pressure,
2586 .sysctl_mem = sysctl_tcp_mem,
2587 .sysctl_wmem = sysctl_tcp_wmem,
2588 .sysctl_rmem = sysctl_tcp_rmem,
2589 .max_header = MAX_TCP_HEADER,
2590 .obj_size = sizeof(struct tcp_sock),
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002591 .slab_flags = SLAB_DESTROY_BY_RCU,
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -08002592 .twsk_prot = &tcp_timewait_sock_ops,
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07002593 .rsk_prot = &tcp_request_sock_ops,
Pavel Emelyanov39d8cda2008-03-22 16:50:58 -07002594 .h.hashinfo = &tcp_hashinfo,
Changli Gao7ba42912010-07-10 20:41:55 +00002595 .no_autobind = true,
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08002596#ifdef CONFIG_COMPAT
2597 .compat_setsockopt = compat_tcp_setsockopt,
2598 .compat_getsockopt = compat_tcp_getsockopt,
2599#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002600};
Eric Dumazet4bc2f182010-07-09 21:22:10 +00002601EXPORT_SYMBOL(tcp_prot);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002602
Denis V. Lunev046ee902008-04-03 14:31:33 -07002603
2604static int __net_init tcp_sk_init(struct net *net)
2605{
2606 return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2607 PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2608}
2609
2610static void __net_exit tcp_sk_exit(struct net *net)
2611{
2612 inet_ctl_sock_destroy(net->ipv4.tcp_sock);
Eric W. Biedermanb099ce22009-12-03 02:29:09 +00002613}
2614
2615static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2616{
2617 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
Denis V. Lunev046ee902008-04-03 14:31:33 -07002618}
2619
2620static struct pernet_operations __net_initdata tcp_sk_ops = {
Eric W. Biedermanb099ce22009-12-03 02:29:09 +00002621 .init = tcp_sk_init,
2622 .exit = tcp_sk_exit,
2623 .exit_batch = tcp_sk_exit_batch,
Denis V. Lunev046ee902008-04-03 14:31:33 -07002624};
2625
Denis V. Lunev9b0f9762008-02-29 11:13:15 -08002626void __init tcp_v4_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002627{
Eric Dumazet5caea4e2008-11-20 00:40:07 -08002628 inet_hashinfo_init(&tcp_hashinfo);
Eric W. Biederman6a1b3052009-02-22 00:10:18 -08002629 if (register_pernet_subsys(&tcp_sk_ops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002630 panic("Failed to create the TCP control socket.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002631}