blob: 27a0cc8cc88852d492375e0d86f97815fb0d2050 [file] [log] [blame]
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *		code split from:
 *		  linux/ipv4/tcp.c
 *		  linux/ipv4/tcp_input.c
 *		  linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *		Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:		ip_dynaddr bits
 *		Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */
52
Linus Torvalds1da177e2005-04-16 15:20:36 -070053
Herbert Xueb4dea52008-12-29 23:04:08 -080054#include <linux/bottom_half.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070055#include <linux/types.h>
56#include <linux/fcntl.h>
57#include <linux/module.h>
58#include <linux/random.h>
59#include <linux/cache.h>
60#include <linux/jhash.h>
61#include <linux/init.h>
62#include <linux/times.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090063#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070064
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020065#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070066#include <net/icmp.h>
Arnaldo Carvalho de Melo304a1612005-08-09 19:59:20 -070067#include <net/inet_hashtables.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070068#include <net/tcp.h>
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -030069#include <net/transp_v6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070070#include <net/ipv6.h>
71#include <net/inet_common.h>
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -080072#include <net/timewait_sock.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070073#include <net/xfrm.h>
Chris Leech1a2449a2006-05-23 18:05:53 -070074#include <net/netdma.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070075
76#include <linux/inet.h>
77#include <linux/ipv6.h>
78#include <linux/stddef.h>
79#include <linux/proc_fs.h>
80#include <linux/seq_file.h>
81
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -080082#include <linux/crypto.h>
83#include <linux/scatterlist.h>
84
/* Sysctl: permit reuse of a TIME-WAIT socket for a new outgoing connection
 * when it is safe from a sequence-number/timestamp viewpoint (consulted in
 * tcp_twsk_unique() below).
 */
int sysctl_tcp_tw_reuse __read_mostly;
/* Sysctl knob exported to other TCP code; NOTE(review): its consumers are
 * not in this file — presumably the receive path trades latency for
 * throughput when set. Verify against callers before documenting further.
 */
int sysctl_tcp_low_latency __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_low_latency);
Linus Torvalds1da177e2005-04-16 15:20:36 -070088
Linus Torvalds1da177e2005-04-16 15:20:36 -070089
#ifdef CONFIG_TCP_MD5SIG
/* Forward declarations for the TCP-MD5 (RFC 2385) helpers defined later in
 * this file: per-peer key lookup and signing of a prebuilt TCP header.
 */
static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
						   __be32 addr);
static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, struct tcphdr *th);
#else
/* Stub for kernels built without MD5 signature support: no key ever exists,
 * so callers (e.g. tcp_v4_send_reset()) take their unsigned path.
 */
static inline
struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
{
	return NULL;
}
#endif
102
/* Global TCP socket hash tables; used for demultiplexing incoming segments
 * (see the inet_lookup() call in tcp_v4_err() below).
 */
struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700105
Gerrit Renkera94f7232006-11-10 14:06:49 -0800106static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700107{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700108 return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
109 ip_hdr(skb)->saddr,
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700110 tcp_hdr(skb)->dest,
111 tcp_hdr(skb)->source);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112}
113
/* Decide whether the TIME-WAIT socket @sktw may be superseded by a new
 * connection on @sk using the same 4-tuple.
 *
 * Returns 1 when reuse is safe: the new socket inherits the TIME-WAIT
 * socket's timestamp state and starts its write_seq safely beyond the old
 * connection's final sequence number, and a reference on @sktw is taken
 * (sock_hold()) for the caller.  Returns 0 otherwise.
 * When @twp is NULL the caller holds no bucket pointer, so only the
 * timestamp test gates reuse; otherwise sysctl_tcp_tw_reuse must also be
 * enabled and at least one second must have elapsed.
 */
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		/* Start beyond the old connection's last send window;
		 * write_seq == 0 is reserved to mean "pick a secure ISN".
		 */
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);
145
/* This will initiate an outgoing connection.
 *
 * Resolves a route to the address in @uaddr, binds a local port via
 * inet_hash_connect(), initialises sequence/timestamp state, and sends the
 * SYN via tcp_connect().  Returns 0 on success or a negative errno; on any
 * failure after entering SYN-SENT the socket is returned to TCP_CLOSE and
 * the local port is released.
 */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	__be16 orig_sport, orig_dport;
	struct rtable *rt;
	__be32 daddr, nexthop;
	int tmp;
	int err;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	/* With a strict-source-route IP option the first hop (faddr)
	 * differs from the final destination address.
	 */
	nexthop = daddr = usin->sin_addr.s_addr;
	if (inet->opt && inet->opt->srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet->opt->faddr;
	}

	/* Remember the ports used for the initial route lookup so
	 * ip_route_newports() can detect a change after port selection.
	 */
	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,
			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			       IPPROTO_TCP,
			       orig_sport, orig_dport, sk, 1);
	if (tmp < 0) {
		if (tmp == -ENETUNREACH)
			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return tmp;
	}

	/* TCP never connects to multicast or broadcast addresses. */
	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet->opt || !inet->opt->srr)
		daddr = rt->rt_dst;

	if (!inet->inet_saddr)
		inet->inet_saddr = rt->rt_src;
	inet->inet_rcv_saddr = inet->inet_saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq		   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
		struct inet_peer *peer = rt_get_peer(rt);
		/*
		 * VJ's idea. We save last timestamp seen from
		 * the destination in peer table, when entering state
		 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
		 * when trying new connection.
		 */
		if (peer) {
			inet_peer_refcheck(peer);
			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
				tp->rx_opt.ts_recent = peer->tcp_ts;
			}
		}
	}

	inet->inet_dport = usin->sin_port;
	inet->inet_daddr = daddr;

	/* IP options consume header space that must be accounted for in
	 * MSS calculations.
	 */
	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet->opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	/* Re-derive the route now that the source port is final. */
	err = ip_route_newports(&rt, IPPROTO_TCP,
				orig_sport, orig_dport,
				inet->inet_sport, inet->inet_dport, sk);
	if (err)
		goto failure;

	/* OK, now commit destination to socket.  */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);

	/* write_seq == 0 means no ISN was inherited (e.g. from a reused
	 * TIME-WAIT socket), so generate a secure one.
	 */
	if (!tp->write_seq)
		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
							   inet->inet_daddr,
							   inet->inet_sport,
							   usin->sin_port);

	inet->inet_id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);
	rt = NULL;	/* route now owned by the socket (sk_setup_caps) */
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700276
/*
 * This routine does path mtu discovery as defined in RFC1191.
 *
 * Called from tcp_v4_err() on ICMP_FRAG_NEEDED.  @mtu is the next-hop MTU
 * reported by the ICMP message; @iph is the offending header (currently
 * unused here — kept for the caller's signature).  Lowers the cached path
 * MTU and, when our cached MSS was based on a larger MTU, resyncs the MSS
 * and retransmits immediately.
 */
static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);

	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
	 * send out by Linux are always <576bytes so they should go through
	 * unfragmented).
	 */
	if (sk->sk_state == TCP_LISTEN)
		return;

	/* We don't check in the destentry if pmtu discovery is forbidden
	 * on this route. We just assume that no packet_to_big packets
	 * are send back when pmtu discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
	if ((dst = __sk_dst_check(sk, 0)) == NULL)
		return;

	dst->ops->update_pmtu(dst, mtu);

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	/* Re-read: update_pmtu() may have clamped the value. */
	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
323
/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 */

void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	/* The ICMP payload starts with the offending packet's IP header,
	 * followed by (at least) the first 8 bytes of its TCP header.
	 */
	struct iphdr *iph = (struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	__u32 seq;
	__u32 remaining;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	/* Too short to contain the embedded TCP port/seq info. */
	if (icmp_skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}

	/* Demux back to the socket the offending segment belonged to.
	 * Note the swap: the embedded packet was sent by us, so its
	 * daddr/dest are the remote end from our point of view.
	 */
	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			iph->saddr, th->source, inet_iif(icmp_skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 */
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	/* IP_MINTTL (anti-spoofing): drop errors arriving with a TTL
	 * below the socket's configured minimum.
	 */
	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	/* The quoted sequence number must refer to unacknowledged data,
	 * otherwise the ICMP is stale or forged.
	 */
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			if (!sock_owned_by_user(sk))
				do_pmtu_discovery(sk, iph, info);
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff)
			break;

		if (sock_owned_by_user(sk))
			break;

		/* Connectivity may be restored: undo one exponential
		 * backoff step and rearm (or fire) the retransmit timer
		 * with whatever time remains of the shortened RTO.
		 */
		icsk->icsk_backoff--;
		inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
					 icsk->icsk_backoff;
		tcp_bound_rto(sk);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
				tcp_time_stamp - TCP_SKB_CB(skb)->when);

		if (remaining) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		/* Scoped to the switch body; only used by the TCP_LISTEN
		 * case below (never initialised by the switch itself).
		 */
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		WARN_ON(req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can f.e. if SYNs crossed.
			     */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even this two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
527
Herbert Xu419f9f82010-04-11 02:15:53 +0000528static void __tcp_v4_send_check(struct sk_buff *skb,
529 __be32 saddr, __be32 daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700530{
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700531 struct tcphdr *th = tcp_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532
Patrick McHardy84fa7932006-08-29 16:44:56 -0700533 if (skb->ip_summed == CHECKSUM_PARTIAL) {
Herbert Xu419f9f82010-04-11 02:15:53 +0000534 th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
Herbert Xu663ead32007-04-09 11:59:07 -0700535 skb->csum_start = skb_transport_header(skb) - skb->head;
Al Viroff1dcad2006-11-20 18:07:29 -0800536 skb->csum_offset = offsetof(struct tcphdr, check);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700537 } else {
Herbert Xu419f9f82010-04-11 02:15:53 +0000538 th->check = tcp_v4_check(skb->len, saddr, daddr,
Joe Perches07f07572008-11-19 15:44:53 -0800539 csum_partial(th,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700540 th->doff << 2,
541 skb->csum));
542 }
543}
544
Herbert Xu419f9f82010-04-11 02:15:53 +0000545/* This routine computes an IPv4 TCP checksum. */
Herbert Xubb296242010-04-11 02:15:55 +0000546void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
Herbert Xu419f9f82010-04-11 02:15:53 +0000547{
548 struct inet_sock *inet = inet_sk(sk);
549
550 __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
551}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000552EXPORT_SYMBOL(tcp_v4_send_check);
Herbert Xu419f9f82010-04-11 02:15:53 +0000553
Herbert Xua430a432006-07-08 13:34:56 -0700554int tcp_v4_gso_send_check(struct sk_buff *skb)
555{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700556 const struct iphdr *iph;
Herbert Xua430a432006-07-08 13:34:56 -0700557 struct tcphdr *th;
558
559 if (!pskb_may_pull(skb, sizeof(*th)))
560 return -EINVAL;
561
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700562 iph = ip_hdr(skb);
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700563 th = tcp_hdr(skb);
Herbert Xua430a432006-07-08 13:34:56 -0700564
565 th->check = 0;
Patrick McHardy84fa7932006-08-29 16:44:56 -0700566 skb->ip_summed = CHECKSUM_PARTIAL;
Herbert Xu419f9f82010-04-11 02:15:53 +0000567 __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
Herbert Xua430a432006-07-08 13:34:56 -0700568 return 0;
569}
570
/*
 * This routine will send an RST to the other tcp.
 *
 * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 * for reset.
 * Answer: if a packet caused RST, it is not for a socket
 * existing in our system, if it is matched to a socket,
 * it is just duplicate segment or bug in other side's TCP.
 * So that we build reply only basing on parameters
 * arrived with segment.
 * Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	/* On-stack reply: bare TCP header, optionally followed by an
	 * MD5 signature option.
	 */
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	/* Only reply to segments that were actually routed to us. */
	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		/* No ACK to echo: ACK everything the segment consumed
		 * (SYN and FIN each count as one sequence number).
		 */
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	/* Sign the RST when the (possibly NULL) socket has an MD5 key
	 * for the peer.
	 */
	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;

	net = dev_net(skb_dst(skb)->dev);
	ip_send_reply(net->ipv4.tcp_sock, skb,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
}
654
/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */

/* Build and send a bare ACK in reply to @skb (no socket context).
 * @seq/@ack/@win fill the header; @ts, when non-zero, adds a TCP
 * timestamp option echoing it; @oif pins the output interface; @key, when
 * present, appends an MD5 signature option; @reply_flags is passed
 * through to ip_send_reply() (e.g. IP_REPLY_ARG_NOSRCCHECK).
 */
static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 ts, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags)
{
	struct tcphdr *th = tcp_hdr(skb);
	/* On-stack reply sized for header + timestamp option
	 * (+ MD5 option when configured).
	 */
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
	struct net *net = dev_net(skb_dst(skb)->dev);

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (ts) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tcp_time_stamp);
		rep.opt[2] = htonl(ts);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		/* MD5 option goes after the timestamp option, if any. */
		int offset = (ts) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;

	ip_send_reply(net->ipv4.tcp_sock, skb,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}
728
/*
 * ACK a segment that arrived for a socket in TIME-WAIT: echo the saved
 * snd_nxt/rcv_nxt, the receive window (de-scaled with tw_rcv_wscale) and
 * the peer's last timestamp back through tcp_v4_send_ack(), honouring the
 * bound device and any MD5 key kept on the timewait socket.  Drops the
 * timewait-socket reference with inet_twsk_put() before returning.
 */
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			/* skip source-address check for transparent sockets */
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
			);

	inet_twsk_put(tw);
}
744
/*
 * ACK on behalf of a still-embryonic connection (request_sock): sequence
 * numbers come from the stored ISNs (+1 to step past the SYN), no output
 * interface is forced (oif == 0), and the MD5 key -- if any -- is looked
 * up by the destination address of the incoming segment.
 */
static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
			req->ts_recent,
			0,
			tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
}
755
/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 *
 *	Returns 0 on success, -1 on routing or allocation failure, or the
 *	net_xmit_eval()-filtered transmit error.  A caller-supplied dst is
 *	consumed (dst_release) in all cases.
 */
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
			      struct request_sock *req,
			      struct request_values *rvp)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int err = -1;
	struct sk_buff * skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, rvp);

	if (skb) {
		/* Fill in the TCP checksum before handing to IP. */
		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);

		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
					    ireq->rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
	}

	dst_release(dst);
	return err;
}
787
Octavian Purdila72659ec2010-01-17 19:09:39 -0800788static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
William Allen Simpsone6b4d112009-12-02 18:07:39 +0000789 struct request_values *rvp)
Denis V. Lunevfd80eb92008-02-29 11:43:03 -0800790{
Octavian Purdila72659ec2010-01-17 19:09:39 -0800791 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
792 return tcp_v4_send_synack(sk, NULL, req, rvp);
Denis V. Lunevfd80eb92008-02-29 11:43:03 -0800793}
794
Linus Torvalds1da177e2005-04-16 15:20:36 -0700795/*
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700796 * IPv4 request_sock destructor.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700797 */
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700798static void tcp_v4_reqsk_destructor(struct request_sock *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700799{
Jesper Juhla51482b2005-11-08 09:41:34 -0800800 kfree(inet_rsk(req)->opt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700801}
802
Florian Westphal2a1d4bd2010-06-03 00:43:12 +0000803static void syn_flood_warning(const struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804{
Florian Westphal2a1d4bd2010-06-03 00:43:12 +0000805 const char *msg;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700806
Florian Westphal2a1d4bd2010-06-03 00:43:12 +0000807#ifdef CONFIG_SYN_COOKIES
808 if (sysctl_tcp_syncookies)
809 msg = "Sending cookies";
810 else
Arnaldo Carvalho de Melo80e40da2006-01-04 01:58:06 -0200811#endif
Florian Westphal2a1d4bd2010-06-03 00:43:12 +0000812 msg = "Dropping request";
813
814 pr_info("TCP: Possible SYN flooding on port %d. %s.\n",
815 ntohs(tcp_hdr(skb)->dest), msg);
816}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700817
818/*
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700819 * Save and compile IPv4 options into the request_sock if needed.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700820 */
Stephen Hemminger40efc6f2006-01-03 16:03:49 -0800821static struct ip_options *tcp_v4_save_options(struct sock *sk,
822 struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700823{
824 struct ip_options *opt = &(IPCB(skb)->opt);
825 struct ip_options *dopt = NULL;
826
827 if (opt && opt->optlen) {
828 int opt_size = optlength(opt);
829 dopt = kmalloc(opt_size, GFP_ATOMIC);
830 if (dopt) {
831 if (ip_options_echo(dopt, skb)) {
832 kfree(dopt);
833 dopt = NULL;
834 }
835 }
836 }
837 return dopt;
838}
839
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800840#ifdef CONFIG_TCP_MD5SIG
841/*
842 * RFC2385 MD5 checksumming requires a mapping of
843 * IP address->MD5 Key.
844 * We need to maintain these in the sk structure.
845 */
846
847/* Find the Key structure for an address. */
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200848static struct tcp_md5sig_key *
849 tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800850{
851 struct tcp_sock *tp = tcp_sk(sk);
852 int i;
853
854 if (!tp->md5sig_info || !tp->md5sig_info->entries4)
855 return NULL;
856 for (i = 0; i < tp->md5sig_info->entries4; i++) {
857 if (tp->md5sig_info->keys4[i].addr == addr)
David S. Millerf8ab18d2007-09-28 15:18:35 -0700858 return &tp->md5sig_info->keys4[i].base;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800859 }
860 return NULL;
861}
862
863struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
864 struct sock *addr_sk)
865{
Eric Dumazetc720c7e2009-10-15 06:30:45 +0000866 return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800867}
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800868EXPORT_SYMBOL(tcp_v4_md5_lookup);
869
Adrian Bunkf5b99bc2006-11-30 17:22:29 -0800870static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
871 struct request_sock *req)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800872{
873 return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
874}
875
/* This can be called on a newly created socket, from other files.
 *
 * Install (or replace) the MD5 key for peer 'addr'.  Takes ownership of
 * 'newkey' in all cases: it is stored on success and kfree'd on every
 * failure path.  Returns 0 on success, -ENOMEM on allocation failure.
 */
int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
		      u8 *newkey, u8 newkeylen)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp4_md5sig_key *keys;

	key = tcp_v4_md5_do_lookup(sk, addr);
	if (key) {
		/* Pre-existing entry - just update that one. */
		kfree(key->key);
		key->key = newkey;
		key->keylen = newkeylen;
	} else {
		struct tcp_md5sig_info *md5sig;

		/* First key on this socket: allocate the md5sig_info
		 * container and disable GSO (segments must be signed
		 * individually). */
		if (!tp->md5sig_info) {
			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
						  GFP_ATOMIC);
			if (!tp->md5sig_info) {
				kfree(newkey);
				return -ENOMEM;
			}
			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		}
		/* Hold a reference on the global MD5 crypto pool. */
		if (tcp_alloc_md5sig_pool(sk) == NULL) {
			kfree(newkey);
			return -ENOMEM;
		}
		md5sig = tp->md5sig_info;

		/* Grow the key array by one slot when it is full
		 * (realloc-by-hand: copy, free old, install new). */
		if (md5sig->alloced4 == md5sig->entries4) {
			keys = kmalloc((sizeof(*keys) *
					(md5sig->entries4 + 1)), GFP_ATOMIC);
			if (!keys) {
				kfree(newkey);
				tcp_free_md5sig_pool();
				return -ENOMEM;
			}

			if (md5sig->entries4)
				memcpy(keys, md5sig->keys4,
				       sizeof(*keys) * md5sig->entries4);

			/* Free old key list, and reference new one */
			kfree(md5sig->keys4);
			md5sig->keys4 = keys;
			md5sig->alloced4++;
		}
		/* Append the new entry in the last slot. */
		md5sig->entries4++;
		md5sig->keys4[md5sig->entries4 - 1].addr = addr;
		md5sig->keys4[md5sig->entries4 - 1].base.key = newkey;
		md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
	}
	return 0;
}
EXPORT_SYMBOL(tcp_v4_md5_do_add);
935
936static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
937 u8 *newkey, u8 newkeylen)
938{
Eric Dumazetc720c7e2009-10-15 06:30:45 +0000939 return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800940 newkey, newkeylen);
941}
942
/*
 * Remove the MD5 key for peer 'addr': free the key material, compact the
 * array in place and drop the MD5-pool reference.  Returns 0 on success,
 * -ENOENT when no key matches.  NOTE(review): dereferences
 * tp->md5sig_info without a NULL check -- callers appear to guarantee it
 * is set (see tcp_v4_parse_md5_keys); confirm for any new caller.
 */
int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr) {
			/* Free the key */
			kfree(tp->md5sig_info->keys4[i].base.key);
			tp->md5sig_info->entries4--;

			if (tp->md5sig_info->entries4 == 0) {
				/* Last key gone: release the array too. */
				kfree(tp->md5sig_info->keys4);
				tp->md5sig_info->keys4 = NULL;
				tp->md5sig_info->alloced4 = 0;
			} else if (tp->md5sig_info->entries4 != i) {
				/* Close the gap: shift the tail entries
				 * (entries4 - i of them) down by one. */
				memmove(&tp->md5sig_info->keys4[i],
					&tp->md5sig_info->keys4[i+1],
					(tp->md5sig_info->entries4 - i) *
					 sizeof(struct tcp4_md5sig_key));
			}
			tcp_free_md5sig_pool();
			return 0;
		}
	}
	return -ENOENT;
}
EXPORT_SYMBOL(tcp_v4_md5_do_del);
972
/* Drop every configured IPv4 MD5 key on this socket (socket teardown). */
static void tcp_v4_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Free each key, then the array of keys itself,
	 * the crypto element, and then decrement our
	 * hold on the last resort crypto.
	 */
	if (tp->md5sig_info->entries4) {
		int i;
		for (i = 0; i < tp->md5sig_info->entries4; i++)
			kfree(tp->md5sig_info->keys4[i].base.key);
		tp->md5sig_info->entries4 = 0;
		tcp_free_md5sig_pool();
	}
	if (tp->md5sig_info->keys4) {
		kfree(tp->md5sig_info->keys4);
		tp->md5sig_info->keys4 = NULL;
		tp->md5sig_info->alloced4 = 0;
	}
}
994
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200995static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
996 int optlen)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800997{
998 struct tcp_md5sig cmd;
999 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1000 u8 *newkey;
1001
1002 if (optlen < sizeof(cmd))
1003 return -EINVAL;
1004
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02001005 if (copy_from_user(&cmd, optval, sizeof(cmd)))
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001006 return -EFAULT;
1007
1008 if (sin->sin_family != AF_INET)
1009 return -EINVAL;
1010
1011 if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
1012 if (!tcp_sk(sk)->md5sig_info)
1013 return -ENOENT;
1014 return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
1015 }
1016
1017 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1018 return -EINVAL;
1019
1020 if (!tcp_sk(sk)->md5sig_info) {
1021 struct tcp_sock *tp = tcp_sk(sk);
Wu Fengguangaa133072009-09-02 23:45:45 -07001022 struct tcp_md5sig_info *p;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001023
Wu Fengguangaa133072009-09-02 23:45:45 -07001024 p = kzalloc(sizeof(*p), sk->sk_allocation);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001025 if (!p)
1026 return -EINVAL;
1027
1028 tp->md5sig_info = p;
Eric Dumazeta4654192010-05-16 00:36:33 -07001029 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001030 }
1031
Wu Fengguangaa133072009-09-02 23:45:45 -07001032 newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001033 if (!newkey)
1034 return -ENOMEM;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001035 return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
1036 newkey, cmd.tcpm_keylen);
1037}
1038
/*
 * Feed the IPv4 TCP-MD5 pseudo-header into an in-progress hash: source
 * address, destination address, zero-padded protocol number and segment
 * length ('nbytes'), in that order.  Returns crypto_hash_update()'s
 * status (0 on success).
 */
static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					__be32 daddr, __be32 saddr, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;

	/* Scratch pseudo-header lives in the per-CPU md5sig pool. */
	bp = &hp->md5_blk.ip4;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}
1061
/*
 * Compute an MD5 signature over pseudo-header + TCP header + key only
 * (no payload) -- used for the RST/ACK replies built in this file.
 * On success the 16-byte digest is written to md5_hash and 0 returned;
 * on any crypto failure md5_hash is zeroed and 1 returned.
 */
static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	/* th->doff << 2 is the header length in bytes. */
	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
1093
/*
 * Compute the full RFC 2385 MD5 signature over pseudo-header, TCP header
 * and payload of 'skb'.  The addresses come from the first available of:
 * full socket (sk), request_sock (req), or the skb's own IP header.
 * Writes the 16-byte digest to md5_hash and returns 0; on crypto failure
 * zeroes md5_hash and returns 1.
 */
int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			struct sock *sk, struct request_sock *req,
			struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	/* Pick the address pair: established socket, then pending
	 * request, then the packet itself. */
	if (sk) {
		saddr = inet_sk(sk)->inet_saddr;
		daddr = inet_sk(sk)->inet_daddr;
	} else if (req) {
		saddr = inet_rsk(req)->loc_addr;
		daddr = inet_rsk(req)->rmt_addr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	/* Payload starts after the header (th->doff << 2 bytes in). */
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001144
/*
 * Validate the MD5 option of an incoming segment against the key
 * configured for its source address.  Returns 0 to accept the segment,
 * 1 to drop it.
 */
static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and its wrong.
	 */
	__u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return 0;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return 1;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return 1;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	/* Drop on recompute failure or digest mismatch; rate-limited log. */
	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		if (net_ratelimit()) {
			printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
			       &iph->saddr, ntohs(th->source),
			       &iph->daddr, ntohs(th->dest),
			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
		}
		return 1;
	}
	return 0;
}
1197
1198#endif
1199
/* IPv4 request_sock operations: how embryonic connections (re)send
 * SYN-ACKs, ACKs and RSTs, and how a request_sock is destroyed. */
struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_v4_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout = 	tcp_syn_ack_timeout,
};
1209
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001210#ifdef CONFIG_TCP_MD5SIG
/* MD5-signature hooks used while a connection is still a request_sock. */
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
};
Andrew Mortonb6332e62006-11-30 19:16:28 -08001215#endif
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001216
/*
 * Handle an incoming SYN on a listening IPv4 socket: apply SYN-flood
 * policy (drop / syncookies), allocate and initialize a request_sock,
 * parse TCP options (including the experimental TCP cookie extension),
 * run PAWS/tw_recycle checks against the inet_peer cache, pick an ISN,
 * send the SYN-ACK and hash the request into the SYN queue.
 * Always returns 0 (the segment is consumed either way).
 */
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_extend_values tmp_ext;
	struct tcp_options_received tmp_opt;
	u8 *hash_location;
	struct request_sock *req;
	struct inet_request_sock *ireq;
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = NULL;
	__be32 saddr = ip_hdr(skb)->saddr;
	__be32 daddr = ip_hdr(skb)->daddr;
	__u32 isn = TCP_SKB_CB(skb)->when;	/* non-zero when recycled from TIME-WAIT */
#ifdef CONFIG_SYN_COOKIES
	int want_cookie = 0;
#else
#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
#endif

	/* Never answer SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		if (net_ratelimit())
			syn_flood_warning(skb);
#ifdef CONFIG_SYN_COOKIES
		if (sysctl_tcp_syncookies) {
			want_cookie = 1;
		} else
#endif
		goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet_reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
	tmp_opt.user_mss = tp->rx_opt.user_mss;
	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);

	/* Experimental TCP cookie-pair extension: mix our addresses and
	 * the initiator's cookie bytes into the cookie bakery. */
	if (tmp_opt.cookie_plus > 0 &&
	    tmp_opt.saw_tstamp &&
	    !tp->rx_opt.cookie_out_never &&
	    (sysctl_tcp_cookie_size > 0 ||
	     (tp->cookie_values != NULL &&
	      tp->cookie_values->cookie_desired > 0))) {
		u8 *c;
		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;

		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
			goto drop_and_release;

		/* Secret recipe starts with IP addresses */
		*mess++ ^= (__force u32)daddr;
		*mess++ ^= (__force u32)saddr;

		/* plus variable length Initiator Cookie */
		c = (u8 *)mess;
		while (l-- > 0)
			*c++ ^= *hash_location++;

#ifdef CONFIG_SYN_COOKIES
		want_cookie = 0;	/* not our kind of cookie */
#endif
		tmp_ext.cookie_out_never = 0; /* false */
		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
	} else if (!tp->rx_opt.cookie_in_always) {
		/* redundant indications, but ensure initialization. */
		tmp_ext.cookie_out_never = 1; /* true */
		tmp_ext.cookie_plus = 0;
	} else {
		/* cookie required but absent: refuse the connection */
		goto drop_and_release;
	}
	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;

	/* Syncookies can only encode the timestamp option. */
	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	ireq = inet_rsk(req);
	ireq->loc_addr = daddr;
	ireq->rmt_addr = saddr;
	ireq->no_srccheck = inet_sk(sk)->transparent;
	ireq->opt = tcp_v4_save_options(sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	if (!want_cookie || tmp_opt.tstamp_ok)
		TCP_ECN_create_request(req, tcp_hdr(skb));

	if (want_cookie) {
		/* Encode state in the sequence number instead of queueing. */
		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
		req->cookie_ts = tmp_opt.tstamp_ok;
	} else if (!isn) {
		struct inet_peer *peer = NULL;

		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet_csk_route_req(sk, req)) != NULL &&
		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
		    peer->daddr.addr.a4 == saddr) {
			inet_peer_refcheck(peer);
			/* PAWS: reject if the peer's timestamp went backwards
			 * within the TIME-WAIT recycling window. */
			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
			    (s32)(peer->tcp_ts - req->ts_recent) >
							TCP_PAWS_WINDOW) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 (!peer || !peer->tcp_ts_stamp) &&
			 (!dst || !dst_metric(dst, RTAX_RTT))) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
				       &saddr, ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		}

		isn = tcp_v4_init_sequence(skb);
	}
	tcp_rsk(req)->snt_isn = isn;

	/* For syncookies the request is never queued: send and forget. */
	if (tcp_v4_send_synack(sk, dst, req,
			       (struct request_values *)&tmp_ext) ||
	    want_cookie)
		goto drop_and_free;

	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001395
1396
/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 *
 * Returns the new (locked-by-caller-convention) child socket, or NULL on
 * failure.  On failure the passed-in/looked-up dst has been released and
 * LINUX_MIB_LISTENDROPS counted; on success the dst ownership has moved
 * into the child via sk_setup_caps().
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif

	/* Listener accept backlog full: drop before allocating anything. */
	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	/* Route the reply if the caller did not already supply a dst. */
	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		goto exit;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(newsk, dst);	/* child now owns the dst reference */

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	ireq = inet_rsk(req);
	newinet->inet_daddr = ireq->rmt_addr;
	newinet->inet_rcv_saddr = ireq->loc_addr;
	newinet->inet_saddr = ireq->loc_addr;
	/* Transfer ownership of the parsed IP options from the request
	 * sock to the child; NULLing ireq->opt prevents a double free
	 * when the request_sock is destroyed.
	 */
	newinet->opt = ireq->opt;
	ireq->opt = NULL;
	newinet->mc_index = inet_iif(skb);
	newinet->mc_ttl = ip_hdr(skb)->ttl;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (newinet->opt)
		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
	/* Seed the IP ID counter from the sequence space. */
	newinet->inet_id = newtp->write_seq ^ jiffies;

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric_advmss(dst);
	/* A user-configured MSS caps the advertised MSS. */
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
	if (key != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
		if (newkey != NULL)
			tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
					  newkey, key->keylen);
		/* MD5 signing is incompatible with GSO on this socket. */
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		/* sock_put() drops the child; its destructor releases the
		 * dst taken by sk_setup_caps(), so no dst_release() here.
		 */
		sock_put(newsk);
		goto exit;
	}
	__inet_hash_nolisten(newsk, NULL);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return NULL;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001485
1486static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1487{
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -07001488 struct tcphdr *th = tcp_hdr(skb);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001489 const struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001490 struct sock *nsk;
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001491 struct request_sock **prev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001492 /* Find possible connection requests. */
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001493 struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1494 iph->saddr, iph->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001495 if (req)
1496 return tcp_check_req(sk, skb, req, prev);
1497
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001498 nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
Pavel Emelyanovc67499c2008-01-31 05:06:40 -08001499 th->source, iph->daddr, th->dest, inet_iif(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001500
1501 if (nsk) {
1502 if (nsk->sk_state != TCP_TIME_WAIT) {
1503 bh_lock_sock(nsk);
1504 return nsk;
1505 }
YOSHIFUJI Hideaki9469c7b2006-10-10 19:41:46 -07001506 inet_twsk_put(inet_twsk(nsk));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001507 return NULL;
1508 }
1509
1510#ifdef CONFIG_SYN_COOKIES
Florian Westphalaf9b4732010-06-03 00:43:44 +00001511 if (!th->syn)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001512 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1513#endif
1514 return sk;
1515}
1516
Al Virob51655b2006-11-14 21:40:42 -08001517static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001518{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001519 const struct iphdr *iph = ip_hdr(skb);
1520
Patrick McHardy84fa7932006-08-29 16:44:56 -07001521 if (skb->ip_summed == CHECKSUM_COMPLETE) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001522 if (!tcp_v4_check(skb->len, iph->saddr,
1523 iph->daddr, skb->csum)) {
Herbert Xufb286bb2005-11-10 13:01:24 -08001524 skb->ip_summed = CHECKSUM_UNNECESSARY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001525 return 0;
Herbert Xufb286bb2005-11-10 13:01:24 -08001526 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001527 }
Herbert Xufb286bb2005-11-10 13:01:24 -08001528
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001529 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
Herbert Xufb286bb2005-11-10 13:01:24 -08001530 skb->len, IPPROTO_TCP, 0);
1531
Linus Torvalds1da177e2005-04-16 15:20:36 -07001532 if (skb->len <= 76) {
Herbert Xufb286bb2005-11-10 13:01:24 -08001533 return __skb_checksum_complete(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001534 }
1535 return 0;
1536}
1537
1538
/* The socket must have it's spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 *
 * Returns 0; errors are signalled back to the peer via RST, and bad
 * segments are counted and dropped locally.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;	/* socket on whose behalf a RST is sent */
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
	 *  o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		sock_rps_save_rxhash(sk, skb->rxhash);
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
			rsk = sk;
			goto reset;
		}
		return 0;
	}

	/* Slow path: verify header length and checksum before state work. */
	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		/* May return a (locked) child socket, the listener itself,
		 * or NULL when the segment was consumed or rejected.
		 */
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb->rxhash);

	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
		rsk = sk;
		goto reset;
	}
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001610
1611/*
1612 * From tcp_input.c
1613 */
1614
/*
 * Main IPv4 TCP receive entry point (called from the IP layer for every
 * TCP segment).  Validates the header and checksum, fills in the TCP
 * control block, looks up the owning socket and either processes the
 * segment directly, prequeues it, or backlogs it when the socket is
 * owned by a user context.
 */
int tcp_v4_rcv(struct sk_buff *skb)
{
	const struct iphdr *iph;
	struct tcphdr *th;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	/* Only segments addressed to this host are processed. */
	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = tcp_hdr(skb);

	/* doff is in 32-bit words; anything below the base header is bogus. */
	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided case of th->doff==0 is eliminated.
	 * So, we defer the checks. */
	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
		goto bad_packet;

	/* pskb_may_pull() may have reallocated the header: reload pointers. */
	th = tcp_hdr(skb);
	iph = ip_hdr(skb);
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	/* end_seq counts SYN and FIN as one sequence unit each. */
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = iph->tos;
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	/* Generalized TTL Security Mechanism drop (RFC 5082 style). */
	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;
	nf_reset(skb);

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
		if (tp->ucopy.dma_chan)
			ret = tcp_v4_do_rcv(sk, skb);
		else
#endif
		{
			/* Try the prequeue first; fall back to direct
			 * processing when prequeueing is not possible.
			 */
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v4_do_rcv(sk, skb);
		}
	} else if (unlikely(sk_add_backlog(sk, skb))) {
		/* Backlog limit hit: count and drop. */
		bh_unlock_sock(sk);
		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
	} else {
		/* Valid segment for a non-existent connection: RST it. */
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		/* A new SYN may legitimately reuse a TIME_WAIT 4-tuple:
		 * hand it to a matching listener and recycle the tw sock.
		 */
		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
							&tcp_hashinfo,
							iph->daddr, th->dest,
							inet_iif(skb));
		if (sk2) {
			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
			inet_twsk_put(inet_twsk(sk));
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
1758
David S. Miller3f419d22010-11-29 13:37:14 -08001759struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001760{
David S. Miller3f419d22010-11-29 13:37:14 -08001761 struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001762 struct inet_sock *inet = inet_sk(sk);
David S. Miller3f419d22010-11-29 13:37:14 -08001763 struct inet_peer *peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001764
Eric Dumazetc720c7e2009-10-15 06:30:45 +00001765 if (!rt || rt->rt_dst != inet->inet_daddr) {
David S. Millerb534ecf2010-11-30 11:54:19 -08001766 peer = inet_getpeer_v4(inet->inet_daddr, 1);
David S. Miller3f419d22010-11-29 13:37:14 -08001767 *release_it = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001768 } else {
1769 if (!rt->peer)
1770 rt_bind_peer(rt, 1);
1771 peer = rt->peer;
David S. Miller3f419d22010-11-29 13:37:14 -08001772 *release_it = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001773 }
1774
David S. Miller3f419d22010-11-29 13:37:14 -08001775 return peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001776}
David S. Miller3f419d22010-11-29 13:37:14 -08001777EXPORT_SYMBOL(tcp_v4_get_peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001778
David S. Millerccb7c412010-12-01 18:09:13 -08001779void *tcp_v4_tw_get_peer(struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001780{
David S. Millerccb7c412010-12-01 18:09:13 -08001781 struct inet_timewait_sock *tw = inet_twsk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001782
David S. Millerccb7c412010-12-01 18:09:13 -08001783 return inet_getpeer_v4(tw->tw_daddr, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001784}
David S. Millerccb7c412010-12-01 18:09:13 -08001785EXPORT_SYMBOL(tcp_v4_tw_get_peer);
1786
/* Operations for TCP's TIME_WAIT mini-sockets. */
static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor= tcp_twsk_destructor,
	.twsk_getpeer	= tcp_v4_tw_get_peer,
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001793
/* Address-family specific connection-socket operations for TCP over IPv4.
 * Shared with IPv6 code for v4-mapped sockets (hence the export).
 */
const struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.conn_request	   = tcp_v4_conn_request,
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
	.get_peer	   = tcp_v4_get_peer,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ip_setsockopt,
	.getsockopt	   = ip_getsockopt,
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in),
	.bind_conflict	   = inet_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
};
EXPORT_SYMBOL(ipv4_specific);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001813
#ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 (RFC 2385) signature operations for IPv4 sockets. */
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup		= tcp_v4_md5_lookup,
	.calc_md5_hash		= tcp_v4_md5_hash_skb,
	.md5_add		= tcp_v4_md5_add_func,
	.md5_parse		= tcp_v4_parse_md5_keys,
};
#endif
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001822
Linus Torvalds1da177e2005-04-16 15:20:36 -07001823/* NOTE: A lot of things set to zero explicitly by call to
1824 * sk_alloc() so need not be done here.
1825 */
1826static int tcp_v4_init_sock(struct sock *sk)
1827{
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03001828 struct inet_connection_sock *icsk = inet_csk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001829 struct tcp_sock *tp = tcp_sk(sk);
1830
1831 skb_queue_head_init(&tp->out_of_order_queue);
1832 tcp_init_xmit_timers(sk);
1833 tcp_prequeue_init(tp);
1834
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03001835 icsk->icsk_rto = TCP_TIMEOUT_INIT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001836 tp->mdev = TCP_TIMEOUT_INIT;
1837
1838 /* So many TCP implementations out there (incorrectly) count the
1839 * initial SYN frame in their delayed-ACK and congestion control
1840 * algorithms that we must have the following bandaid to talk
1841 * efficiently to them. -DaveM
1842 */
1843 tp->snd_cwnd = 2;
1844
1845 /* See draft-stevens-tcpca-spec-01 for discussion of the
1846 * initialization of these values.
1847 */
Ilpo Järvinen0b6a05c2009-09-15 01:30:10 -07001848 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001849 tp->snd_cwnd_clamp = ~0;
William Allen Simpsonbee7ca92009-11-10 09:51:18 +00001850 tp->mss_cache = TCP_MSS_DEFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001851
1852 tp->reordering = sysctl_tcp_reordering;
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03001853 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001854
1855 sk->sk_state = TCP_CLOSE;
1856
1857 sk->sk_write_space = sk_stream_write_space;
1858 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1859
Arnaldo Carvalho de Melo8292a172005-12-13 23:15:52 -08001860 icsk->icsk_af_ops = &ipv4_specific;
Arnaldo Carvalho de Melod83d8462005-12-13 23:26:10 -08001861 icsk->icsk_sync_mss = tcp_sync_mss;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001862#ifdef CONFIG_TCP_MD5SIG
1863 tp->af_specific = &tcp_sock_ipv4_specific;
1864#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001865
William Allen Simpson435cf552009-12-02 18:17:05 +00001866 /* TCP Cookie Transactions */
1867 if (sysctl_tcp_cookie_size > 0) {
1868 /* Default, cookies without s_data_payload. */
1869 tp->cookie_values =
1870 kzalloc(sizeof(*tp->cookie_values),
1871 sk->sk_allocation);
1872 if (tp->cookie_values != NULL)
1873 kref_init(&tp->cookie_values->kref);
1874 }
1875 /* Presumed zeroed, in order of appearance:
1876 * cookie_in_always, cookie_out_never,
1877 * s_data_constant, s_data_in, s_data_out
1878 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001879 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1880 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1881
Herbert Xueb4dea52008-12-29 23:04:08 -08001882 local_bh_disable();
Eric Dumazet17483762008-11-25 21:16:35 -08001883 percpu_counter_inc(&tcp_sockets_allocated);
Herbert Xueb4dea52008-12-29 23:04:08 -08001884 local_bh_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001885
1886 return 0;
1887}
1888
/*
 * Tear down the TCP-private state of a socket: timers, queues, MD5 keys,
 * cached resources and the bind-bucket reference.  Counterpart of
 * tcp_v4_init_sock(); also reused by the IPv6 code (hence the export).
 */
void tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	/* Cleanup up the write buffer. */
	tcp_write_queue_purge(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	__skb_queue_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		tcp_v4_clear_md5_list(sk);
		kfree(tp->md5sig_info);
		tp->md5sig_info = NULL;
	}
#endif

#ifdef CONFIG_NET_DMA
	/* Cleans up our sk_async_wait_queue */
	__skb_queue_purge(&sk->sk_async_wait_queue);
#endif

	/* Clean prequeue, it must be empty really */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);

	/*
	 * If sendmsg cached page exists, toss it.
	 */
	if (sk->sk_sndmsg_page) {
		__free_page(sk->sk_sndmsg_page);
		sk->sk_sndmsg_page = NULL;
	}

	/* TCP Cookie Transactions */
	if (tp->cookie_values != NULL) {
		kref_put(&tp->cookie_values->kref,
			 tcp_cookie_values_release);
		tp->cookie_values = NULL;
	}

	percpu_counter_dec(&tcp_sockets_allocated);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);
1942
1943#ifdef CONFIG_PROC_FS
1944/* Proc filesystem TCP sock list dumping. */
1945
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08001946static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001947{
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08001948 return hlist_nulls_empty(head) ? NULL :
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -07001949 list_entry(head->first, struct inet_timewait_sock, tw_node);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001950}
1951
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -07001952static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001953{
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08001954 return !is_a_nulls(tw->tw_node.next) ?
1955 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001956}
1957
/*
 * Get next listener socket follow cur.  If cur is NULL, get first socket
 * starting from bucket given in st->bucket; when st->bucket is zero the
 * very first socket in the hash table is returned.
 *
 * Iterates listeners AND their pending open requests (SYN_RECV).  Lock
 * discipline: the current listening-hash bucket's spinlock is held
 * across calls (taken here, released when the bucket is exhausted);
 * while walking a listener's syn_table, that listener's syn_wait_lock
 * is additionally read-held, with st->state recording which mode the
 * iterator is in.
 */
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct inet_connection_sock *icsk;
	struct hlist_nulls_node *node;
	struct sock *sk = cur;
	struct inet_listen_hashbucket *ilb;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	if (!sk) {
		/* Fresh start: lock the requested bucket and begin there. */
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock_bh(&ilb->lock);
		sk = sk_nulls_head(&ilb->head);
		st->offset = 0;
		goto get_sk;
	}
	ilb = &tcp_hashinfo.listening_hash[st->bucket];
	++st->num;
	++st->offset;

	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		/* cur is a request_sock; continue the syn_table walk. */
		struct request_sock *req = cur;

		icsk = inet_csk(st->syn_wait_sk);
		req = req->dl_next;
		while (1) {
			while (req) {
				if (req->rsk_ops->family == st->family) {
					cur = req;
					goto out;
				}
				req = req->dl_next;
			}
			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
				break;
get_req:
			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
		}
		/* syn_table exhausted: resume with the next listener. */
		sk = sk_nulls_next(st->syn_wait_sk);
		st->state = TCP_SEQ_STATE_LISTENING;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	} else {
		/* cur is a listener: dive into its requests if any. */
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue))
			goto start_req;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		sk = sk_nulls_next(sk);
	}
get_sk:
	sk_nulls_for_each_from(sk, node) {
		if (!net_eq(sock_net(sk), net))
			continue;
		if (sk->sk_family == st->family) {
			cur = sk;
			goto out;
		}
		/* Wrong family, but its open requests may still match. */
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
start_req:
			st->uid = sock_i_uid(sk);
			st->syn_wait_sk = sk;
			st->state = TCP_SEQ_STATE_OPENREQ;
			st->sbucket = 0;
			goto get_req;
		}
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	}
	/* Bucket drained: release it and advance to the next one. */
	spin_unlock_bh(&ilb->lock);
	st->offset = 0;
	if (++st->bucket < INET_LHTABLE_SIZE) {
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock_bh(&ilb->lock);
		sk = sk_nulls_head(&ilb->head);
		goto get_sk;
	}
	cur = NULL;
out:
	return cur;
}
2044
2045static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2046{
Tom Herberta8b690f2010-06-07 00:43:42 -07002047 struct tcp_iter_state *st = seq->private;
2048 void *rc;
2049
2050 st->bucket = 0;
2051 st->offset = 0;
2052 rc = listening_get_next(seq, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002053
2054 while (rc && *pos) {
2055 rc = listening_get_next(seq, rc);
2056 --*pos;
2057 }
2058 return rc;
2059}
2060
Andi Kleen6eac5602008-08-28 01:08:02 -07002061static inline int empty_bucket(struct tcp_iter_state *st)
2062{
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002063 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2064 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
Andi Kleen6eac5602008-08-28 01:08:02 -07002065}
2066
Tom Herberta8b690f2010-06-07 00:43:42 -07002067/*
2068 * Get first established socket starting from bucket given in st->bucket.
2069 * If st->bucket is zero, the very first socket in the hash is returned.
2070 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002071static void *established_get_first(struct seq_file *seq)
2072{
Jianjun Kong5799de02008-11-03 02:49:10 -08002073 struct tcp_iter_state *st = seq->private;
Denis V. Luneva4146b12008-04-13 22:11:14 -07002074 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002075 void *rc = NULL;
2076
Tom Herberta8b690f2010-06-07 00:43:42 -07002077 st->offset = 0;
2078 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002079 struct sock *sk;
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002080 struct hlist_nulls_node *node;
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -07002081 struct inet_timewait_sock *tw;
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002082 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002083
Andi Kleen6eac5602008-08-28 01:08:02 -07002084 /* Lockless fast path for the common case of empty buckets */
2085 if (empty_bucket(st))
2086 continue;
2087
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002088 spin_lock_bh(lock);
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002089 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
Daniel Lezcanof40c8172008-03-21 04:13:54 -07002090 if (sk->sk_family != st->family ||
YOSHIFUJI Hideaki878628f2008-03-26 03:57:35 +09002091 !net_eq(sock_net(sk), net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002092 continue;
2093 }
2094 rc = sk;
2095 goto out;
2096 }
2097 st->state = TCP_SEQ_STATE_TIME_WAIT;
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -07002098 inet_twsk_for_each(tw, node,
Eric Dumazetdbca9b2752007-02-08 14:16:46 -08002099 &tcp_hashinfo.ehash[st->bucket].twchain) {
Pavel Emelyanov28518fc2008-03-21 15:52:00 -07002100 if (tw->tw_family != st->family ||
YOSHIFUJI Hideaki878628f2008-03-26 03:57:35 +09002101 !net_eq(twsk_net(tw), net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002102 continue;
2103 }
2104 rc = tw;
2105 goto out;
2106 }
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002107 spin_unlock_bh(lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002108 st->state = TCP_SEQ_STATE_ESTABLISHED;
2109 }
2110out:
2111 return rc;
2112}
2113
/*
 * Advance the established/TIME_WAIT walk by one entry.  'cur' is the
 * entry returned last time (a struct sock * or inet_timewait_sock *,
 * distinguished by st->state).  The current bucket's lock is held on
 * entry and on a non-NULL return; buckets are unlocked/relocked as the
 * walk crosses bucket boundaries.
 */
static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct inet_timewait_sock *tw;
	struct hlist_nulls_node *node;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	/* Global position and intra-bucket offset, used by
	 * tcp_seek_last_pos() to resume a walk cheaply. */
	++st->num;
	++st->offset;

	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
		tw = cur;
		tw = tw_next(tw);
get_tw:
		/* Skip TIME_WAIT entries of other families/namespaces. */
		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
			tw = tw_next(tw);
		}
		if (tw) {
			cur = tw;
			goto out;
		}
		/* TIME_WAIT chain exhausted: release this bucket and move
		 * to the next non-empty one. */
		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		st->state = TCP_SEQ_STATE_ESTABLISHED;

		/* Look for next non empty bucket */
		st->offset = 0;
		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
				empty_bucket(st))
			;
		if (st->bucket > tcp_hashinfo.ehash_mask)
			return NULL;

		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
	} else
		sk = sk_nulls_next(sk);

	sk_nulls_for_each_from(sk, node) {
		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
			goto found;
	}

	/* Established chain done for this bucket: switch to its
	 * TIME_WAIT chain (jump back into the loop above). */
	st->state = TCP_SEQ_STATE_TIME_WAIT;
	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
	goto get_tw;
found:
	cur = sk;
out:
	return cur;
}
2165
2166static void *established_get_idx(struct seq_file *seq, loff_t pos)
2167{
Tom Herberta8b690f2010-06-07 00:43:42 -07002168 struct tcp_iter_state *st = seq->private;
2169 void *rc;
2170
2171 st->bucket = 0;
2172 rc = established_get_first(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002173
2174 while (rc && pos) {
2175 rc = established_get_next(seq, rc);
2176 --pos;
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02002177 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002178 return rc;
2179}
2180
2181static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2182{
2183 void *rc;
Jianjun Kong5799de02008-11-03 02:49:10 -08002184 struct tcp_iter_state *st = seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002185
Linus Torvalds1da177e2005-04-16 15:20:36 -07002186 st->state = TCP_SEQ_STATE_LISTENING;
2187 rc = listening_get_idx(seq, &pos);
2188
2189 if (!rc) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002190 st->state = TCP_SEQ_STATE_ESTABLISHED;
2191 rc = established_get_idx(seq, pos);
2192 }
2193
2194 return rc;
2195}
2196
Tom Herberta8b690f2010-06-07 00:43:42 -07002197static void *tcp_seek_last_pos(struct seq_file *seq)
2198{
2199 struct tcp_iter_state *st = seq->private;
2200 int offset = st->offset;
2201 int orig_num = st->num;
2202 void *rc = NULL;
2203
2204 switch (st->state) {
2205 case TCP_SEQ_STATE_OPENREQ:
2206 case TCP_SEQ_STATE_LISTENING:
2207 if (st->bucket >= INET_LHTABLE_SIZE)
2208 break;
2209 st->state = TCP_SEQ_STATE_LISTENING;
2210 rc = listening_get_next(seq, NULL);
2211 while (offset-- && rc)
2212 rc = listening_get_next(seq, rc);
2213 if (rc)
2214 break;
2215 st->bucket = 0;
2216 /* Fallthrough */
2217 case TCP_SEQ_STATE_ESTABLISHED:
2218 case TCP_SEQ_STATE_TIME_WAIT:
2219 st->state = TCP_SEQ_STATE_ESTABLISHED;
2220 if (st->bucket > tcp_hashinfo.ehash_mask)
2221 break;
2222 rc = established_get_first(seq);
2223 while (offset-- && rc)
2224 rc = established_get_next(seq, rc);
2225 }
2226
2227 st->num = orig_num;
2228
2229 return rc;
2230}
2231
Linus Torvalds1da177e2005-04-16 15:20:36 -07002232static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2233{
Jianjun Kong5799de02008-11-03 02:49:10 -08002234 struct tcp_iter_state *st = seq->private;
Tom Herberta8b690f2010-06-07 00:43:42 -07002235 void *rc;
2236
2237 if (*pos && *pos == st->last_pos) {
2238 rc = tcp_seek_last_pos(seq);
2239 if (rc)
2240 goto out;
2241 }
2242
Linus Torvalds1da177e2005-04-16 15:20:36 -07002243 st->state = TCP_SEQ_STATE_LISTENING;
2244 st->num = 0;
Tom Herberta8b690f2010-06-07 00:43:42 -07002245 st->bucket = 0;
2246 st->offset = 0;
2247 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2248
2249out:
2250 st->last_pos = *pos;
2251 return rc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002252}
2253
/* seq_file ->next(): advance past entry 'v', bumping *pos. */
static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc = NULL;

	/* After the header token, the first real entry is index 0. */
	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			/* Listening table exhausted: switch to the
			 * established/TIME_WAIT walk from the start. */
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			st->bucket = 0;
			st->offset = 0;
			rc	  = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
	case TCP_SEQ_STATE_TIME_WAIT:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	/* Remember the position so tcp_seq_start() can resume cheaply. */
	st->last_pos = *pos;
	return rc;
}
2285
/*
 * seq_file ->stop(): drop whatever lock the iterator still holds for
 * its current state.
 */
static void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
		if (v) {
			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		}
		/* fall through: while in OPENREQ state the listening
		 * bucket lock is held as well and must also be dropped. */
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		break;
	}
}
2307
/*
 * ->open() for the /proc entry: set up per-net seq_file state sized for
 * tcp_iter_state and record which address family this file shows.
 * Returns 0 or a negative errno from seq_open_net().
 */
static int tcp_seq_open(struct inode *inode, struct file *file)
{
	/* The afinfo descriptor was stashed in the proc entry's data by
	 * tcp_proc_register(). */
	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
	struct tcp_iter_state *s;
	int err;

	err = seq_open_net(inode, file, &afinfo->seq_ops,
			  sizeof(struct tcp_iter_state));
	if (err < 0)
		return err;

	s = ((struct seq_file *)file->private_data)->private;
	s->family		= afinfo->family;
	s->last_pos 		= 0;
	return 0;
}
2324
Daniel Lezcano6f8b13b2008-03-21 04:14:45 -07002325int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002326{
2327 int rc = 0;
2328 struct proc_dir_entry *p;
2329
Denis V. Lunev68fcadd2008-04-13 22:13:30 -07002330 afinfo->seq_fops.open = tcp_seq_open;
2331 afinfo->seq_fops.read = seq_read;
2332 afinfo->seq_fops.llseek = seq_lseek;
2333 afinfo->seq_fops.release = seq_release_net;
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02002334
Denis V. Lunev9427c4b2008-04-13 22:12:13 -07002335 afinfo->seq_ops.start = tcp_seq_start;
2336 afinfo->seq_ops.next = tcp_seq_next;
2337 afinfo->seq_ops.stop = tcp_seq_stop;
2338
Denis V. Lunev84841c32008-05-02 04:10:08 -07002339 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2340 &afinfo->seq_fops, afinfo);
2341 if (!p)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002342 rc = -ENOMEM;
2343 return rc;
2344}
Eric Dumazet4bc2f182010-07-09 21:22:10 +00002345EXPORT_SYMBOL(tcp_proc_register);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002346
Daniel Lezcano6f8b13b2008-03-21 04:14:45 -07002347void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002348{
Daniel Lezcano6f8b13b2008-03-21 04:14:45 -07002349 proc_net_remove(net, afinfo->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002350}
Eric Dumazet4bc2f182010-07-09 21:22:10 +00002351EXPORT_SYMBOL(tcp_proc_unregister);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002352
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07002353static void get_openreq4(struct sock *sk, struct request_sock *req,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002354 struct seq_file *f, int i, int uid, int *len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002355{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002356 const struct inet_request_sock *ireq = inet_rsk(req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002357 int ttd = req->expires - jiffies;
2358
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002359 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2360 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
Linus Torvalds1da177e2005-04-16 15:20:36 -07002361 i,
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002362 ireq->loc_addr,
Eric Dumazetc720c7e2009-10-15 06:30:45 +00002363 ntohs(inet_sk(sk)->inet_sport),
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002364 ireq->rmt_addr,
2365 ntohs(ireq->rmt_port),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002366 TCP_SYN_RECV,
2367 0, 0, /* could print option size, but that is af dependent. */
2368 1, /* timers active (only the expire timer) */
2369 jiffies_to_clock_t(ttd),
2370 req->retrans,
2371 uid,
2372 0, /* non standard timer */
2373 0, /* open_requests have no inode */
2374 atomic_read(&sk->sk_refcnt),
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002375 req,
2376 len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002377}
2378
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002379static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002380{
2381 int timer_active;
2382 unsigned long timer_expires;
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002383 struct tcp_sock *tp = tcp_sk(sk);
2384 const struct inet_connection_sock *icsk = inet_csk(sk);
2385 struct inet_sock *inet = inet_sk(sk);
Eric Dumazetc720c7e2009-10-15 06:30:45 +00002386 __be32 dest = inet->inet_daddr;
2387 __be32 src = inet->inet_rcv_saddr;
2388 __u16 destp = ntohs(inet->inet_dport);
2389 __u16 srcp = ntohs(inet->inet_sport);
Eric Dumazet49d09002009-12-03 16:06:13 -08002390 int rx_queue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002391
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002392 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002393 timer_active = 1;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002394 timer_expires = icsk->icsk_timeout;
2395 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002396 timer_active = 4;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002397 timer_expires = icsk->icsk_timeout;
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002398 } else if (timer_pending(&sk->sk_timer)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002399 timer_active = 2;
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002400 timer_expires = sk->sk_timer.expires;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002401 } else {
2402 timer_active = 0;
2403 timer_expires = jiffies;
2404 }
2405
Eric Dumazet49d09002009-12-03 16:06:13 -08002406 if (sk->sk_state == TCP_LISTEN)
2407 rx_queue = sk->sk_ack_backlog;
2408 else
2409 /*
2410 * because we dont lock socket, we might find a transient negative value
2411 */
2412 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2413
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002414 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
Stephen Hemminger7be87352008-06-27 20:00:19 -07002415 "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002416 i, src, srcp, dest, destp, sk->sk_state,
Sridhar Samudrala47da8ee2006-06-27 13:29:00 -07002417 tp->write_seq - tp->snd_una,
Eric Dumazet49d09002009-12-03 16:06:13 -08002418 rx_queue,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002419 timer_active,
2420 jiffies_to_clock_t(timer_expires - jiffies),
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002421 icsk->icsk_retransmits,
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002422 sock_i_uid(sk),
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03002423 icsk->icsk_probes_out,
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002424 sock_i_ino(sk),
2425 atomic_read(&sk->sk_refcnt), sk,
Stephen Hemminger7be87352008-06-27 20:00:19 -07002426 jiffies_to_clock_t(icsk->icsk_rto),
2427 jiffies_to_clock_t(icsk->icsk_ack.ato),
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002428 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002429 tp->snd_cwnd,
Ilpo Järvinen0b6a05c2009-09-15 01:30:10 -07002430 tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002431 len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002432}
2433
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02002434static void get_timewait4_sock(struct inet_timewait_sock *tw,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002435 struct seq_file *f, int i, int *len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002436{
Al Viro23f33c22006-09-27 18:43:50 -07002437 __be32 dest, src;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002438 __u16 destp, srcp;
2439 int ttd = tw->tw_ttd - jiffies;
2440
2441 if (ttd < 0)
2442 ttd = 0;
2443
2444 dest = tw->tw_daddr;
2445 src = tw->tw_rcv_saddr;
2446 destp = ntohs(tw->tw_dport);
2447 srcp = ntohs(tw->tw_sport);
2448
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002449 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2450 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n",
Linus Torvalds1da177e2005-04-16 15:20:36 -07002451 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2452 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002453 atomic_read(&tw->tw_refcnt), tw, len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002454}
2455
/* Fixed row width for /proc/net/tcp; every line is padded to this. */
#define TMPSZ 150

/*
 * seq_file ->show(): print one row.  The SEQ_START_TOKEN produces the
 * column header; otherwise dispatch on iterator state to the matching
 * per-entry formatter, then pad the row to TMPSZ - 1 characters.
 */
static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	int len;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "%-*s\n", TMPSZ - 1,
			   "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp4_sock(v, seq, st->num, &len);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait4_sock(v, seq, st->num, &len);
		break;
	}
	/* 'len' was filled by %n above; pad the line to the fixed width. */
	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
out:
	return 0;
}
2488
Linus Torvalds1da177e2005-04-16 15:20:36 -07002489static struct tcp_seq_afinfo tcp4_seq_afinfo = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002490 .name = "tcp",
2491 .family = AF_INET,
Denis V. Lunev5f4472c2008-04-13 22:13:53 -07002492 .seq_fops = {
2493 .owner = THIS_MODULE,
2494 },
Denis V. Lunev9427c4b2008-04-13 22:12:13 -07002495 .seq_ops = {
2496 .show = tcp4_seq_show,
2497 },
Linus Torvalds1da177e2005-04-16 15:20:36 -07002498};
2499
/* Create /proc/net/tcp when a network namespace is set up. */
static int __net_init tcp4_proc_init_net(struct net *net)
{
	return tcp_proc_register(net, &tcp4_seq_afinfo);
}

/* Tear down /proc/net/tcp when the namespace goes away. */
static void __net_exit tcp4_proc_exit_net(struct net *net)
{
	tcp_proc_unregister(net, &tcp4_seq_afinfo);
}

static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};
2514
/* Hook the /proc/net/tcp pernet ops into every namespace. */
int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}

void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
2524#endif /* CONFIG_PROC_FS */
2525
/*
 * GRO receive hook for TCP over IPv4: verify the checksum state before
 * handing the skb to the generic tcp_gro_receive().  Packets whose
 * checksum cannot be validated here are flushed out of GRO unmerged.
 */
struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
	struct iphdr *iph = skb_gro_network_header(skb);

	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		/* Hardware gave a full checksum: fold in the pseudo
		 * header; zero result means the packet verifies. */
		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
				  skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			break;
		}

		/* fall through */
	case CHECKSUM_NONE:
		/* Unverifiable checksum: don't aggregate this packet. */
		NAPI_GRO_CB(skb)->flush = 1;
		return NULL;
	}

	return tcp_gro_receive(head, skb);
}
Herbert Xubf296b12008-12-15 23:43:36 -08002546
/*
 * GRO complete hook: the merged super-packet gets a fresh pseudo-header
 * checksum and is marked as TCPv4 GSO so it can be resegmented later.
 */
int tcp4_gro_complete(struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);

	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
				  iph->saddr, iph->daddr, 0);
	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;

	return tcp_gro_complete(skb);
}
Herbert Xubf296b12008-12-15 23:43:36 -08002558
/* Protocol descriptor wiring TCP's IPv4 operations into the socket
 * layer; referenced by the inetsw registration elsewhere. */
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	/* Sockets are freed RCU-deferred; lookups must revalidate. */
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
};
EXPORT_SYMBOL(tcp_prot);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002600

/* Per-namespace setup: create the kernel control socket used for
 * sending resets/acks on behalf of this namespace. */
static int __net_init tcp_sk_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcp_sk_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
}

/* Batched namespace teardown: flush TIME_WAIT sockets belonging to the
 * dying namespaces in one pass over the hash. */
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
}
2617
static struct pernet_operations __net_initdata tcp_sk_ops = {
       .init	   = tcp_sk_init,
       .exit	   = tcp_sk_exit,
       .exit_batch = tcp_sk_exit_batch,
};
2623
/* Boot-time TCP/IPv4 initialization: set up the global socket hash and
 * register the per-namespace control-socket ops.  Failure is fatal. */
void __init tcp_v4_init(void)
{
	inet_hashinfo_init(&tcp_hashinfo);
	if (register_pernet_subsys(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002629}