blob: 6728772a943ab8a6699a7a05156e942f9635d958 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Implementation of the Transmission Control Protocol(TCP).
7 *
8 * Version: $Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $
9 *
10 * IPv4 specific functions
11 *
12 *
13 * code split from:
14 * linux/ipv4/tcp.c
15 * linux/ipv4/tcp_input.c
16 * linux/ipv4/tcp_output.c
17 *
18 * See tcp.c for author information
19 *
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
24 */
25
26/*
27 * Changes:
28 * David S. Miller : New socket lookup architecture.
29 * This code is dedicated to John Dyson.
30 * David S. Miller : Change semantics of established hash,
31 * half is devoted to TIME_WAIT sockets
32 * and the rest go in the other half.
33 * Andi Kleen : Add support for syncookies and fixed
34 * some bugs: ip options weren't passed to
35 * the TCP layer, missed a check for an
36 * ACK bit.
37 * Andi Kleen : Implemented fast path mtu discovery.
38 * Fixed many serious bugs in the
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -070039 * request_sock handling and moved
Linus Torvalds1da177e2005-04-16 15:20:36 -070040 * most of it into the af independent code.
41 * Added tail drop and some other bugfixes.
Stephen Hemmingercaa20d9a2005-11-10 17:13:47 -080042 * Added new listen semantics.
Linus Torvalds1da177e2005-04-16 15:20:36 -070043 * Mike McLagan : Routing by source
44 * Juan Jose Ciarlante: ip_dynaddr bits
45 * Andi Kleen: various fixes.
46 * Vitaly E. Lavrov : Transparent proxy revived after year
47 * coma.
48 * Andi Kleen : Fix new listen.
49 * Andi Kleen : Fix accept error reporting.
50 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
51 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
52 * a single port at the same time.
53 */
54
55#include <linux/config.h>
56
57#include <linux/types.h>
58#include <linux/fcntl.h>
59#include <linux/module.h>
60#include <linux/random.h>
61#include <linux/cache.h>
62#include <linux/jhash.h>
63#include <linux/init.h>
64#include <linux/times.h>
65
66#include <net/icmp.h>
Arnaldo Carvalho de Melo304a1612005-08-09 19:59:20 -070067#include <net/inet_hashtables.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070068#include <net/tcp.h>
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -030069#include <net/transp_v6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070070#include <net/ipv6.h>
71#include <net/inet_common.h>
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -080072#include <net/timewait_sock.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070073#include <net/xfrm.h>
74
75#include <linux/inet.h>
76#include <linux/ipv6.h>
77#include <linux/stddef.h>
78#include <linux/proc_fs.h>
79#include <linux/seq_file.h>
80
/* sysctl net.ipv4.tcp_tw_reuse: when set, allow reusing a TIME-WAIT
 * socket for a new outgoing connection when it is safe from a protocol
 * viewpoint (see tcp_twsk_unique() below).
 */
int sysctl_tcp_tw_reuse;
/* sysctl net.ipv4.tcp_low_latency (consumed by the receive path
 * elsewhere; only defined here).
 */
int sysctl_tcp_low_latency;

/* Check TCP sequence numbers in ICMP packets. */
#define ICMP_MIN_LENGTH 8

/* Socket used for sending RSTs */
static struct socket *tcp_socket;

/* Forward declaration; definition below after the error handlers. */
void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);

/* The TCP hashing tables (established, bind and listening chains).
 * Only the listening-hash members need explicit initialization here;
 * the rest is set up at boot elsewhere.
 */
struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
	.lhash_lock	= RW_LOCK_UNLOCKED,
	.lhash_users	= ATOMIC_INIT(0),
	.lhash_wait	= __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
};
97
/* Bind @sk to local port @snum (0 means: pick a free ephemeral port).
 * Thin wrapper delegating to the generic inet connection-socket port
 * allocator with the standard bind-conflict check.
 */
static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
{
	return inet_csk_get_port(&tcp_hashinfo, sk, snum,
				 inet_csk_bind_conflict);
}
103
/* Insert @sk into the TCP hash tables (protocol ->hash hook; thin
 * wrapper around the generic inet hashing).
 */
static void tcp_v4_hash(struct sock *sk)
{
	inet_hash(&tcp_hashinfo, sk);
}
108
/* Remove @sk from the TCP hash tables (thin wrapper around
 * inet_unhash()).
 */
void tcp_unhash(struct sock *sk)
{
	inet_unhash(&tcp_hashinfo, sk);
}
113
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
115{
116 return secure_tcp_sequence_number(skb->nh.iph->daddr,
117 skb->nh.iph->saddr,
118 skb->h.th->dest,
119 skb->h.th->source);
120}
121
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -0800122int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
123{
124 const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
125 struct tcp_sock *tp = tcp_sk(sk);
126
127 /* With PAWS, it is safe from the viewpoint
128 of data integrity. Even without PAWS it is safe provided sequence
129 spaces do not overlap i.e. at data rates <= 80Mbit/sec.
130
131 Actually, the idea is close to VJ's one, only timestamp cache is
132 held not per host, but per port pair and TW bucket is used as state
133 holder.
134
135 If TW bucket has been already destroyed we fall back to VJ's scheme
136 and use initial timestamp retrieved from peer table.
137 */
138 if (tcptw->tw_ts_recent_stamp &&
139 (twp == NULL || (sysctl_tcp_tw_reuse &&
140 xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
141 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
142 if (tp->write_seq == 0)
143 tp->write_seq = 1;
144 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
145 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
146 sock_hold(sktw);
147 return 1;
148 }
149
150 return 0;
151}
152
153EXPORT_SYMBOL_GPL(tcp_twsk_unique);
154
/* called with local bh disabled */
/* Check whether the connection 4-tuple implied by @sk + @lport is
 * unique in the established hash.  On success the socket is hashed
 * under the bucket write lock and 0 is returned; a colliding
 * TIME-WAIT socket that may be recycled is either handed back via
 * @twp or descheduled here.  Returns -EADDRNOTAVAIL on collision.
 */
static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
				      struct inet_timewait_sock **twp)
{
	struct inet_sock *inet = inet_sk(sk);
	u32 daddr = inet->rcv_saddr;
	u32 saddr = inet->daddr;
	int dif = sk->sk_bound_dev_if;
	INET_ADDR_COOKIE(acookie, saddr, daddr)
	const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
	unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
	struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
	struct sock *sk2;
	const struct hlist_node *node;
	struct inet_timewait_sock *tw;

	prefetch(head->chain.first);
	write_lock(&head->lock);

	/* Check TIME-WAIT sockets first.  The TIME-WAIT half of the
	 * table lives ehash_size buckets past the established half.
	 */
	sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
		tw = inet_twsk(sk2);

		if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
			/* Colliding TIME-WAIT bucket: per-protocol hook
			 * decides whether it may be recycled. */
			if (twsk_unique(sk, sk2, twp))
				goto unique;
			else
				goto not_unique;
		}
	}
	tw = NULL;

	/* And established part... */
	sk_for_each(sk2, node, &head->chain) {
		if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
			goto not_unique;
	}

unique:
	/* Must record num and sport now. Otherwise we will see
	 * in hash table socket with a funny identity. */
	inet->num = lport;
	inet->sport = htons(lport);
	sk->sk_hash = hash;
	BUG_TRAP(sk_unhashed(sk));
	__sk_add_node(sk, &head->chain);
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(&head->lock);

	if (twp) {
		/* Caller takes ownership of the recyclable bucket. */
		*twp = tw;
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
	} else if (tw) {
		/* Silly. Should hash-dance instead... */
		inet_twsk_deschedule(tw, &tcp_death_row);
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

		inet_twsk_put(tw);
	}

	return 0;

not_unique:
	write_unlock(&head->lock);
	return -EADDRNOTAVAIL;
}
221
/* Offset into the ephemeral port range derived from the connection's
 * addresses and destination port (via secure_tcp_port_ephemeral), so
 * that different destinations start their port search at different
 * points.
 */
static inline u32 connect_port_offset(const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);

	return secure_tcp_port_ephemeral(inet->rcv_saddr, inet->daddr,
					 inet->dport);
}
229
230/*
231 * Bind a port for a connect operation and hash it.
232 */
233static inline int tcp_v4_hash_connect(struct sock *sk)
234{
Arnaldo Carvalho de Melo0f7ff922005-08-09 19:59:44 -0700235 const unsigned short snum = inet_sk(sk)->num;
236 struct inet_bind_hashbucket *head;
237 struct inet_bind_bucket *tb;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700238 int ret;
239
240 if (!snum) {
241 int low = sysctl_local_port_range[0];
242 int high = sysctl_local_port_range[1];
243 int range = high - low;
244 int i;
245 int port;
246 static u32 hint;
247 u32 offset = hint + connect_port_offset(sk);
248 struct hlist_node *node;
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -0700249 struct inet_timewait_sock *tw = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250
251 local_bh_disable();
252 for (i = 1; i <= range; i++) {
253 port = low + (i + offset) % range;
Arnaldo Carvalho de Melo6e04e022005-08-09 20:07:35 -0700254 head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 spin_lock(&head->lock);
256
257 /* Does not bother with rcv_saddr checks,
258 * because the established check is already
259 * unique enough.
260 */
Arnaldo Carvalho de Melo0f7ff922005-08-09 19:59:44 -0700261 inet_bind_bucket_for_each(tb, node, &head->chain) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700262 if (tb->port == port) {
263 BUG_TRAP(!hlist_empty(&tb->owners));
264 if (tb->fastreuse >= 0)
265 goto next_port;
266 if (!__tcp_v4_check_established(sk,
267 port,
268 &tw))
269 goto ok;
270 goto next_port;
271 }
272 }
273
Arnaldo Carvalho de Melo6e04e022005-08-09 20:07:35 -0700274 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275 if (!tb) {
276 spin_unlock(&head->lock);
277 break;
278 }
279 tb->fastreuse = -1;
280 goto ok;
281
282 next_port:
283 spin_unlock(&head->lock);
284 }
285 local_bh_enable();
286
287 return -EADDRNOTAVAIL;
288
289ok:
290 hint += i;
291
292 /* Head lock still held and bh's disabled */
Arnaldo Carvalho de Melo2d8c4ce2005-08-09 20:07:13 -0700293 inet_bind_hash(sk, tb, port);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294 if (sk_unhashed(sk)) {
295 inet_sk(sk)->sport = htons(port);
Arnaldo Carvalho de Melof3f05f72005-08-09 20:08:09 -0700296 __inet_hash(&tcp_hashinfo, sk, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700297 }
298 spin_unlock(&head->lock);
299
300 if (tw) {
Arnaldo Carvalho de Melo295ff7e2005-08-09 20:44:40 -0700301 inet_twsk_deschedule(tw, &tcp_death_row);;
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -0700302 inet_twsk_put(tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303 }
304
305 ret = 0;
306 goto out;
307 }
308
Arnaldo Carvalho de Melo6e04e022005-08-09 20:07:35 -0700309 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -0700310 tb = inet_csk(sk)->icsk_bind_hash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311 spin_lock_bh(&head->lock);
312 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
Arnaldo Carvalho de Melof3f05f72005-08-09 20:08:09 -0700313 __inet_hash(&tcp_hashinfo, sk, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314 spin_unlock_bh(&head->lock);
315 return 0;
316 } else {
317 spin_unlock(&head->lock);
318 /* No definite answer... Walk to established hash table */
319 ret = __tcp_v4_check_established(sk, snum, NULL);
320out:
321 local_bh_enable();
322 return ret;
323 }
324}
325
/* This will initiate an outgoing connection. */
/* Resolve the route, choose source address/port, enter SYN-SENT,
 * hash the socket, pick the ISN and send the SYN.  Returns 0 or a
 * negative errno; on failure the socket is moved back to TCP_CLOSE
 * and the local port (if any) is released.
 */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct rtable *rt;
	u32 daddr, nexthop;
	int tmp;
	int err;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	/* With a source route option the first hop differs from the
	 * final destination. */
	nexthop = daddr = usin->sin_addr.s_addr;
	if (inet->opt && inet->opt->srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet->opt->faddr;
	}

	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			       IPPROTO_TCP,
			       inet->sport, usin->sin_port, sk);
	if (tmp < 0)
		return tmp;

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet->opt || !inet->opt->srr)
		daddr = rt->rt_dst;

	if (!inet->saddr)
		inet->saddr = rt->rt_src;
	inet->rcv_saddr = inet->saddr;

	/* Reconnecting to a different destination: forget timestamp
	 * state inherited from the previous peer. */
	if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
		struct inet_peer *peer = rt_get_peer(rt);

		/* VJ's idea. We save last timestamp seen from
		 * the destination in peer table, when entering state TIME-WAIT
		 * and initialize rx_opt.ts_recent from it, when trying new connection.
		 */

		if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
			tp->rx_opt.ts_recent = peer->tcp_ts;
		}
	}

	inet->dport = usin->sin_port;
	inet->daddr = daddr;

	tp->ext_header_len = 0;
	if (inet->opt)
		tp->ext_header_len = inet->opt->optlen;

	/* Conservative default MSS until options are negotiated. */
	tp->rx_opt.mss_clamp = 536;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = tcp_v4_hash_connect(sk);
	if (err)
		goto failure;

	/* Re-check the route now that the source port is known. */
	err = ip_route_newports(&rt, inet->sport, inet->dport, sk);
	if (err)
		goto failure;

	/* OK, now commit destination to socket. */
	sk_setup_caps(sk, &rt->u.dst);

	if (!tp->write_seq)
		tp->write_seq = secure_tcp_sequence_number(inet->saddr,
							   inet->daddr,
							   inet->sport,
							   usin->sin_port);

	inet->id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);
	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/* This unhashes the socket and releases the local port, if necessary. */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->dport = 0;
	return err;
}
440
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441/*
442 * This routine does path mtu discovery as defined in RFC1191.
443 */
/* React to an ICMP FRAG_NEEDED for @sk: shrink the cached path MTU
 * (and hence the MSS) and retransmit, per RFC 1191.  @mtu is the
 * next-hop MTU reported by the ICMP message.
 */
static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph,
				     u32 mtu)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
	 * send out by Linux are always <576bytes so they should go through
	 * unfragmented).
	 */
	if (sk->sk_state == TCP_LISTEN)
		return;

	/* We don't check in the destentry if pmtu discovery is forbidden
	 * on this route. We just assume that no packet-too-big packets
	 * are send back when pmtu discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
	if ((dst = __sk_dst_check(sk, 0)) == NULL)
		return;

	dst->ops->update_pmtu(dst, mtu);

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	/* Only act when our cached MTU is actually larger than the
	 * new path MTU and the user did not disable PMTU discovery. */
	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    tp->pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
489
490/*
491 * This routine is called by the ICMP module when it gets some
492 * sort of error condition. If err < 0 then the socket should
493 * be closed and the error returned to the user. If err > 0
494 * it's just the icmp type << 8 | icmp code. After adjustment
495 * header points to the first 8 bytes of the tcp header. We need
496 * to find the appropriate port.
497 *
498 * The locking strategy used here is very "optimistic". When
499 * someone else accesses the socket the ICMP is just dropped
500 * and for some paths there is no check at all.
501 * A more general error queue to queue errors for later handling
502 * is probably better.
503 *
504 */
505
/* ICMP error handler for TCP (see the long comment above): locate the
 * socket the embedded TCP header belongs to and deliver or record the
 * error depending on socket state and ICMP type/code.
 */
void tcp_v4_err(struct sk_buff *skb, u32 info)
{
	struct iphdr *iph = (struct iphdr *)skb->data;
	struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
	struct tcp_sock *tp;
	struct inet_sock *inet;
	int type = skb->h.icmph->type;
	int code = skb->h.icmph->code;
	struct sock *sk;
	__u32 seq;
	int err;

	/* Need the embedded IP header plus first 8 bytes of TCP header. */
	if (skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
		return;
	}

	/* Note: the embedded header is from a packet WE sent, so the
	 * lookup key is daddr/dest as local side. */
	sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
			 th->source, inet_iif(skb));
	if (!sk) {
		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		/* inet_lookup() took a reference on the TW bucket. */
		inet_twsk_put((struct inet_timewait_sock *)sk);
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 */
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	/* Sanity-check the quoted sequence number against our window. */
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			if (!sock_owned_by_user(sk))
				do_pmtu_discovery(sk, iph, info);
			goto out;
		}

		err = icmp_err_convert[code].errno;
		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		/* The error may be for an embryonic (SYN_RECV) connection
		 * still parked on the listener's request queue. */
		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		BUG_TRAP(!req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can f.e. if SYNs crossed.
			     */
		if (!sock_owned_by_user(sk)) {
			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even this two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
653
654/* This routine computes an IPv4 TCP checksum. */
Arnaldo Carvalho de Melo8292a172005-12-13 23:15:52 -0800655void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700656{
657 struct inet_sock *inet = inet_sk(sk);
Arnaldo Carvalho de Melo8292a172005-12-13 23:15:52 -0800658 struct tcphdr *th = skb->h.th;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700659
660 if (skb->ip_summed == CHECKSUM_HW) {
661 th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
662 skb->csum = offsetof(struct tcphdr, check);
663 } else {
664 th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr,
665 csum_partial((char *)th,
666 th->doff << 2,
667 skb->csum));
668 }
669}
670
671/*
672 * This routine will send an RST to the other tcp.
673 *
674 * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
675 * for reset.
676 * Answer: if a packet caused RST, it is not for a socket
677 * existing in our system, if it is matched to a socket,
678 * it is just duplicate segment or bug in other side's TCP.
679 * So that we build reply only basing on parameters
680 * arrived with segment.
681 * Exception: precedence violation. We do not implement it in any case.
682 */
683
684static void tcp_v4_send_reset(struct sk_buff *skb)
685{
686 struct tcphdr *th = skb->h.th;
687 struct tcphdr rth;
688 struct ip_reply_arg arg;
689
690 /* Never send a reset in response to a reset. */
691 if (th->rst)
692 return;
693
694 if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
695 return;
696
697 /* Swap the send and the receive. */
698 memset(&rth, 0, sizeof(struct tcphdr));
699 rth.dest = th->source;
700 rth.source = th->dest;
701 rth.doff = sizeof(struct tcphdr) / 4;
702 rth.rst = 1;
703
704 if (th->ack) {
705 rth.seq = th->ack_seq;
706 } else {
707 rth.ack = 1;
708 rth.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
709 skb->len - (th->doff << 2));
710 }
711
712 memset(&arg, 0, sizeof arg);
713 arg.iov[0].iov_base = (unsigned char *)&rth;
714 arg.iov[0].iov_len = sizeof rth;
715 arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
716 skb->nh.iph->saddr, /*XXX*/
717 sizeof(struct tcphdr), IPPROTO_TCP, 0);
718 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
719
720 ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth);
721
722 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
723 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
724}
725
726/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
727 outside socket context is ugly, certainly. What can I do?
728 */
729
730static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
731 u32 win, u32 ts)
732{
733 struct tcphdr *th = skb->h.th;
734 struct {
735 struct tcphdr th;
736 u32 tsopt[3];
737 } rep;
738 struct ip_reply_arg arg;
739
740 memset(&rep.th, 0, sizeof(struct tcphdr));
741 memset(&arg, 0, sizeof arg);
742
743 arg.iov[0].iov_base = (unsigned char *)&rep;
744 arg.iov[0].iov_len = sizeof(rep.th);
745 if (ts) {
746 rep.tsopt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
747 (TCPOPT_TIMESTAMP << 8) |
748 TCPOLEN_TIMESTAMP);
749 rep.tsopt[1] = htonl(tcp_time_stamp);
750 rep.tsopt[2] = htonl(ts);
751 arg.iov[0].iov_len = sizeof(rep);
752 }
753
754 /* Swap the send and the receive. */
755 rep.th.dest = th->source;
756 rep.th.source = th->dest;
757 rep.th.doff = arg.iov[0].iov_len / 4;
758 rep.th.seq = htonl(seq);
759 rep.th.ack_seq = htonl(ack);
760 rep.th.ack = 1;
761 rep.th.window = htons(win);
762
763 arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
764 skb->nh.iph->saddr, /*XXX*/
765 arg.iov[0].iov_len, IPPROTO_TCP, 0);
766 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
767
768 ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);
769
770 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
771}
772
773static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
774{
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -0700775 struct inet_timewait_sock *tw = inet_twsk(sk);
776 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700777
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -0700778 tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
779 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -0700781 inet_twsk_put(tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700782}
783
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700784static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700785{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700786 tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700787 req->ts_recent);
788}
789
Linus Torvalds1da177e2005-04-16 15:20:36 -0700790/*
791 * Send a SYN-ACK after having received an ACK.
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700792 * This still operates on a request_sock only, not on a big
Linus Torvalds1da177e2005-04-16 15:20:36 -0700793 * socket.
794 */
static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
			      struct dst_entry *dst)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int err = -1;
	struct sk_buff * skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		goto out;

	skb = tcp_make_synack(sk, dst, req);

	if (skb) {
		/* Checksum by hand: addresses come from the request,
		 * not from an established socket. */
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v4_check(th, skb->len,
					 ireq->loc_addr,
					 ireq->rmt_addr,
					 csum_partial((char *)th, skb->len,
						      skb->csum));

		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
					    ireq->rmt_addr,
					    ireq->opt);
		/* Congestion notification from the IP layer is not a
		 * failure for our caller. */
		if (err == NET_XMIT_CN)
			err = 0;
	}

out:
	dst_release(dst);
	return err;
}
828
829/*
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700830 * IPv4 request_sock destructor.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700831 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	/* Free the IP options saved by tcp_v4_save_options(), if any
	 * (kfree(NULL) is a no-op). */
	kfree(inet_rsk(req)->opt);
}
836
837static inline void syn_flood_warning(struct sk_buff *skb)
838{
839 static unsigned long warntime;
840
841 if (time_after(jiffies, (warntime + HZ * 60))) {
842 warntime = jiffies;
843 printk(KERN_INFO
844 "possible SYN flooding on port %d. Sending cookies.\n",
845 ntohs(skb->h.th->dest));
846 }
847}
848
849/*
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700850 * Save and compile IPv4 options into the request_sock if needed.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700851 */
852static inline struct ip_options *tcp_v4_save_options(struct sock *sk,
853 struct sk_buff *skb)
854{
855 struct ip_options *opt = &(IPCB(skb)->opt);
856 struct ip_options *dopt = NULL;
857
858 if (opt && opt->optlen) {
859 int opt_size = optlength(opt);
860 dopt = kmalloc(opt_size, GFP_ATOMIC);
861 if (dopt) {
862 if (ip_options_echo(dopt, skb)) {
863 kfree(dopt);
864 dopt = NULL;
865 }
866 }
867 }
868 return dopt;
869}
870
/* request_sock hooks for IPv4 TCP: how SYN-ACKs are (re)transmitted,
 * how ACKs/RSTs are emitted on behalf of an embryonic connection, and
 * how a request is torn down (freeing saved IP options).
 */
struct request_sock_ops tcp_request_sock_ops = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_v4_send_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
};
879
/* TIME-WAIT socket hooks: the object size, plus the recycling check
 * used when a new connect() collides with a TIME-WAIT bucket (see
 * tcp_twsk_unique() above).
 */
static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
};
884
/*
 * Handle an incoming SYN on a listening socket: validate it, allocate a
 * request_sock, pick the initial sequence number (possibly a syncookie)
 * and answer with a SYN-ACK.
 *
 * @sk:  the listening socket
 * @skb: the received SYN segment
 *
 * Always returns 0; an unacceptable request is simply dropped, since TCP
 * has no other way to signal failure here.
 */
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct inet_request_sock *ireq;
	struct tcp_options_received tmp_opt;
	struct request_sock *req;
	__u32 saddr = skb->nh.iph->saddr;
	__u32 daddr = skb->nh.iph->daddr;
	/* when != 0 means this SYN hit a live TIME_WAIT bucket; the ISN to
	 * use was chosen there (see tcp_timewait_state_process()). */
	__u32 isn = TCP_SKB_CB(skb)->when;
	struct dst_entry *dst = NULL;
#ifdef CONFIG_SYN_COOKIES
	int want_cookie = 0;
#else
#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
#endif

	/* Never answer to SYNs send to broadcast or multicast */
	if (((struct rtable *)skb->dst)->rt_flags &
	    (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
#ifdef CONFIG_SYN_COOKIES
		if (sysctl_tcp_syncookies) {
			want_cookie = 1;
		} else
#endif
		goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = 536;	/* RFC 1122 default MSS until options say otherwise */
	tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	/* Syncookies cannot encode TCP options, so ignore whatever the
	 * peer advertised. */
	if (want_cookie) {
		tcp_clear_options(&tmp_opt);
		tmp_opt.saw_tstamp = 0;
	}

	if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
		/* Some OSes (unknown ones, but I see them on web server, which
		 * contains information interesting only for windows'
		 * users) do not send their stamp in SYN. It is easy case.
		 * We simply do not advertise TS support.
		 */
		tmp_opt.saw_tstamp = 0;
		tmp_opt.tstamp_ok = 0;
	}
	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;

	tcp_openreq_init(req, &tmp_opt, skb);

	ireq = inet_rsk(req);
	ireq->loc_addr = daddr;
	ireq->rmt_addr = saddr;
	/* Ownership of the option copy passes to the request sock. */
	ireq->opt = tcp_v4_save_options(sk, skb);
	if (!want_cookie)
		TCP_ECN_create_request(req, skb->h.th);

	if (want_cookie) {
#ifdef CONFIG_SYN_COOKIES
		syn_flood_warning(skb);
#endif
		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
	} else if (!isn) {
		struct inet_peer *peer = NULL;

		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet_csk_route_req(sk, req)) != NULL &&
		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
		    peer->v4daddr == saddr) {
			/* PAWS: reject if the peer's remembered timestamp is
			 * newer than the one in this SYN and still fresh. */
			if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
			    (s32)(peer->tcp_ts - req->ts_recent) >
							TCP_PAWS_WINDOW) {
				NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
				dst_release(dst);
				goto drop_and_free;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 (!peer || !peer->tcp_ts_stamp) &&
			 (!dst || !dst_metric(dst, RTAX_RTT))) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
				       "request from %u.%u.%u.%u/%u\n",
				       NIPQUAD(saddr),
				       ntohs(skb->h.th->source));
			dst_release(dst);
			goto drop_and_free;
		}

		isn = tcp_v4_init_sequence(sk, skb);
	}
	tcp_rsk(req)->snt_isn = isn;

	if (tcp_v4_send_synack(sk, req, dst))
		goto drop_and_free;

	/* With syncookies no state is kept: the request is reconstructed
	 * from the cookie when the ACK comes back. */
	if (want_cookie) {
		reqsk_free(req);
	} else {
		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	}
	return 0;

drop_and_free:
	reqsk_free(req);
drop:
	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
	return 0;
}
1032
1033
/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 *
 * @sk:  the listening socket
 * @skb: the final ACK of the handshake
 * @req: the request_sock created by tcp_v4_conn_request()
 * @dst: optional pre-resolved route to the peer (looked up here if NULL)
 *
 * Returns the new child socket, or NULL on failure (backlog overflow,
 * no route, or allocation failure).
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;

	/* The listener's accept backlog bounds the number of children. */
	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		goto exit;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit;

	sk_setup_caps(newsk, dst);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	ireq = inet_rsk(req);
	newinet->daddr = ireq->rmt_addr;
	newinet->rcv_saddr = ireq->loc_addr;
	newinet->saddr = ireq->loc_addr;
	newinet->opt = ireq->opt;
	ireq->opt = NULL;	/* ownership of the IP options moved to newsk */
	newinet->mc_index = inet_iif(skb);
	newinet->mc_ttl = skb->nh.iph->ttl;
	newtp->ext_header_len = 0;
	if (newinet->opt)
		newtp->ext_header_len = newinet->opt->optlen;
	newinet->id = newtp->write_seq ^ jiffies;

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

	/* Make the child visible in the established hash and tie it to
	 * the listener's bound port. */
	__inet_hash(&tcp_hashinfo, newsk, 0);
	__inet_inherit_port(&tcp_hashinfo, sk, newsk);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
exit:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	dst_release(dst);
	return NULL;
}
1090
/*
 * A listening socket received a segment that is not a plain SYN: figure
 * out which socket should actually handle it.
 *
 * Checks, in order: a pending open request (half-open connection), an
 * established socket that matches the 4-tuple, and finally a syncookie
 * ACK.  Returns the socket to process the segment with (locked if it is
 * a new established socket), NULL to discard, or @sk itself.
 */
static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th;
	struct iphdr *iph = skb->nh.iph;
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
						       iph->saddr, iph->daddr);
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
					th->source, skb->nh.iph->daddr,
					ntohs(th->dest), inet_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		/* A TIME_WAIT bucket matched; drop the lookup reference. */
		inet_twsk_put((struct inet_timewait_sock *)nsk);
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	/* A bare ACK may complete a cookie-based handshake. */
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}
1122
/*
 * Prepare the TCP checksum state of an incoming skb.
 *
 * If hardware already computed the checksum (CHECKSUM_HW) and it verifies,
 * mark it CHECKSUM_UNNECESSARY.  Otherwise seed skb->csum with the
 * pseudo-header sum; short packets are fully verified right away, longer
 * ones are left for later completion (see tcp_checksum_complete()).
 *
 * Returns 0 on success, non-zero if a short packet fails verification.
 */
static int tcp_v4_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_HW) {
		if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
				  skb->nh.iph->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, skb->nh.iph->daddr,
				       skb->len, IPPROTO_TCP, 0);

	/* 76 bytes: small enough that checksumming now is cheap —
	 * presumably a header-length-based heuristic; verify upstream. */
	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}
1141
1142
/* The socket must have it's spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
			goto reset;
		TCP_CHECK_TIMER(sk);
		return 0;
	}

	/* Segment shorter than its own data offset claims, or checksum
	 * failure: account and discard. */
	if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/* tcp_v4_hnd_req() resolved the segment to a child
		 * socket: let the child process it. */
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
		goto reset;
	TCP_CHECK_TIMER(sk);
	return 0;

reset:
	tcp_v4_send_reset(skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(TCP_MIB_INERRS);
	goto discard;
}
1197
/*
 * From tcp_input.c
 */

/*
 * Main IPv4 TCP receive entry point, called from the IP layer for every
 * TCP segment.  Validates the header, looks up the owning socket and
 * dispatches: to the socket's receive path (prequeue/backlog aware), to
 * syncookie/RST handling when no socket matches, or to TIME_WAIT
 * processing.  Returns the value of tcp_v4_do_rcv() or 0.
 */
int tcp_v4_rcv(struct sk_buff *skb)
{
	struct tcphdr *th;
	struct sock *sk;
	int ret;

	/* Only segments addressed to this host are processed. */
	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = skb->h.th;

	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided case of th->doff==0 is eliminated.
	 * So, we defer the checks. */
	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
	     tcp_v4_checksum_init(skb)))
		goto bad_packet;

	/* Re-read the header pointer: pskb_may_pull() may have moved it. */
	th = skb->h.th;
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = skb->nh.iph->tos;
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
			   skb->nh.iph->daddr, ntohs(th->dest),
			   inet_iif(skb));

	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb, 0))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock(sk);
	ret = 0;
	/* If a user process owns the socket, queue to the backlog;
	 * otherwise try the prequeue, falling back to direct receive. */
	if (!sock_owned_by_user(sk)) {
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v4_do_rcv(sk, skb);
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
	} else {
		/* Valid segment for a nonexistent connection: answer RST. */
		tcp_v4_send_reset(skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put((struct inet_timewait_sock *) sk);
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		inet_twsk_put((struct inet_timewait_sock *) sk);
		goto discard_it;
	}
	switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
					   skb, th)) {
	/* A new SYN may legitimately reuse a TIME_WAIT 4-tuple: retarget
	 * it at a current listener if one exists. */
	case TCP_TW_SYN: {
		struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
							skb->nh.iph->daddr,
							ntohs(th->dest),
							inet_iif(skb));
		if (sk2) {
			inet_twsk_deschedule((struct inet_timewait_sock *)sk,
					     &tcp_death_row);
			inet_twsk_put((struct inet_timewait_sock *)sk);
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
1329
/* VJ's idea. Save last timestamp seen from this destination
 * and hold it at least for normal timewait interval to use for duplicate
 * segment detection in subsequent connections, before they enter synchronized
 * state.
 */

/*
 * Record the most recent TCP timestamp seen from this socket's peer in
 * the inet_peer cache (used by the PAWS check in tcp_v4_conn_request()).
 * Returns 1 if a peer entry was found/created and consulted, 0 otherwise.
 */
int tcp_v4_remember_stamp(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
	struct inet_peer *peer = NULL;
	int release_it = 0;

	/* Prefer the peer already bound to our cached route; only take an
	 * extra reference (which we must drop) when looking one up. */
	if (!rt || rt->rt_dst != inet->daddr) {
		peer = inet_getpeer(inet->daddr, 1);
		release_it = 1;
	} else {
		if (!rt->peer)
			rt_bind_peer(rt, 1);
		peer = rt->peer;
	}

	if (peer) {
		/* Update only if our timestamp is newer, or the stored one
		 * is stale (older than TCP_PAWS_MSL and not ahead of ours). */
		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
		     peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
			peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
			peer->tcp_ts = tp->rx_opt.ts_recent;
		}
		if (release_it)
			inet_putpeer(peer);
		return 1;
	}

	return 0;
}
1367
/*
 * TIME_WAIT variant of tcp_v4_remember_stamp(): propagate the timestamp
 * state held in the timewait bucket to the peer cache.
 * Returns 1 if a peer entry was updated/consulted, 0 otherwise.
 */
int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
{
	struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);

	if (peer) {
		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);

		/* Same freshness rule as tcp_v4_remember_stamp(). */
		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
		     peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
			peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
			peer->tcp_ts = tcptw->tw_ts_recent;
		}
		inet_putpeer(peer);
		return 1;
	}

	return 0;
}
1387
/* AF_INET operations plugged into the connection socket layer: how an
 * IPv4 TCP socket transmits, routes, accepts connections and handles
 * socket options. */
struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	= ip_queue_xmit,
	.send_check	= tcp_v4_send_check,
	.rebuild_header	= inet_sk_rebuild_header,
	.conn_request	= tcp_v4_conn_request,
	.syn_recv_sock	= tcp_v4_syn_recv_sock,
	.remember_stamp	= tcp_v4_remember_stamp,
	.net_header_len	= sizeof(struct iphdr),
	.setsockopt	= ip_setsockopt,
	.getsockopt	= ip_getsockopt,
	.addr2sockaddr	= inet_csk_addr2sockaddr,
	.sockaddr_len	= sizeof(struct sockaddr_in),
};
1401
/* NOTE: A lot of things set to zero explicitly by call to
 * sk_alloc() so need not be done here.
 */
/*
 * Per-socket initialization for a new IPv4 TCP socket: timers, queues,
 * RTO/congestion defaults and the af-specific operations vector.
 * Always returns 0.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them. -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;	/* Infinity */
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = 536;	/* RFC 1122 default MSS */

	tp->reordering = sysctl_tcp_reordering;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;

	sk->sk_state = TCP_CLOSE;

	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	icsk->icsk_af_ops = &ipv4_specific;

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}
1448
/*
 * Tear down an IPv4 TCP socket: stop timers, release congestion-control
 * state, purge all queues, drop the bind-bucket reference and free the
 * cached sendmsg page.  Always returns 0.
 */
int tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	/* Cleanup up the write buffer. */
	sk_stream_writequeue_purge(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	__skb_queue_purge(&tp->out_of_order_queue);

	/* Clean prequeue, it must be empty really */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(&tcp_hashinfo, sk);

	/*
	 * If sendmsg cached page exists, toss it.
	 */
	if (sk->sk_sndmsg_page) {
		__free_page(sk->sk_sndmsg_page);
		sk->sk_sndmsg_page = NULL;
	}

	atomic_dec(&tcp_sockets_allocated);

	return 0;
}

EXPORT_SYMBOL(tcp_v4_destroy_sock);
1484
1485#ifdef CONFIG_PROC_FS
1486/* Proc filesystem TCP sock list dumping. */
1487
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -07001488static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001489{
1490 return hlist_empty(head) ? NULL :
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -07001491 list_entry(head->first, struct inet_timewait_sock, tw_node);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001492}
1493
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -07001494static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001495{
1496 return tw->tw_node.next ?
1497 hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1498}
1499
/*
 * Advance the /proc iterator over listening sockets and their pending
 * open requests.
 *
 * @cur may be a listening sock, a request_sock (when st->state is
 * TCP_SEQ_STATE_OPENREQ) or NULL to start from the first listen bucket.
 * While walking a listener's SYN table the per-socket syn_wait_lock is
 * held (taken here, released either here or in tcp_seq_stop()).
 * Returns the next matching entry or NULL when exhausted.
 */
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct inet_connection_sock *icsk;
	struct hlist_node *node;
	struct sock *sk = cur;
	struct tcp_iter_state* st = seq->private;

	if (!sk) {
		st->bucket = 0;
		sk = sk_head(&tcp_hashinfo.listening_hash[0]);
		goto get_sk;
	}

	++st->num;

	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		struct request_sock *req = cur;

		icsk = inet_csk(st->syn_wait_sk);
		req = req->dl_next;
		while (1) {
			/* Walk the rest of the current SYN-table chain,
			 * skipping requests of other address families. */
			while (req) {
				if (req->rsk_ops->family == st->family) {
					cur = req;
					goto out;
				}
				req = req->dl_next;
			}
			if (++st->sbucket >= TCP_SYNQ_HSIZE)
				break;
get_req:
			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
		}
		/* SYN table exhausted: drop its lock and resume with the
		 * next listening socket. */
		sk = sk_next(st->syn_wait_sk);
		st->state = TCP_SEQ_STATE_LISTENING;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	} else {
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue))
			goto start_req;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		sk = sk_next(sk);
	}
get_sk:
	sk_for_each_from(sk, node) {
		if (sk->sk_family == st->family) {
			cur = sk;
			goto out;
		}
		/* Even a non-matching listener may hold open requests of
		 * our family: peek at its accept queue. */
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
start_req:
			st->uid = sock_i_uid(sk);
			st->syn_wait_sk = sk;
			st->state = TCP_SEQ_STATE_OPENREQ;
			st->sbucket = 0;
			goto get_req;
		}
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	}
	if (++st->bucket < INET_LHTABLE_SIZE) {
		sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
		goto get_sk;
	}
	cur = NULL;
out:
	return cur;
}
1570
1571static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
1572{
1573 void *rc = listening_get_next(seq, NULL);
1574
1575 while (rc && *pos) {
1576 rc = listening_get_next(seq, rc);
1577 --*pos;
1578 }
1579 return rc;
1580}
1581
/*
 * Find the first established (or, failing that, TIME_WAIT) socket of the
 * iterator's family in the ehash table.
 *
 * On success the matched bucket's read lock is left held (released later
 * by established_get_next() or tcp_seq_stop()); st->state records whether
 * the entry is established or TIME_WAIT.  Returns the entry or NULL.
 */
static void *established_get_first(struct seq_file *seq)
{
	struct tcp_iter_state* st = seq->private;
	void *rc = NULL;

	for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
		struct sock *sk;
		struct hlist_node *node;
		struct inet_timewait_sock *tw;

		/* We can reschedule _before_ having picked the target: */
		cond_resched_softirq();

		read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			if (sk->sk_family != st->family) {
				continue;
			}
			rc = sk;
			goto out;
		}
		/* TIME_WAIT buckets live in the upper half of the ehash
		 * table, ehash_size entries above the established ones. */
		st->state = TCP_SEQ_STATE_TIME_WAIT;
		inet_twsk_for_each(tw, node,
				   &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) {
			if (tw->tw_family != st->family) {
				continue;
			}
			rc = tw;
			goto out;
		}
		read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;
	}
out:
	return rc;
}
1618
/*
 * Advance the established/TIME_WAIT iterator past @cur.
 *
 * Walks the remainder of the current chain, then the bucket's TIME_WAIT
 * chain, then moves to the next bucket, handing the per-bucket read lock
 * from one bucket to the next.  Returns the next entry or NULL.
 */
static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct inet_timewait_sock *tw;
	struct hlist_node *node;
	struct tcp_iter_state* st = seq->private;

	++st->num;

	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
		tw = cur;
		tw = tw_next(tw);
get_tw:
		/* Skip TIME_WAIT entries of other address families. */
		while (tw && tw->tw_family != st->family) {
			tw = tw_next(tw);
		}
		if (tw) {
			cur = tw;
			goto out;
		}
		/* TIME_WAIT chain exhausted: release this bucket and move
		 * to the next one. */
		read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;

		/* We can reschedule between buckets: */
		cond_resched_softirq();

		if (++st->bucket < tcp_hashinfo.ehash_size) {
			read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
			sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
		} else {
			cur = NULL;
			goto out;
		}
	} else
		sk = sk_next(sk);

	sk_for_each_from(sk, node) {
		if (sk->sk_family == st->family)
			goto found;
	}

	/* Established chain done: fall over to this bucket's TIME_WAIT
	 * chain (upper half of the ehash table). */
	st->state = TCP_SEQ_STATE_TIME_WAIT;
	tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain);
	goto get_tw;
found:
	cur = sk;
out:
	return cur;
}
1668
1669static void *established_get_idx(struct seq_file *seq, loff_t pos)
1670{
1671 void *rc = established_get_first(seq);
1672
1673 while (rc && pos) {
1674 rc = established_get_next(seq, rc);
1675 --pos;
1676 }
1677 return rc;
1678}
1679
/*
 * Position the combined iterator at offset @pos: listening sockets (and
 * their open requests) come first, then established/TIME_WAIT sockets.
 *
 * Takes the listen lock; if the listening walk is exhausted the lock is
 * released and BHs are disabled for the established walk (the matching
 * unlock/enable happens in tcp_seq_stop()).  Returns the entry or NULL.
 */
static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc;
	struct tcp_iter_state* st = seq->private;

	inet_listen_lock(&tcp_hashinfo);
	st->state = TCP_SEQ_STATE_LISTENING;
	rc = listening_get_idx(seq, &pos);

	if (!rc) {
		inet_listen_unlock(&tcp_hashinfo);
		local_bh_disable();
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		rc = established_get_idx(seq, pos);
	}

	return rc;
}
1698
1699static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
1700{
1701 struct tcp_iter_state* st = seq->private;
1702 st->state = TCP_SEQ_STATE_LISTENING;
1703 st->num = 0;
1704 return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
1705}
1706
/*
 * seq_file ->next: advance to the next entry after @v, switching from
 * the listening walk to the established walk when the former runs out
 * (mirroring the lock handover done in tcp_get_idx()).
 */
static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	void *rc = NULL;
	struct tcp_iter_state* st;

	/* After the header token the real iteration starts at index 0. */
	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			/* Listening walk finished: swap the listen lock for
			 * BH-disabled established-hash walking. */
			inet_listen_unlock(&tcp_hashinfo);
			local_bh_disable();
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			rc	  = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
	case TCP_SEQ_STATE_TIME_WAIT:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	return rc;
}
1738
/*
 * seq_file ->stop: release whatever locks the iterator still holds,
 * depending on the state the walk stopped in.
 */
static void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state* st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
		if (v) {
			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		}
		/* fall through: the listen lock is also still held */
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			inet_listen_unlock(&tcp_hashinfo);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
		local_bh_enable();
		break;
	}
}
1761
1762static int tcp_seq_open(struct inode *inode, struct file *file)
1763{
1764 struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
1765 struct seq_file *seq;
1766 struct tcp_iter_state *s;
1767 int rc;
1768
1769 if (unlikely(afinfo == NULL))
1770 return -EINVAL;
1771
1772 s = kmalloc(sizeof(*s), GFP_KERNEL);
1773 if (!s)
1774 return -ENOMEM;
1775 memset(s, 0, sizeof(*s));
1776 s->family = afinfo->family;
1777 s->seq_ops.start = tcp_seq_start;
1778 s->seq_ops.next = tcp_seq_next;
1779 s->seq_ops.show = afinfo->seq_show;
1780 s->seq_ops.stop = tcp_seq_stop;
1781
1782 rc = seq_open(file, &s->seq_ops);
1783 if (rc)
1784 goto out_kfree;
1785 seq = file->private_data;
1786 seq->private = s;
1787out:
1788 return rc;
1789out_kfree:
1790 kfree(s);
1791 goto out;
1792}
1793
1794int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
1795{
1796 int rc = 0;
1797 struct proc_dir_entry *p;
1798
1799 if (!afinfo)
1800 return -EINVAL;
1801 afinfo->seq_fops->owner = afinfo->owner;
1802 afinfo->seq_fops->open = tcp_seq_open;
1803 afinfo->seq_fops->read = seq_read;
1804 afinfo->seq_fops->llseek = seq_lseek;
1805 afinfo->seq_fops->release = seq_release_private;
1806
1807 p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
1808 if (p)
1809 p->data = afinfo;
1810 else
1811 rc = -ENOMEM;
1812 return rc;
1813}
1814
1815void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
1816{
1817 if (!afinfo)
1818 return;
1819 proc_net_remove(afinfo->name);
1820 memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
1821}
1822
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001823static void get_openreq4(struct sock *sk, struct request_sock *req,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001824 char *tmpbuf, int i, int uid)
1825{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001826 const struct inet_request_sock *ireq = inet_rsk(req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001827 int ttd = req->expires - jiffies;
1828
1829 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
1830 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
1831 i,
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001832 ireq->loc_addr,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001833 ntohs(inet_sk(sk)->sport),
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001834 ireq->rmt_addr,
1835 ntohs(ireq->rmt_port),
Linus Torvalds1da177e2005-04-16 15:20:36 -07001836 TCP_SYN_RECV,
1837 0, 0, /* could print option size, but that is af dependent. */
1838 1, /* timers active (only the expire timer) */
1839 jiffies_to_clock_t(ttd),
1840 req->retrans,
1841 uid,
1842 0, /* non standard timer */
1843 0, /* open_requests have no inode */
1844 atomic_read(&sk->sk_refcnt),
1845 req);
1846}
1847
/*
 * Format one full TCP socket (listening or established) as a
 * /proc/net/tcp row into tmpbuf.  Column order matches the header
 * emitted by tcp4_seq_show().
 */
static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
{
	int timer_active;
	unsigned long timer_expires;
	struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	struct inet_sock *inet = inet_sk(sp);
	unsigned int dest = inet->daddr;
	unsigned int src = inet->rcv_saddr;
	__u16 destp = ntohs(inet->dport);
	__u16 srcp = ntohs(inet->sport);

	/* Encode the pending timer for userspace: 1 = retransmit,
	 * 4 = zero-window probe, 2 = sk_timer (presumably keepalive —
	 * confirm against sock_init_data users), 0 = none.
	 */
	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;	/* prints as zero time left */
	}

	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5d %8d %lu %d %p %u %u %u %u %d",
		i, src, srcp, dest, destp, sp->sk_state,
		tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq,
		timer_active,
		jiffies_to_clock_t(timer_expires - jiffies),
		icsk->icsk_retransmits,
		sock_i_uid(sp),
		icsk->icsk_probes_out,
		sock_i_ino(sp),
		atomic_read(&sp->sk_refcnt), sp,
		icsk->icsk_rto,
		icsk->icsk_ack.ato,
		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		tp->snd_cwnd,
		tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
}
1891
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -07001892static void get_timewait4_sock(struct inet_timewait_sock *tw, char *tmpbuf, int i)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001893{
1894 unsigned int dest, src;
1895 __u16 destp, srcp;
1896 int ttd = tw->tw_ttd - jiffies;
1897
1898 if (ttd < 0)
1899 ttd = 0;
1900
1901 dest = tw->tw_daddr;
1902 src = tw->tw_rcv_saddr;
1903 destp = ntohs(tw->tw_dport);
1904 srcp = ntohs(tw->tw_sport);
1905
1906 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
1907 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
1908 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
1909 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
1910 atomic_read(&tw->tw_refcnt), tw);
1911}
1912
1913#define TMPSZ 150
1914
/*
 * seq_file ->show() for /proc/net/tcp: emit the header row for the
 * start token, otherwise format the current object according to the
 * iterator state and print it padded to a fixed width.
 */
static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state* st;
	char tmpbuf[TMPSZ + 1];

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "%-*s\n", TMPSZ - 1,
			   " sl local_address rem_address st tx_queue "
			   "rx_queue tr tm->when retrnsmt uid timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	/* The state tells us what kind of object v points at. */
	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp4_sock(v, tmpbuf, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait4_sock(v, tmpbuf, st->num);
		break;
	}
	seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
out:
	return 0;
}
1945
/* fops filled in by tcp_proc_register() at registration time. */
static struct file_operations tcp4_seq_fops;

/* AF_INET instance of the generic TCP /proc machinery above. */
static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.owner		= THIS_MODULE,
	.name		= "tcp",
	.family		= AF_INET,
	.seq_show	= tcp4_seq_show,
	.seq_fops	= &tcp4_seq_fops,
};
1954
/* Create /proc/net/tcp at boot; returns 0 or a negative errno. */
int __init tcp4_proc_init(void)
{
	return tcp_proc_register(&tcp4_seq_afinfo);
}
1959
/* Remove /proc/net/tcp; counterpart of tcp4_proc_init(). */
void tcp4_proc_exit(void)
{
	tcp_proc_unregister(&tcp4_seq_afinfo);
}
1964#endif /* CONFIG_PROC_FS */
1965
/*
 * The IPv4 TCP protocol method table handed to the socket layer.
 * Entries left unset fall back to the socket core's defaults.
 */
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.sendmsg		= tcp_sendmsg,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v4_do_rcv,
	.hash			= tcp_v4_hash,
	.unhash			= tcp_unhash,
	.get_port		= tcp_v4_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	/* Global accounting shared by all TCP sockets. */
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	/* Per-state "mini socket" ops for TIME_WAIT and SYN_RECV. */
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
};
1998
1999
2000
/*
 * Boot-time setup of the kernel-internal TCP control socket (used for
 * sending resets/ACKs on behalf of no real socket).  Failure here is
 * unrecoverable, hence the panic.
 */
void __init tcp_v4_init(struct net_proto_family *ops)
{
	int err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_TCP, &tcp_socket);
	if (err < 0)
		panic("Failed to create the TCP control socket.\n");
	/* Sends may happen from softirq context, so no sleeping allocs. */
	tcp_socket->sk->sk_allocation = GFP_ATOMIC;
	inet_sk(tcp_socket->sk)->uc_ttl = -1;	/* use route default TTL */

	/* Unhash it so that IP input processing does not even
	 * see it, we do not wish this socket to see incoming
	 * packets.
	 */
	tcp_socket->sk->sk_prot->unhash(tcp_socket->sk);
}
2015
2016EXPORT_SYMBOL(ipv4_specific);
Arnaldo Carvalho de Melo0f7ff922005-08-09 19:59:44 -07002017EXPORT_SYMBOL(inet_bind_bucket_create);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002018EXPORT_SYMBOL(tcp_hashinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002019EXPORT_SYMBOL(tcp_prot);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002020EXPORT_SYMBOL(tcp_unhash);
2021EXPORT_SYMBOL(tcp_v4_conn_request);
2022EXPORT_SYMBOL(tcp_v4_connect);
2023EXPORT_SYMBOL(tcp_v4_do_rcv);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002024EXPORT_SYMBOL(tcp_v4_remember_stamp);
2025EXPORT_SYMBOL(tcp_v4_send_check);
2026EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
2027
2028#ifdef CONFIG_PROC_FS
2029EXPORT_SYMBOL(tcp_proc_register);
2030EXPORT_SYMBOL(tcp_proc_unregister);
2031#endif
2032EXPORT_SYMBOL(sysctl_local_port_range);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002033EXPORT_SYMBOL(sysctl_tcp_low_latency);
2034EXPORT_SYMBOL(sysctl_tcp_tw_reuse);
2035