blob: a81caa1be0cfe33baed4e3a1e0ded3f2c4a04f84 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Implementation of the Transmission Control Protocol(TCP).
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * IPv4 specific functions
9 *
10 *
11 * code split from:
12 * linux/ipv4/tcp.c
13 * linux/ipv4/tcp_input.c
14 * linux/ipv4/tcp_output.c
15 *
16 * See tcp.c for author information
17 *
18 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License
20 * as published by the Free Software Foundation; either version
21 * 2 of the License, or (at your option) any later version.
22 */
23
24/*
25 * Changes:
26 * David S. Miller : New socket lookup architecture.
27 * This code is dedicated to John Dyson.
28 * David S. Miller : Change semantics of established hash,
29 * half is devoted to TIME_WAIT sockets
30 * and the rest go in the other half.
31 * Andi Kleen : Add support for syncookies and fixed
32 * some bugs: ip options weren't passed to
33 * the TCP layer, missed a check for an
34 * ACK bit.
35 * Andi Kleen : Implemented fast path mtu discovery.
36 * Fixed many serious bugs in the
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -070037 * request_sock handling and moved
Linus Torvalds1da177e2005-04-16 15:20:36 -070038 * most of it into the af independent code.
39 * Added tail drop and some other bugfixes.
Stephen Hemmingercaa20d9a2005-11-10 17:13:47 -080040 * Added new listen semantics.
Linus Torvalds1da177e2005-04-16 15:20:36 -070041 * Mike McLagan : Routing by source
42 * Juan Jose Ciarlante: ip_dynaddr bits
43 * Andi Kleen: various fixes.
44 * Vitaly E. Lavrov : Transparent proxy revived after year
45 * coma.
46 * Andi Kleen : Fix new listen.
47 * Andi Kleen : Fix accept error reporting.
48 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
49 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
50 * a single port at the same time.
51 */
52
Linus Torvalds1da177e2005-04-16 15:20:36 -070053
54#include <linux/types.h>
55#include <linux/fcntl.h>
56#include <linux/module.h>
57#include <linux/random.h>
58#include <linux/cache.h>
59#include <linux/jhash.h>
60#include <linux/init.h>
61#include <linux/times.h>
62
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020063#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070064#include <net/icmp.h>
Arnaldo Carvalho de Melo304a1612005-08-09 19:59:20 -070065#include <net/inet_hashtables.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070066#include <net/tcp.h>
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -030067#include <net/transp_v6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070068#include <net/ipv6.h>
69#include <net/inet_common.h>
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -080070#include <net/timewait_sock.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070071#include <net/xfrm.h>
Chris Leech1a2449a2006-05-23 18:05:53 -070072#include <net/netdma.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070073
74#include <linux/inet.h>
75#include <linux/ipv6.h>
76#include <linux/stddef.h>
77#include <linux/proc_fs.h>
78#include <linux/seq_file.h>
79
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -080080#include <linux/crypto.h>
81#include <linux/scatterlist.h>
82
Brian Haleyab32ea52006-09-22 14:15:41 -070083int sysctl_tcp_tw_reuse __read_mostly;
84int sysctl_tcp_low_latency __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -070085
Linus Torvalds1da177e2005-04-16 15:20:36 -070086
#ifdef CONFIG_TCP_MD5SIG
/* Forward declarations; the real implementations live later in this file
 * (outside this chunk).
 */
static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
						   __be32 addr);
static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, struct tcphdr *th);
#else
/* With MD5SIG compiled out, provide a stub lookup that always reports
 * "no key", so callers can test for a key without #ifdefs.
 */
static inline
struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
{
	return NULL;
}
#endif
99
Eric Dumazet5caea4e2008-11-20 00:40:07 -0800100struct inet_hashinfo tcp_hashinfo;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700101
Gerrit Renkera94f7232006-11-10 14:06:49 -0800102static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700103{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700104 return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
105 ip_hdr(skb)->saddr,
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700106 tcp_hdr(skb)->dest,
107 tcp_hdr(skb)->source);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700108}
109
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -0800110int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
111{
112 const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
113 struct tcp_sock *tp = tcp_sk(sk);
114
115 /* With PAWS, it is safe from the viewpoint
116 of data integrity. Even without PAWS it is safe provided sequence
117 spaces do not overlap i.e. at data rates <= 80Mbit/sec.
118
119 Actually, the idea is close to VJ's one, only timestamp cache is
120 held not per host, but per port pair and TW bucket is used as state
121 holder.
122
123 If TW bucket has been already destroyed we fall back to VJ's scheme
124 and use initial timestamp retrieved from peer table.
125 */
126 if (tcptw->tw_ts_recent_stamp &&
127 (twp == NULL || (sysctl_tcp_tw_reuse &&
James Morris9d729f72007-03-04 16:12:44 -0800128 get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -0800129 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
130 if (tp->write_seq == 0)
131 tp->write_seq = 1;
132 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
133 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
134 sock_hold(sktw);
135 return 1;
136 }
137
138 return 0;
139}
140
141EXPORT_SYMBOL_GPL(tcp_twsk_unique);
142
/* This will initiate an outgoing connection.
 *
 * Resolves the route, fills in the socket's address/port identity,
 * seeds the initial sequence number, hashes the socket into the
 * established tables and finally sends the SYN via tcp_connect().
 * Returns 0 on success or a negative errno.
 */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct rtable *rt;
	__be32 daddr, nexthop;
	int tmp;
	int err;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	/* With a strict source route option, route via the first hop of
	 * the source route, not the final destination.
	 */
	nexthop = daddr = usin->sin_addr.s_addr;
	if (inet->opt && inet->opt->srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet->opt->faddr;
	}

	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			       IPPROTO_TCP,
			       inet->sport, usin->sin_port, sk, 1);
	if (tmp < 0) {
		if (tmp == -ENETUNREACH)
			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return tmp;
	}

	/* TCP cannot connect to multicast or broadcast destinations. */
	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet->opt || !inet->opt->srr)
		daddr = rt->rt_dst;

	/* Bind the source address chosen by routing if none was set. */
	if (!inet->saddr)
		inet->saddr = rt->rt_src;
	inet->rcv_saddr = inet->saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
		/* Reset inherited state: the socket is being reconnected
		 * to a different destination, so stale timestamp/sequence
		 * state must not leak into the new connection.
		 */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq		   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
		struct inet_peer *peer = rt_get_peer(rt);
		/*
		 * VJ's idea. We save last timestamp seen from
		 * the destination in peer table, when entering state
		 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
		 * when trying new connection.
		 */
		if (peer != NULL &&
		    peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
			tp->rx_opt.ts_recent = peer->tcp_ts;
		}
	}

	inet->dport = usin->sin_port;
	inet->daddr = daddr;

	/* Account for IP options in the transport header length. */
	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet->opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;

	/* 536 is the RFC 1122 default MSS used until options are seen. */
	tp->rx_opt.mss_clamp = 536;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	/* Rebuild the route now that the source port is known. */
	err = ip_route_newports(&rt, IPPROTO_TCP,
				inet->sport, inet->dport, sk);
	if (err)
		goto failure;

	/* OK, now commit destination to socket. */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->u.dst);

	if (!tp->write_seq)
		tp->write_seq = secure_tcp_sequence_number(inet->saddr,
							   inet->daddr,
							   inet->sport,
							   usin->sin_port);

	inet->id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);
	/* The route reference is now owned by the socket (sk_setup_caps);
	 * clear rt so the failure path's ip_rt_put() is a no-op.
	 */
	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->dport = 0;
	return err;
}
266
/*
 * This routine does path mtu discovery as defined in RFC1191.
 *
 * Called on receipt of an ICMP_FRAG_NEEDED for this socket: shrink the
 * cached path MTU and, when our current MSS is clearly too big,
 * resynchronize the MSS and retransmit immediately ("fast path" MTU
 * discovery).  NOTE(review): @iph is currently unused here.
 */
static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);

	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
	 * send out by Linux are always <576bytes so they should go through
	 * unfragmented).
	 */
	if (sk->sk_state == TCP_LISTEN)
		return;

	/* We don't check in the destentry if pmtu discovery is forbidden
	 * on this route. We just assume that no packet_to_big packets
	 * are send back when pmtu discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
	if ((dst = __sk_dst_check(sk, 0)) == NULL)
		return;

	/* Propagate the reported MTU into the route cache. */
	dst->ops->update_pmtu(dst, mtu);

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	/* Re-read: update_pmtu() may have clamped the value. */
	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
313
/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition. If err < 0 then the socket should
 * be closed and the error returned to the user. If err > 0
 * it's just the icmp type << 8 | icmp code. After adjustment
 * header points to the first 8 bytes of the tcp header. We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *skb, u32 info)
{
	/* skb->data points at the IP header embedded in the ICMP payload;
	 * the offending TCP header follows it.
	 */
	struct iphdr *iph = (struct iphdr *)skb->data;
	struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct sock *sk;
	__u32 seq;
	int err;
	struct net *net = dev_net(skb->dev);

	/* Need the embedded IP header plus at least 8 bytes of TCP header
	 * (ports + sequence number).
	 */
	if (skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}

	/* Look up the socket the quoted segment belonged to.  Note the
	 * embedded addresses are from OUR packet, so daddr/saddr are
	 * swapped relative to normal receive lookups.
	 */
	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			 iph->saddr, th->source, inet_iif(skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		/* TIME-WAIT sockets ignore ICMP errors; just drop the ref. */
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 */
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	/* The quoted sequence number must lie in the in-flight window,
	 * otherwise the ICMP is stale or forged.
	 */
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	/* Map the ICMP type/code to an errno (or handle it inline). */
	switch (type) {
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			if (!sock_owned_by_user(sk))
				do_pmtu_discovery(sk, iph, info);
			goto out;
		}

		err = icmp_err_convert[code].errno;
		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		/* Find the embryonic (SYN_RECV) request this error is for. */
		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		WARN_ON(req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can f.e. if SYNs crossed.
			     */
		/* During the handshake any mapped error is fatal. */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even this two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
477
478/* This routine computes an IPv4 TCP checksum. */
Arnaldo Carvalho de Melo8292a172005-12-13 23:15:52 -0800479void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700480{
481 struct inet_sock *inet = inet_sk(sk);
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700482 struct tcphdr *th = tcp_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700483
Patrick McHardy84fa7932006-08-29 16:44:56 -0700484 if (skb->ip_summed == CHECKSUM_PARTIAL) {
Frederik Deweerdtba7808e2007-02-04 20:15:27 -0800485 th->check = ~tcp_v4_check(len, inet->saddr,
486 inet->daddr, 0);
Herbert Xu663ead32007-04-09 11:59:07 -0700487 skb->csum_start = skb_transport_header(skb) - skb->head;
Al Viroff1dcad2006-11-20 18:07:29 -0800488 skb->csum_offset = offsetof(struct tcphdr, check);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700489 } else {
Frederik Deweerdtba7808e2007-02-04 20:15:27 -0800490 th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
Joe Perches07f07572008-11-19 15:44:53 -0800491 csum_partial(th,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492 th->doff << 2,
493 skb->csum));
494 }
495}
496
Herbert Xua430a432006-07-08 13:34:56 -0700497int tcp_v4_gso_send_check(struct sk_buff *skb)
498{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700499 const struct iphdr *iph;
Herbert Xua430a432006-07-08 13:34:56 -0700500 struct tcphdr *th;
501
502 if (!pskb_may_pull(skb, sizeof(*th)))
503 return -EINVAL;
504
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700505 iph = ip_hdr(skb);
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700506 th = tcp_hdr(skb);
Herbert Xua430a432006-07-08 13:34:56 -0700507
508 th->check = 0;
Frederik Deweerdtba7808e2007-02-04 20:15:27 -0800509 th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
Herbert Xu663ead32007-04-09 11:59:07 -0700510 skb->csum_start = skb_transport_header(skb) - skb->head;
Al Viroff1dcad2006-11-20 18:07:29 -0800511 skb->csum_offset = offsetof(struct tcphdr, check);
Patrick McHardy84fa7932006-08-29 16:44:56 -0700512 skb->ip_summed = CHECKSUM_PARTIAL;
Herbert Xua430a432006-07-08 13:34:56 -0700513 return 0;
514}
515
/*
 * This routine will send an RST to the other tcp.
 *
 * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 * for reset.
 * Answer: if a packet caused RST, it is not for a socket
 * existing in our system, if it is matched to a socket,
 * it is just duplicate segment or bug in other side's TCP.
 * So that we build reply only basing on parameters
 * arrived with segment.
 * Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	/* On-stack reply: bare TCP header, optionally followed by an
	 * MD5 signature option.
	 */
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	/* Only reply to segments that were actually addressed to us. */
	if (skb->rtable->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		/* No ACK in the offending segment: ACK everything it
		 * consumed (SYN/FIN count as one sequence unit each).
		 */
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	/* Sign the RST if the (possibly NULL) socket has a key for the
	 * peer address.
	 */
	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	/* Pseudo-header checksum; ip_send_reply() folds in the payload
	 * at arg.csumoffset.
	 */
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;

	net = dev_net(skb->dst->dev);
	ip_send_reply(net->ipv4.tcp_sock, skb,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
}
599
/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */

/* Build and send a bare ACK in reply to @skb, without a full socket:
 * @seq/@ack/@win fill the TCP header, @ts (if non-zero) adds a
 * timestamp option echoing that value, @oif pins the output device,
 * @key (if set) appends an MD5 signature, and @reply_flags are passed
 * through to ip_send_reply() (e.g. IP_REPLY_ARG_NOSRCCHECK).
 */
static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 ts, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags)
{
	struct tcphdr *th = tcp_hdr(skb);
	/* On-stack reply: TCP header plus room for timestamp and
	 * (optionally) MD5 options.
	 */
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
	struct net *net = dev_net(skb->dst->dev);

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (ts) {
		/* NOP-NOP-TIMESTAMP option: echo @ts, send current time. */
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tcp_time_stamp);
		rep.opt[2] = htonl(ts);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		/* MD5 option goes after the timestamp option, if any. */
		int offset = (ts) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	/* Pseudo-header checksum seed; payload folded at arg.csumoffset. */
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;

	ip_send_reply(net->ipv4.tcp_sock, skb,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}
673
/* ACK a segment that hit a TIME-WAIT socket, using the state preserved
 * in the timewait sock, then drop the reference the lookup took on it.
 */
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
			);

	inet_twsk_put(tw);
}
689
/* (Re)send the SYN-ACK's ACK numbers for an embryonic (SYN-RECV)
 * connection described by @req, in reply to @skb.
 */
static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
			req->ts_recent,
			0,
			tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
}
700
/*
 * Send a SYN-ACK after having received a SYN.
 * This still operates on a request_sock only, not on a big
 * socket.
 *
 * If @dst is NULL a route is looked up from @req; in either case the
 * route reference is released before returning.  Returns 0 on success
 * (per net_xmit_eval()) or a negative errno.
 */
static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
				struct dst_entry *dst)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int err = -1;
	struct sk_buff * skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req);

	if (skb) {
		struct tcphdr *th = tcp_hdr(skb);

		/* Full software checksum for the SYN-ACK. */
		th->check = tcp_v4_check(skb->len,
					 ireq->loc_addr,
					 ireq->rmt_addr,
					 csum_partial(th, skb->len,
						      skb->csum));

		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
					    ireq->rmt_addr,
					    ireq->opt);
		/* Treat congestion-notification returns as success. */
		err = net_xmit_eval(err);
	}

	dst_release(dst);
	return err;
}
737
/* Send a SYN-ACK, letting __tcp_v4_send_synack() look up the route. */
static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req)
{
	return __tcp_v4_send_synack(sk, req, NULL);
}
742
/*
 * IPv4 request_sock destructor: free the copy of the peer's IP options
 * saved by tcp_v4_save_options() (kfree(NULL) is a harmless no-op).
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}
750
Arnaldo Carvalho de Melo80e40da2006-01-04 01:58:06 -0200751#ifdef CONFIG_SYN_COOKIES
/*
 * Log (at most once per minute) that we started sending syncookies.
 * 'warntime' is updated without locking; a lost or duplicated warning
 * is harmless.
 */
static void syn_flood_warning(struct sk_buff *skb)
{
	static unsigned long warntime;

	if (time_after(jiffies, (warntime + HZ * 60))) {
		warntime = jiffies;
		printk(KERN_INFO
		       "possible SYN flooding on port %d. Sending cookies.\n",
		       ntohs(tcp_hdr(skb)->dest));
	}
}
Arnaldo Carvalho de Melo80e40da2006-01-04 01:58:06 -0200763#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764
/*
 * Save and compile IPv4 options into the request_sock if needed.
 *
 * Returns a kmalloc'ed ip_options built by echoing the options found
 * on @skb, or NULL when there are no options or on allocation/echo
 * failure (best effort — failure just means "reply without options").
 * The caller owns the result; it is freed in tcp_v4_reqsk_destructor().
 */
static struct ip_options *tcp_v4_save_options(struct sock *sk,
					      struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options *dopt = NULL;

	if (opt && opt->optlen) {
		int opt_size = optlength(opt);
		/* GFP_ATOMIC: may run in atomic receive context. */
		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(dopt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}
786
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800787#ifdef CONFIG_TCP_MD5SIG
788/*
789 * RFC2385 MD5 checksumming requires a mapping of
790 * IP address->MD5 Key.
791 * We need to maintain these in the sk structure.
792 */
793
/* Find the Key structure for an address.  Linear scan of the socket's
 * IPv4 key list; returns the embedded tcp_md5sig_key or NULL. */
static struct tcp_md5sig_key *
			tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
		return NULL;
	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr)
			return &tp->md5sig_info->keys4[i].base;
	}
	return NULL;
}
809
/* AF-independent hook: look up the MD5 key for @addr_sk's peer address. */
struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr);
}

EXPORT_SYMBOL(tcp_v4_md5_lookup);
817
/* MD5 key lookup for a connection still in request_sock (embryonic) state. */
static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
}
823
/* This can be called on a newly created socket, from other files */
/*
 * Add (or replace) the MD5 key for peer address @addr on @sk.
 *
 * Takes ownership of the kmalloc'ed @newkey: it is either stored in
 * the key table or kfree'd on every error path.  Returns 0 or -ENOMEM.
 */
int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
		      u8 *newkey, u8 newkeylen)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp4_md5sig_key *keys;

	key = tcp_v4_md5_do_lookup(sk, addr);
	if (key) {
		/* Pre-existing entry - just update that one. */
		kfree(key->key);
		key->key = newkey;
		key->keylen = newkeylen;
	} else {
		struct tcp_md5sig_info *md5sig;

		if (!tp->md5sig_info) {
			/* First key on this socket: allocate the container. */
			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
						  GFP_ATOMIC);
			if (!tp->md5sig_info) {
				kfree(newkey);
				return -ENOMEM;
			}
			/* Disable GSO once MD5 signing is in use (same as
			 * tcp_v4_parse_md5_keys()). */
			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
		}
		/* Ensure the shared MD5 crypto pool exists; released via
		 * tcp_free_md5sig_pool() when keys go away. */
		if (tcp_alloc_md5sig_pool() == NULL) {
			kfree(newkey);
			return -ENOMEM;
		}
		md5sig = tp->md5sig_info;

		if (md5sig->alloced4 == md5sig->entries4) {
			/* Table is full: grow it by exactly one slot. */
			keys = kmalloc((sizeof(*keys) *
					(md5sig->entries4 + 1)), GFP_ATOMIC);
			if (!keys) {
				kfree(newkey);
				tcp_free_md5sig_pool();
				return -ENOMEM;
			}

			if (md5sig->entries4)
				memcpy(keys, md5sig->keys4,
				       sizeof(*keys) * md5sig->entries4);

			/* Free old key list, and reference new one */
			kfree(md5sig->keys4);
			md5sig->keys4 = keys;
			md5sig->alloced4++;
		}
		/* Append the new key in the freshly freed last slot. */
		md5sig->entries4++;
		md5sig->keys4[md5sig->entries4 - 1].addr = addr;
		md5sig->keys4[md5sig->entries4 - 1].base.key = newkey;
		md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
	}
	return 0;
}

EXPORT_SYMBOL(tcp_v4_md5_do_add);
884
/* AF-independent hook: add an MD5 key for @addr_sk's peer address.
 * Ownership of @newkey passes to tcp_v4_md5_do_add(). */
static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
			       u8 *newkey, u8 newkeylen)
{
	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr,
				 newkey, newkeylen);
}
891
/*
 * Delete the MD5 key configured for peer address @addr.
 * Returns 0 on success, -ENOENT when no key matches.
 * NOTE(review): dereferences tp->md5sig_info unconditionally — callers
 * (e.g. tcp_v4_parse_md5_keys()) check it is non-NULL first; confirm
 * all call sites do.
 */
int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr) {
			/* Free the key */
			kfree(tp->md5sig_info->keys4[i].base.key);
			tp->md5sig_info->entries4--;

			if (tp->md5sig_info->entries4 == 0) {
				/* Last key gone: release the whole table. */
				kfree(tp->md5sig_info->keys4);
				tp->md5sig_info->keys4 = NULL;
				tp->md5sig_info->alloced4 = 0;
			} else if (tp->md5sig_info->entries4 != i) {
				/* Need to do some manipulation */
				/* Close the gap: entries4 was already
				 * decremented, so (entries4 - i) is exactly
				 * the number of entries after slot i. */
				memmove(&tp->md5sig_info->keys4[i],
					&tp->md5sig_info->keys4[i+1],
					(tp->md5sig_info->entries4 - i) *
					 sizeof(struct tcp4_md5sig_key));
			}
			/* Drop the pool reference taken when this key
			 * was added. */
			tcp_free_md5sig_pool();
			return 0;
		}
	}
	return -ENOENT;
}

EXPORT_SYMBOL(tcp_v4_md5_do_del);
922
/* Free every IPv4 MD5 key on @sk along with the key table itself. */
static void tcp_v4_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Free each key, then the set of key keys,
	 * the crypto element, and then decrement our
	 * hold on the last resort crypto.
	 */
	if (tp->md5sig_info->entries4) {
		int i;
		for (i = 0; i < tp->md5sig_info->entries4; i++)
			kfree(tp->md5sig_info->keys4[i].base.key);
		tp->md5sig_info->entries4 = 0;
		/* NOTE(review): tcp_alloc_md5sig_pool() was called once per
		 * key in tcp_v4_md5_do_add(), but only one
		 * tcp_free_md5sig_pool() happens here for all of them —
		 * verify the pool refcounting doesn't leak when a socket
		 * with multiple keys is torn down. */
		tcp_free_md5sig_pool();
	}
	if (tp->md5sig_info->keys4) {
		kfree(tp->md5sig_info->keys4);
		tp->md5sig_info->keys4 = NULL;
		tp->md5sig_info->alloced4 = 0;
	}
}
944
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200945static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
946 int optlen)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800947{
948 struct tcp_md5sig cmd;
949 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
950 u8 *newkey;
951
952 if (optlen < sizeof(cmd))
953 return -EINVAL;
954
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200955 if (copy_from_user(&cmd, optval, sizeof(cmd)))
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800956 return -EFAULT;
957
958 if (sin->sin_family != AF_INET)
959 return -EINVAL;
960
961 if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
962 if (!tcp_sk(sk)->md5sig_info)
963 return -ENOENT;
964 return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
965 }
966
967 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
968 return -EINVAL;
969
970 if (!tcp_sk(sk)->md5sig_info) {
971 struct tcp_sock *tp = tcp_sk(sk);
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200972 struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800973
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800974 if (!p)
975 return -EINVAL;
976
977 tp->md5sig_info = p;
David S. Miller3d7dbea2007-06-12 14:36:42 -0700978 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800979 }
980
Arnaldo Carvalho de Melof6685932006-11-17 11:06:01 -0200981 newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800982 if (!newkey)
983 return -ENOMEM;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800984 return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
985 newkey, cmd.tcpm_keylen);
986}
987
/* Fold the TCP IPv4 pseudo-header into the running MD5 hash in @hp.
 * @nbytes is the TCP segment length (header + data) being signed.
 * Returns the crypto_hash_update() result (0 on success). */
static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					__be32 daddr, __be32 saddr, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;

	/* Use the scratch pseudo-header inside the per-CPU pool. */
	bp = &hp->md5_blk.ip4;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}
1010
/*
 * Compute an MD5 signature over pseudo-header + TCP header + key only
 * (no payload), writing 16 bytes into @md5_hash.
 * Returns 0 on success; on any failure returns 1 with @md5_hash zeroed.
 */
static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	/* th->doff << 2 is the header length in bytes: the signed
	 * "segment" here is just the header. */
	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
1042
/*
 * Compute the MD5 signature for @skb into @md5_hash (16 bytes).
 * The addresses hashed come from @sk when set, else from @req, else
 * from the skb's own IP header (the case used when verifying incoming
 * segments — see tcp_v4_inbound_md5_hash()).
 * Returns 0 on success; 1 (with @md5_hash zeroed) on crypto failure.
 */
int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			struct sock *sk, struct request_sock *req,
			struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) {
		saddr = inet_sk(sk)->saddr;
		daddr = inet_sk(sk)->daddr;
	} else if (req) {
		saddr = inet_rsk(req)->loc_addr;
		daddr = inet_rsk(req)->rmt_addr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	/* Pseudo-header over the whole segment, then header, then the
	 * payload after the TCP header (th->doff << 2), then the key. */
	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001094
/*
 * Verify the MD5 signature option on an incoming segment against the
 * key (if any) configured for the sending peer.
 * Returns 0 to accept the segment, 1 to drop it.
 */
static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and its wrong.
	 */
	__u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return 0;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return 1;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return 1;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	/* Drop on hash-computation failure or signature mismatch,
	 * with a rate-limited diagnostic. */
	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		if (net_ratelimit()) {
			printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
			       &iph->saddr, ntohs(th->source),
			       &iph->daddr, ntohs(th->dest),
			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
		}
		return 1;
	}
	return 0;
}
1147
1148#endif
1149
/* IPv4 operations for embryonic (SYN-received) connections. */
struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_v4_send_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
};
1158
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001159#ifdef CONFIG_TCP_MD5SIG
/* MD5-signature hooks used while a connection is still a request_sock. */
static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
};
Andrew Mortonb6332e62006-11-30 19:16:28 -08001163#endif
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001164
/* TIME_WAIT socket operations for TCP. */
static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor= tcp_twsk_destructor,
};
1170
/*
 * Handle an incoming SYN on a listening socket: validate it, allocate a
 * request_sock, pick the initial sequence number (or encode a syncookie)
 * and send the SYN-ACK.  Always returns 0 — unserviceable SYNs are
 * dropped silently.
 */
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct inet_request_sock *ireq;
	struct tcp_options_received tmp_opt;
	struct request_sock *req;
	__be32 saddr = ip_hdr(skb)->saddr;
	__be32 daddr = ip_hdr(skb)->daddr;
	/* Non-zero when this SYN hit an alive TIME_WAIT bucket; see the
	 * "VJ's idea" comment below. */
	__u32 isn = TCP_SKB_CB(skb)->when;
	struct dst_entry *dst = NULL;
#ifdef CONFIG_SYN_COOKIES
	int want_cookie = 0;
#else
#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
#endif

	/* Never answer to SYNs send to broadcast or multicast */
	if (skb->rtable->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
#ifdef CONFIG_SYN_COOKIES
		if (sysctl_tcp_syncookies) {
			want_cookie = 1;
		} else
#endif
		goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet_reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = 536;	/* conservative RFC 1122 default MSS */
	tmp_opt.user_mss  = tcp_sk(sk)->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	/* For syncookies, only timestamp-carrying options survive —
	 * everything else cannot be encoded in the cookie. */
	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
		/* Some OSes (unknown ones, but I see them on web server, which
		 * contains information interesting only for windows'
		 * users) do not send their stamp in SYN. It is easy case.
		 * We simply do not advertise TS support.
		 */
		tmp_opt.saw_tstamp = 0;
		tmp_opt.tstamp_ok  = 0;
	}
	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;

	tcp_openreq_init(req, &tmp_opt, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	ireq = inet_rsk(req);
	ireq->loc_addr = daddr;
	ireq->rmt_addr = saddr;
	ireq->no_srccheck = inet_sk(sk)->transparent;
	ireq->opt = tcp_v4_save_options(sk, skb);
	if (!want_cookie)
		TCP_ECN_create_request(req, tcp_hdr(skb));

	if (want_cookie) {
#ifdef CONFIG_SYN_COOKIES
		syn_flood_warning(skb);
		req->cookie_ts = tmp_opt.tstamp_ok;
#endif
		/* Encode connection state into the sequence number. */
		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
	} else if (!isn) {
		struct inet_peer *peer = NULL;

		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet_csk_route_req(sk, req)) != NULL &&
		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
		    peer->v4daddr == saddr) {
			if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
			    (s32)(peer->tcp_ts - req->ts_recent) >
							TCP_PAWS_WINDOW) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 (!peer || !peer->tcp_ts_stamp) &&
			 (!dst || !dst_metric(dst, RTAX_RTT))) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
				       &saddr, ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		}

		isn = tcp_v4_init_sequence(skb);
	}
	tcp_rsk(req)->snt_isn = isn;

	/* __tcp_v4_send_synack() consumes dst.  With cookies we keep no
	 * request state, so the req is freed even on success. */
	if (__tcp_v4_send_synack(sk, req, dst) || want_cookie)
		goto drop_and_free;

	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	return 0;
}
1317
1318
1319/*
1320 * The three way handshake has completed - we got a valid synack -
1321 * now create the new socket.
1322 */
/*
 * Create the child socket for a completed three-way handshake.
 * Copies addresses, IP options, and (if configured) the peer's MD5 key
 * from the listener/request into the new sock, then hashes it in.
 * Returns the new socket or NULL on failure (stats bumped accordingly).
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		goto exit;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(newsk, dst);

	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	newinet->daddr	      = ireq->rmt_addr;
	newinet->rcv_saddr    = ireq->loc_addr;
	newinet->saddr	      = ireq->loc_addr;
	/* IP options ownership moves to the child; clear it in the req
	 * so tcp_v4_reqsk_destructor() won't free it. */
	newinet->opt	      = ireq->opt;
	ireq->opt	      = NULL;
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (newinet->opt)
		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
	newinet->id = newtp->write_seq ^ jiffies;

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	/* Clamp advertised MSS to any user-configured maximum. */
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
		if (newkey != NULL)
			tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr,
					  newkey, key->keylen);
		newsk->sk_route_caps &= ~NETIF_F_GSO_MASK;
	}
#endif

	__inet_hash_nolisten(newsk);
	__inet_inherit_port(sk, newsk);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	dst_release(dst);
	return NULL;
}
1401
/*
 * Map an incoming segment on a listening socket to the socket that
 * should process it: a pending request_sock (via tcp_check_req()), an
 * already-established child, or — with syncookies — a socket rebuilt
 * from a valid cookie ACK.  Returns NULL when a TIME_WAIT sock matched
 * (its reference is dropped here), otherwise a socket to use.
 */
static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	const struct iphdr *iph = ip_hdr(skb);
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
						       iph->saddr, iph->daddr);
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
			th->source, iph->daddr, th->dest, inet_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			/* Returned locked — NOTE(review): caller is
			 * presumably responsible for the unlock; confirm. */
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	/* A bare ACK may complete a cookie-validated handshake. */
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}
1432
/*
 * Validate or set up the TCP checksum for an incoming skb.
 * Returns 0 when the packet may proceed (verified now, or deferred to a
 * later full check), or the non-zero residue when verification failed.
 */
static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		/* Hardware summed the payload: fold in the pseudo-header
		 * and verify right now. */
		if (!tcp_v4_check(skb->len, iph->saddr,
				  iph->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	/* Seed skb->csum with the pseudo-header sum for later checking. */
	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
				       skb->len, IPPROTO_TCP, 0);

	/* Verify short packets (<= 76 bytes) immediately. */
	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}
1453
1454
/* The socket must have it's spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;	/* socket on whose behalf a RST is sent */
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
	 *  o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
			rsk = sk;
			goto reset;
		}
		TCP_CHECK_TIMER(sk);
		return 0;
	}

	/* Slow path: length and checksum were deferred until now. */
	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/* A distinct child socket was returned: let it process
		 * the segment; on failure the RST targets the child. */
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
		rsk = sk;
		goto reset;
	}
	TCP_CHECK_TIMER(sk);
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
1527
/*
 * From tcp_input.c
 */

/* IPv4 TCP receive entry point, invoked from the IP layer for every
 * TCP segment.  Validates the header and checksum, fills in the skb
 * control block, looks up the owning socket and dispatches: direct
 * processing, the prequeue, or the socket backlog when user context
 * owns the socket.  TIME_WAIT sockets get their own handling below.
 */
int tcp_v4_rcv(struct sk_buff *skb)
{
	const struct iphdr *iph;
	struct tcphdr *th;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	/* Only segments addressed to this host are processed. */
	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = tcp_hdr(skb);

	/* doff must cover at least the fixed header... */
	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	/* ...and the full header including options must be pullable. */
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided case of th->doff==0 is eliminated.
	 * So, we defer the checks. */
	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
		goto bad_packet;

	/* Re-read headers: pskb_may_pull() may have moved the data. */
	th = tcp_hdr(skb);
	iph = ip_hdr(skb);
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	/* SYN and FIN each consume one unit of sequence space. */
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = iph->tos;
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;
	nf_reset(skb);

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = get_softnet_dma();
		if (tp->ucopy.dma_chan)
			ret = tcp_v4_do_rcv(sk, skb);
		else
#endif
		{
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v4_do_rcv(sk, skb);
		}
	} else
		/* User context holds the socket lock: defer to the
		 * backlog, drained on release_sock(). */
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		/* New SYN against a TIME_WAIT socket: when a listener
		 * exists, retire the timewait sock and reprocess the
		 * segment with the listener. */
		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
							&tcp_hashinfo,
							iph->daddr, th->dest,
							inet_iif(skb));
		if (sk2) {
			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
			inet_twsk_put(inet_twsk(sk));
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
1667
Linus Torvalds1da177e2005-04-16 15:20:36 -07001668/* VJ's idea. Save last timestamp seen from this destination
1669 * and hold it at least for normal timewait interval to use for duplicate
1670 * segment detection in subsequent connections, before they enter synchronized
1671 * state.
1672 */
1673
1674int tcp_v4_remember_stamp(struct sock *sk)
1675{
1676 struct inet_sock *inet = inet_sk(sk);
1677 struct tcp_sock *tp = tcp_sk(sk);
1678 struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
1679 struct inet_peer *peer = NULL;
1680 int release_it = 0;
1681
1682 if (!rt || rt->rt_dst != inet->daddr) {
1683 peer = inet_getpeer(inet->daddr, 1);
1684 release_it = 1;
1685 } else {
1686 if (!rt->peer)
1687 rt_bind_peer(rt, 1);
1688 peer = rt->peer;
1689 }
1690
1691 if (peer) {
1692 if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
James Morris9d729f72007-03-04 16:12:44 -08001693 (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001694 peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
1695 peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
1696 peer->tcp_ts = tp->rx_opt.ts_recent;
1697 }
1698 if (release_it)
1699 inet_putpeer(peer);
1700 return 1;
1701 }
1702
1703 return 0;
1704}
1705
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001706int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001707{
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001708 struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709
1710 if (peer) {
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001711 const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
1712
1713 if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
James Morris9d729f72007-03-04 16:12:44 -08001714 (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001715 peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
1716 peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
1717 peer->tcp_ts = tcptw->tw_ts_recent;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718 }
1719 inet_putpeer(peer);
1720 return 1;
1721 }
1722
1723 return 0;
1724}
1725
/* AF_INET address-family operations consumed by the protocol-
 * independent inet_connection_sock code; installed into
 * icsk->icsk_af_ops by tcp_v4_init_sock(). */
struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.conn_request	   = tcp_v4_conn_request,
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
	.remember_stamp	   = tcp_v4_remember_stamp,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ip_setsockopt,
	.getsockopt	   = ip_getsockopt,
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in),
	.bind_conflict	   = inet_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
};
1744
#ifdef CONFIG_TCP_MD5SIG
/* IPv4 TCP-MD5 signature operations; installed into tp->af_specific
 * by tcp_v4_init_sock(). */
static struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_add	= tcp_v4_md5_add_func,
	.md5_parse	= tcp_v4_parse_md5_keys,
};
#endif
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001753
/* Initialise TCP-private state for a freshly created AF_INET TCP
 * socket: queues, timers, initial RTO/cwnd values, and the IPv4
 * af_ops (plus MD5 ops when configured).
 *
 * NOTE: A lot of things set to zero explicitly by call to
 * sk_alloc() so need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;	/* Infinity */
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = 536;

	tp->reordering = sysctl_tcp_reordering;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;

	sk->sk_state = TCP_CLOSE;

	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	icsk->icsk_af_ops = &ipv4_specific;
	icsk->icsk_sync_mss = tcp_sync_mss;
#ifdef CONFIG_TCP_MD5SIG
	tp->af_specific = &tcp_sock_ipv4_specific;
#endif

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}
1804
/* Tear down TCP-private state when an IPv4 TCP socket is destroyed:
 * timers, congestion control, queued skbs, MD5 keys, the bound port
 * and the cached sendmsg page. */
void tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	/* Cleanup up the write buffer. */
	tcp_write_queue_purge(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	__skb_queue_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		tcp_v4_clear_md5_list(sk);
		kfree(tp->md5sig_info);
		tp->md5sig_info = NULL;
	}
#endif

#ifdef CONFIG_NET_DMA
	/* Cleans up our sk_async_wait_queue */
	__skb_queue_purge(&sk->sk_async_wait_queue);
#endif

	/* Clean prequeue, it must be empty really */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);

	/*
	 * If sendmsg cached page exists, toss it.
	 */
	if (sk->sk_sndmsg_page) {
		__free_page(sk->sk_sndmsg_page);
		sk->sk_sndmsg_page = NULL;
	}

	atomic_dec(&tcp_sockets_allocated);
}

EXPORT_SYMBOL(tcp_v4_destroy_sock);
1852
1853#ifdef CONFIG_PROC_FS
1854/* Proc filesystem TCP sock list dumping. */
1855
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08001856static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001857{
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08001858 return hlist_nulls_empty(head) ? NULL :
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001859 list_entry(head->first, struct inet_timewait_sock, tw_node);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001860}
1861
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001862static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001863{
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08001864 return !is_a_nulls(tw->tw_node.next) ?
1865 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001866}
1867
/* Advance the /proc iterator over listening sockets.  Walks the
 * listening hash buckets and, within each listener, its SYN queue of
 * pending request_socks.  The current bucket's spinlock (and, while
 * in OPENREQ state, the listener's syn_wait_lock) is held on return;
 * tcp_seq_stop() or the next step releases it.
 */
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct inet_connection_sock *icsk;
	struct hlist_node *node;
	struct sock *sk = cur;
	struct inet_listen_hashbucket *ilb;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	/* First call: start at bucket 0 and take its lock. */
	if (!sk) {
		st->bucket = 0;
		ilb = &tcp_hashinfo.listening_hash[0];
		spin_lock_bh(&ilb->lock);
		sk = sk_head(&ilb->head);
		goto get_sk;
	}
	ilb = &tcp_hashinfo.listening_hash[st->bucket];
	++st->num;

	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		/* Resume inside the current listener's SYN table. */
		struct request_sock *req = cur;

		icsk = inet_csk(st->syn_wait_sk);
		req = req->dl_next;
		while (1) {
			while (req) {
				if (req->rsk_ops->family == st->family) {
					cur = req;
					goto out;
				}
				req = req->dl_next;
			}
			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
				break;
get_req:
			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
		}
		/* SYN table exhausted: move on to the next listener. */
		sk = sk_next(st->syn_wait_sk);
		st->state = TCP_SEQ_STATE_LISTENING;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	} else {
		/* Before leaving this listener, dump its SYN queue
		 * (if non-empty) under syn_wait_lock. */
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue))
			goto start_req;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		sk = sk_next(sk);
	}
get_sk:
	sk_for_each_from(sk, node) {
		if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
			cur = sk;
			goto out;
		}
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
start_req:
			st->uid = sock_i_uid(sk);
			st->syn_wait_sk = sk;
			st->state = TCP_SEQ_STATE_OPENREQ;
			st->sbucket = 0;
			goto get_req;
		}
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	}
	/* Bucket exhausted: release its lock and try the next one. */
	spin_unlock_bh(&ilb->lock);
	if (++st->bucket < INET_LHTABLE_SIZE) {
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock_bh(&ilb->lock);
		sk = sk_head(&ilb->head);
		goto get_sk;
	}
	cur = NULL;
out:
	return cur;
}
1945
1946static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
1947{
1948 void *rc = listening_get_next(seq, NULL);
1949
1950 while (rc && *pos) {
1951 rc = listening_get_next(seq, rc);
1952 --*pos;
1953 }
1954 return rc;
1955}
1956
Andi Kleen6eac5602008-08-28 01:08:02 -07001957static inline int empty_bucket(struct tcp_iter_state *st)
1958{
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08001959 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
1960 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
Andi Kleen6eac5602008-08-28 01:08:02 -07001961}
1962
/* Find the first established (or, failing that, TIME_WAIT) socket of
 * the wanted family and namespace, scanning ehash buckets from 0.
 * On success the matching bucket's lock is left held; it is released
 * by established_get_next() or tcp_seq_stop().
 */
static void *established_get_first(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	void *rc = NULL;

	for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
		struct sock *sk;
		struct hlist_nulls_node *node;
		struct inet_timewait_sock *tw;
		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);

		/* Lockless fast path for the common case of empty buckets */
		if (empty_bucket(st))
			continue;

		spin_lock_bh(lock);
		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			if (sk->sk_family != st->family ||
			    !net_eq(sock_net(sk), net)) {
				continue;
			}
			rc = sk;
			goto out;
		}
		/* No match on the main chain: try the timewait chain. */
		st->state = TCP_SEQ_STATE_TIME_WAIT;
		inet_twsk_for_each(tw, node,
				   &tcp_hashinfo.ehash[st->bucket].twchain) {
			if (tw->tw_family != st->family ||
			    !net_eq(twsk_net(tw), net)) {
				continue;
			}
			rc = tw;
			goto out;
		}
		spin_unlock_bh(lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;
	}
out:
	return rc;
}
2004
/* Step to the next established/TIME_WAIT socket: first along the
 * current chain, then into this bucket's timewait chain, then into
 * following non-empty buckets.  The lock of the bucket containing
 * the returned entry is held on return.
 */
static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct inet_timewait_sock *tw;
	struct hlist_nulls_node *node;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	++st->num;

	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
		tw = cur;
		tw = tw_next(tw);
get_tw:
		/* Skip timewait socks of other families/namespaces. */
		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
			tw = tw_next(tw);
		}
		if (tw) {
			cur = tw;
			goto out;
		}
		/* Timewait chain done: release this bucket and move on. */
		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		st->state = TCP_SEQ_STATE_ESTABLISHED;

		/* Look for next non empty bucket */
		while (++st->bucket < tcp_hashinfo.ehash_size &&
				empty_bucket(st))
			;
		if (st->bucket >= tcp_hashinfo.ehash_size)
			return NULL;

		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
	} else
		sk = sk_nulls_next(sk);

	sk_nulls_for_each_from(sk, node) {
		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
			goto found;
	}

	/* Main chain done: continue with this bucket's timewait chain. */
	st->state = TCP_SEQ_STATE_TIME_WAIT;
	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
	goto get_tw;
found:
	cur = sk;
out:
	return cur;
}
2054
2055static void *established_get_idx(struct seq_file *seq, loff_t pos)
2056{
2057 void *rc = established_get_first(seq);
2058
2059 while (rc && pos) {
2060 rc = established_get_next(seq, rc);
2061 --pos;
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02002062 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002063 return rc;
2064}
2065
2066static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2067{
2068 void *rc;
Jianjun Kong5799de02008-11-03 02:49:10 -08002069 struct tcp_iter_state *st = seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002070
Linus Torvalds1da177e2005-04-16 15:20:36 -07002071 st->state = TCP_SEQ_STATE_LISTENING;
2072 rc = listening_get_idx(seq, &pos);
2073
2074 if (!rc) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002075 st->state = TCP_SEQ_STATE_ESTABLISHED;
2076 rc = established_get_idx(seq, pos);
2077 }
2078
2079 return rc;
2080}
2081
2082static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2083{
Jianjun Kong5799de02008-11-03 02:49:10 -08002084 struct tcp_iter_state *st = seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002085 st->state = TCP_SEQ_STATE_LISTENING;
2086 st->num = 0;
2087 return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2088}
2089
2090static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2091{
2092 void *rc = NULL;
Jianjun Kong5799de02008-11-03 02:49:10 -08002093 struct tcp_iter_state *st;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002094
2095 if (v == SEQ_START_TOKEN) {
2096 rc = tcp_get_idx(seq, 0);
2097 goto out;
2098 }
2099 st = seq->private;
2100
2101 switch (st->state) {
2102 case TCP_SEQ_STATE_OPENREQ:
2103 case TCP_SEQ_STATE_LISTENING:
2104 rc = listening_get_next(seq, v);
2105 if (!rc) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002106 st->state = TCP_SEQ_STATE_ESTABLISHED;
2107 rc = established_get_first(seq);
2108 }
2109 break;
2110 case TCP_SEQ_STATE_ESTABLISHED:
2111 case TCP_SEQ_STATE_TIME_WAIT:
2112 rc = established_get_next(seq, v);
2113 break;
2114 }
2115out:
2116 ++*pos;
2117 return rc;
2118}
2119
/* seq_file stop callback: release whatever lock the iterator is
 * currently holding, depending on the phase it stopped in. */
static void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
		if (v) {
			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		}
		/* fall through: the listening bucket lock is held too */
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		break;
	}
}
2141
2142static int tcp_seq_open(struct inode *inode, struct file *file)
2143{
2144 struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002145 struct tcp_iter_state *s;
Denis V. Lunev52d6f3f2008-04-13 22:12:41 -07002146 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002147
Denis V. Lunev52d6f3f2008-04-13 22:12:41 -07002148 err = seq_open_net(inode, file, &afinfo->seq_ops,
2149 sizeof(struct tcp_iter_state));
2150 if (err < 0)
2151 return err;
Daniel Lezcanof40c8172008-03-21 04:13:54 -07002152
Denis V. Lunev52d6f3f2008-04-13 22:12:41 -07002153 s = ((struct seq_file *)file->private_data)->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002154 s->family = afinfo->family;
Daniel Lezcanof40c8172008-03-21 04:13:54 -07002155 return 0;
2156}
2157
Daniel Lezcano6f8b13b2008-03-21 04:14:45 -07002158int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002159{
2160 int rc = 0;
2161 struct proc_dir_entry *p;
2162
Denis V. Lunev68fcadd2008-04-13 22:13:30 -07002163 afinfo->seq_fops.open = tcp_seq_open;
2164 afinfo->seq_fops.read = seq_read;
2165 afinfo->seq_fops.llseek = seq_lseek;
2166 afinfo->seq_fops.release = seq_release_net;
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02002167
Denis V. Lunev9427c4b2008-04-13 22:12:13 -07002168 afinfo->seq_ops.start = tcp_seq_start;
2169 afinfo->seq_ops.next = tcp_seq_next;
2170 afinfo->seq_ops.stop = tcp_seq_stop;
2171
Denis V. Lunev84841c32008-05-02 04:10:08 -07002172 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2173 &afinfo->seq_fops, afinfo);
2174 if (!p)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002175 rc = -ENOMEM;
2176 return rc;
2177}
2178
Daniel Lezcano6f8b13b2008-03-21 04:14:45 -07002179void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002180{
Daniel Lezcano6f8b13b2008-03-21 04:14:45 -07002181 proc_net_remove(net, afinfo->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002182}
2183
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07002184static void get_openreq4(struct sock *sk, struct request_sock *req,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002185 struct seq_file *f, int i, int uid, int *len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002186{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002187 const struct inet_request_sock *ireq = inet_rsk(req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002188 int ttd = req->expires - jiffies;
2189
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002190 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2191 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
Linus Torvalds1da177e2005-04-16 15:20:36 -07002192 i,
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002193 ireq->loc_addr,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002194 ntohs(inet_sk(sk)->sport),
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002195 ireq->rmt_addr,
2196 ntohs(ireq->rmt_port),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002197 TCP_SYN_RECV,
2198 0, 0, /* could print option size, but that is af dependent. */
2199 1, /* timers active (only the expire timer) */
2200 jiffies_to_clock_t(ttd),
2201 req->retrans,
2202 uid,
2203 0, /* non standard timer */
2204 0, /* open_requests have no inode */
2205 atomic_read(&sk->sk_refcnt),
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002206 req,
2207 len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002208}
2209
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002210static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002211{
2212 int timer_active;
2213 unsigned long timer_expires;
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002214 struct tcp_sock *tp = tcp_sk(sk);
2215 const struct inet_connection_sock *icsk = inet_csk(sk);
2216 struct inet_sock *inet = inet_sk(sk);
Al Viro714e85b2006-11-14 20:51:49 -08002217 __be32 dest = inet->daddr;
2218 __be32 src = inet->rcv_saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002219 __u16 destp = ntohs(inet->dport);
2220 __u16 srcp = ntohs(inet->sport);
2221
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002222 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002223 timer_active = 1;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002224 timer_expires = icsk->icsk_timeout;
2225 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002226 timer_active = 4;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002227 timer_expires = icsk->icsk_timeout;
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002228 } else if (timer_pending(&sk->sk_timer)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002229 timer_active = 2;
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002230 timer_expires = sk->sk_timer.expires;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002231 } else {
2232 timer_active = 0;
2233 timer_expires = jiffies;
2234 }
2235
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002236 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
Stephen Hemminger7be87352008-06-27 20:00:19 -07002237 "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002238 i, src, srcp, dest, destp, sk->sk_state,
Sridhar Samudrala47da8ee2006-06-27 13:29:00 -07002239 tp->write_seq - tp->snd_una,
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002240 sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02002241 (tp->rcv_nxt - tp->copied_seq),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002242 timer_active,
2243 jiffies_to_clock_t(timer_expires - jiffies),
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002244 icsk->icsk_retransmits,
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002245 sock_i_uid(sk),
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03002246 icsk->icsk_probes_out,
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002247 sock_i_ino(sk),
2248 atomic_read(&sk->sk_refcnt), sk,
Stephen Hemminger7be87352008-06-27 20:00:19 -07002249 jiffies_to_clock_t(icsk->icsk_rto),
2250 jiffies_to_clock_t(icsk->icsk_ack.ato),
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002251 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002252 tp->snd_cwnd,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002253 tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh,
2254 len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002255}
2256
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02002257static void get_timewait4_sock(struct inet_timewait_sock *tw,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002258 struct seq_file *f, int i, int *len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002259{
Al Viro23f33c22006-09-27 18:43:50 -07002260 __be32 dest, src;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002261 __u16 destp, srcp;
2262 int ttd = tw->tw_ttd - jiffies;
2263
2264 if (ttd < 0)
2265 ttd = 0;
2266
2267 dest = tw->tw_daddr;
2268 src = tw->tw_rcv_saddr;
2269 destp = ntohs(tw->tw_dport);
2270 srcp = ntohs(tw->tw_sport);
2271
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002272 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2273 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n",
Linus Torvalds1da177e2005-04-16 15:20:36 -07002274 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2275 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002276 atomic_read(&tw->tw_refcnt), tw, len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002277}
2278
2279#define TMPSZ 150
2280
2281static int tcp4_seq_show(struct seq_file *seq, void *v)
2282{
Jianjun Kong5799de02008-11-03 02:49:10 -08002283 struct tcp_iter_state *st;
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002284 int len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002285
2286 if (v == SEQ_START_TOKEN) {
2287 seq_printf(seq, "%-*s\n", TMPSZ - 1,
2288 " sl local_address rem_address st tx_queue "
2289 "rx_queue tr tm->when retrnsmt uid timeout "
2290 "inode");
2291 goto out;
2292 }
2293 st = seq->private;
2294
2295 switch (st->state) {
2296 case TCP_SEQ_STATE_LISTENING:
2297 case TCP_SEQ_STATE_ESTABLISHED:
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002298 get_tcp4_sock(v, seq, st->num, &len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002299 break;
2300 case TCP_SEQ_STATE_OPENREQ:
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002301 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002302 break;
2303 case TCP_SEQ_STATE_TIME_WAIT:
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002304 get_timewait4_sock(v, seq, st->num, &len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002305 break;
2306 }
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002307 seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002308out:
2309 return 0;
2310}
2311
Linus Torvalds1da177e2005-04-16 15:20:36 -07002312static struct tcp_seq_afinfo tcp4_seq_afinfo = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002313 .name = "tcp",
2314 .family = AF_INET,
Denis V. Lunev5f4472c2008-04-13 22:13:53 -07002315 .seq_fops = {
2316 .owner = THIS_MODULE,
2317 },
Denis V. Lunev9427c4b2008-04-13 22:12:13 -07002318 .seq_ops = {
2319 .show = tcp4_seq_show,
2320 },
Linus Torvalds1da177e2005-04-16 15:20:36 -07002321};
2322
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002323static int tcp4_proc_init_net(struct net *net)
2324{
2325 return tcp_proc_register(net, &tcp4_seq_afinfo);
2326}
2327
/* pernet exit: remove this netns's /proc/net/tcp entry. */
static void tcp4_proc_exit_net(struct net *net)
{
	tcp_proc_unregister(net, &tcp4_seq_afinfo);
}
2332
/* Per-network-namespace hooks for the /proc/net/tcp entry. */
static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};
2337
Linus Torvalds1da177e2005-04-16 15:20:36 -07002338int __init tcp4_proc_init(void)
2339{
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002340 return register_pernet_subsys(&tcp4_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002341}
2342
/* Unregister the /proc/net/tcp pernet operations. */
void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
2347#endif /* CONFIG_PROC_FS */
2348
/*
 * Protocol operations vector for IPv4 TCP, registered with the socket
 * layer.  Connection-oriented callbacks are provided by the generic
 * inet_connection_sock code (accept, hashing, port allocation); the
 * TCP-specific entry points live in this file and tcp.c.
 * NOTE(review): no .sendmsg member is visible in this chunk - confirm
 * against the full file before relying on that.
 */
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v4_do_rcv,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	/* global memory-pressure accounting shared with tcp.c */
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
};
2386
Denis V. Lunev046ee902008-04-03 14:31:33 -07002387
/* pernet init: create the kernel-internal TCP control socket for @net. */
static int __net_init tcp_sk_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
}
2393
/*
 * pernet exit: destroy the control socket and flush any TIME_WAIT
 * sockets still referencing this namespace.
 */
static void __net_exit tcp_sk_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
	inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET);
}
2399
/* Per-network-namespace lifetime hooks for TCP's control socket. */
static struct pernet_operations __net_initdata tcp_sk_ops = {
       .init = tcp_sk_init,
       .exit = tcp_sk_exit,
};
2404
Denis V. Lunev9b0f9762008-02-29 11:13:15 -08002405void __init tcp_v4_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002406{
Eric Dumazet5caea4e2008-11-20 00:40:07 -08002407 inet_hashinfo_init(&tcp_hashinfo);
Denis V. Lunev046ee902008-04-03 14:31:33 -07002408 if (register_pernet_device(&tcp_sk_ops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002409 panic("Failed to create the TCP control socket.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002410}
2411
/* Symbols used by modular code (notably the IPv6 TCP implementation). */
EXPORT_SYMBOL(ipv4_specific);
EXPORT_SYMBOL(tcp_hashinfo);
EXPORT_SYMBOL(tcp_prot);
EXPORT_SYMBOL(tcp_v4_conn_request);
EXPORT_SYMBOL(tcp_v4_connect);
EXPORT_SYMBOL(tcp_v4_do_rcv);
EXPORT_SYMBOL(tcp_v4_remember_stamp);
EXPORT_SYMBOL(tcp_v4_send_check);
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(tcp_proc_register);
EXPORT_SYMBOL(tcp_proc_unregister);
#endif
EXPORT_SYMBOL(sysctl_tcp_low_latency);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002427