blob: a4a3390a52879c58bba1f95439e59065f832bae3 [file] [log] [blame]
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
23
/*
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *		Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:		ip_dynaddr bits
 *		Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */
52
Linus Torvalds1da177e2005-04-16 15:20:36 -070053
Herbert Xueb4dea52008-12-29 23:04:08 -080054#include <linux/bottom_half.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070055#include <linux/types.h>
56#include <linux/fcntl.h>
57#include <linux/module.h>
58#include <linux/random.h>
59#include <linux/cache.h>
60#include <linux/jhash.h>
61#include <linux/init.h>
62#include <linux/times.h>
63
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020064#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070065#include <net/icmp.h>
Arnaldo Carvalho de Melo304a1612005-08-09 19:59:20 -070066#include <net/inet_hashtables.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070067#include <net/tcp.h>
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -030068#include <net/transp_v6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070069#include <net/ipv6.h>
70#include <net/inet_common.h>
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -080071#include <net/timewait_sock.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070072#include <net/xfrm.h>
Chris Leech1a2449a2006-05-23 18:05:53 -070073#include <net/netdma.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070074
75#include <linux/inet.h>
76#include <linux/ipv6.h>
77#include <linux/stddef.h>
78#include <linux/proc_fs.h>
79#include <linux/seq_file.h>
80
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -080081#include <linux/crypto.h>
82#include <linux/scatterlist.h>
83
Brian Haleyab32ea52006-09-22 14:15:41 -070084int sysctl_tcp_tw_reuse __read_mostly;
85int sysctl_tcp_low_latency __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -070086
Linus Torvalds1da177e2005-04-16 15:20:36 -070087
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -080088#ifdef CONFIG_TCP_MD5SIG
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -020089static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
90 __be32 addr);
Adam Langley49a72df2008-07-19 00:01:42 -070091static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
92 __be32 daddr, __be32 saddr, struct tcphdr *th);
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +090093#else
94static inline
95struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
96{
97 return NULL;
98}
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -080099#endif
100
Eric Dumazet5caea4e2008-11-20 00:40:07 -0800101struct inet_hashinfo tcp_hashinfo;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700102
Gerrit Renkera94f7232006-11-10 14:06:49 -0800103static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700105 return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
106 ip_hdr(skb)->saddr,
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700107 tcp_hdr(skb)->dest,
108 tcp_hdr(skb)->source);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109}
110
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -0800111int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
112{
113 const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
114 struct tcp_sock *tp = tcp_sk(sk);
115
116 /* With PAWS, it is safe from the viewpoint
117 of data integrity. Even without PAWS it is safe provided sequence
118 spaces do not overlap i.e. at data rates <= 80Mbit/sec.
119
120 Actually, the idea is close to VJ's one, only timestamp cache is
121 held not per host, but per port pair and TW bucket is used as state
122 holder.
123
124 If TW bucket has been already destroyed we fall back to VJ's scheme
125 and use initial timestamp retrieved from peer table.
126 */
127 if (tcptw->tw_ts_recent_stamp &&
128 (twp == NULL || (sysctl_tcp_tw_reuse &&
James Morris9d729f72007-03-04 16:12:44 -0800129 get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -0800130 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
131 if (tp->write_seq == 0)
132 tp->write_seq = 1;
133 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
134 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
135 sock_hold(sktw);
136 return 1;
137 }
138
139 return 0;
140}
141
142EXPORT_SYMBOL_GPL(tcp_twsk_unique);
143
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144/* This will initiate an outgoing connection. */
145int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
146{
147 struct inet_sock *inet = inet_sk(sk);
148 struct tcp_sock *tp = tcp_sk(sk);
149 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
150 struct rtable *rt;
Al Virobada8ad2006-09-26 21:27:15 -0700151 __be32 daddr, nexthop;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152 int tmp;
153 int err;
154
155 if (addr_len < sizeof(struct sockaddr_in))
156 return -EINVAL;
157
158 if (usin->sin_family != AF_INET)
159 return -EAFNOSUPPORT;
160
161 nexthop = daddr = usin->sin_addr.s_addr;
162 if (inet->opt && inet->opt->srr) {
163 if (!daddr)
164 return -EINVAL;
165 nexthop = inet->opt->faddr;
166 }
167
Eric Dumazetc720c7e2009-10-15 06:30:45 +0000168 tmp = ip_route_connect(&rt, nexthop, inet->inet_saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
170 IPPROTO_TCP,
Eric Dumazetc720c7e2009-10-15 06:30:45 +0000171 inet->inet_sport, usin->sin_port, sk, 1);
Wei Dong584bdf82007-05-31 22:49:28 -0700172 if (tmp < 0) {
173 if (tmp == -ENETUNREACH)
Pavel Emelyanov7c73a6f2008-07-16 20:20:11 -0700174 IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700175 return tmp;
Wei Dong584bdf82007-05-31 22:49:28 -0700176 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177
178 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
179 ip_rt_put(rt);
180 return -ENETUNREACH;
181 }
182
183 if (!inet->opt || !inet->opt->srr)
184 daddr = rt->rt_dst;
185
Eric Dumazetc720c7e2009-10-15 06:30:45 +0000186 if (!inet->inet_saddr)
187 inet->inet_saddr = rt->rt_src;
188 inet->inet_rcv_saddr = inet->inet_saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189
Eric Dumazetc720c7e2009-10-15 06:30:45 +0000190 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191 /* Reset inherited state */
192 tp->rx_opt.ts_recent = 0;
193 tp->rx_opt.ts_recent_stamp = 0;
194 tp->write_seq = 0;
195 }
196
Arnaldo Carvalho de Melo295ff7e2005-08-09 20:44:40 -0700197 if (tcp_death_row.sysctl_tw_recycle &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700198 !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
199 struct inet_peer *peer = rt_get_peer(rt);
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200200 /*
201 * VJ's idea. We save last timestamp seen from
202 * the destination in peer table, when entering state
203 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
204 * when trying new connection.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700205 */
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200206 if (peer != NULL &&
James Morris9d729f72007-03-04 16:12:44 -0800207 peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208 tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
209 tp->rx_opt.ts_recent = peer->tcp_ts;
210 }
211 }
212
Eric Dumazetc720c7e2009-10-15 06:30:45 +0000213 inet->inet_dport = usin->sin_port;
214 inet->inet_daddr = daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215
Arnaldo Carvalho de Melod83d8462005-12-13 23:26:10 -0800216 inet_csk(sk)->icsk_ext_hdr_len = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700217 if (inet->opt)
Arnaldo Carvalho de Melod83d8462005-12-13 23:26:10 -0800218 inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700219
220 tp->rx_opt.mss_clamp = 536;
221
222 /* Socket identity is still unknown (sport may be zero).
223 * However we set state to SYN-SENT and not releasing socket
224 * lock select source port, enter ourselves into the hash tables and
225 * complete initialization after this.
226 */
227 tcp_set_state(sk, TCP_SYN_SENT);
Arnaldo Carvalho de Meloa7f5e7f2005-12-13 23:25:31 -0800228 err = inet_hash_connect(&tcp_death_row, sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700229 if (err)
230 goto failure;
231
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200232 err = ip_route_newports(&rt, IPPROTO_TCP,
Eric Dumazetc720c7e2009-10-15 06:30:45 +0000233 inet->inet_sport, inet->inet_dport, sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700234 if (err)
235 goto failure;
236
237 /* OK, now commit destination to socket. */
Herbert Xubcd76112006-06-30 13:36:35 -0700238 sk->sk_gso_type = SKB_GSO_TCPV4;
Arnaldo Carvalho de Melo6cbb0df2005-08-09 19:49:02 -0700239 sk_setup_caps(sk, &rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700240
241 if (!tp->write_seq)
Eric Dumazetc720c7e2009-10-15 06:30:45 +0000242 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
243 inet->inet_daddr,
244 inet->inet_sport,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245 usin->sin_port);
246
Eric Dumazetc720c7e2009-10-15 06:30:45 +0000247 inet->inet_id = tp->write_seq ^ jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248
249 err = tcp_connect(sk);
250 rt = NULL;
251 if (err)
252 goto failure;
253
254 return 0;
255
256failure:
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200257 /*
258 * This unhashes the socket and releases the local port,
259 * if necessary.
260 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261 tcp_set_state(sk, TCP_CLOSE);
262 ip_rt_put(rt);
263 sk->sk_route_caps = 0;
Eric Dumazetc720c7e2009-10-15 06:30:45 +0000264 inet->inet_dport = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265 return err;
266}
267
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268/*
269 * This routine does path mtu discovery as defined in RFC1191.
270 */
Stephen Hemminger40efc6f2006-01-03 16:03:49 -0800271static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272{
273 struct dst_entry *dst;
274 struct inet_sock *inet = inet_sk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275
276 /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
277 * send out by Linux are always <576bytes so they should go through
278 * unfragmented).
279 */
280 if (sk->sk_state == TCP_LISTEN)
281 return;
282
283 /* We don't check in the destentry if pmtu discovery is forbidden
284 * on this route. We just assume that no packet_to_big packets
285 * are send back when pmtu discovery is not active.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900286 * There is a small race when the user changes this flag in the
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287 * route, but I think that's acceptable.
288 */
289 if ((dst = __sk_dst_check(sk, 0)) == NULL)
290 return;
291
292 dst->ops->update_pmtu(dst, mtu);
293
294 /* Something is about to be wrong... Remember soft error
295 * for the case, if this connection will not able to recover.
296 */
297 if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
298 sk->sk_err_soft = EMSGSIZE;
299
300 mtu = dst_mtu(dst);
301
302 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
Arnaldo Carvalho de Melod83d8462005-12-13 23:26:10 -0800303 inet_csk(sk)->icsk_pmtu_cookie > mtu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700304 tcp_sync_mss(sk, mtu);
305
306 /* Resend the TCP packet because it's
307 * clear that the old packet has been
308 * dropped. This is the new "fast" path mtu
309 * discovery.
310 */
311 tcp_simple_retransmit(sk);
312 } /* else let the usual retransmit timer handle it */
313}
314
315/*
316 * This routine is called by the ICMP module when it gets some
317 * sort of error condition. If err < 0 then the socket should
318 * be closed and the error returned to the user. If err > 0
319 * it's just the icmp type << 8 | icmp code. After adjustment
320 * header points to the first 8 bytes of the tcp header. We need
321 * to find the appropriate port.
322 *
323 * The locking strategy used here is very "optimistic". When
324 * someone else accesses the socket the ICMP is just dropped
325 * and for some paths there is no check at all.
326 * A more general error queue to queue errors for later handling
327 * is probably better.
328 *
329 */
330
Damian Lukowski4d1a2d92009-08-26 00:16:27 +0000331void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700332{
Damian Lukowski4d1a2d92009-08-26 00:16:27 +0000333 struct iphdr *iph = (struct iphdr *)icmp_skb->data;
334 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000335 struct inet_connection_sock *icsk;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700336 struct tcp_sock *tp;
337 struct inet_sock *inet;
Damian Lukowski4d1a2d92009-08-26 00:16:27 +0000338 const int type = icmp_hdr(icmp_skb)->type;
339 const int code = icmp_hdr(icmp_skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700340 struct sock *sk;
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000341 struct sk_buff *skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700342 __u32 seq;
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000343 __u32 remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700344 int err;
Damian Lukowski4d1a2d92009-08-26 00:16:27 +0000345 struct net *net = dev_net(icmp_skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700346
Damian Lukowski4d1a2d92009-08-26 00:16:27 +0000347 if (icmp_skb->len < (iph->ihl << 2) + 8) {
Pavel Emelyanovdcfc23c2008-07-14 23:03:00 -0700348 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349 return;
350 }
351
Pavel Emelyanovfd54d712008-07-14 23:01:40 -0700352 sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
Damian Lukowski4d1a2d92009-08-26 00:16:27 +0000353 iph->saddr, th->source, inet_iif(icmp_skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700354 if (!sk) {
Pavel Emelyanovdcfc23c2008-07-14 23:03:00 -0700355 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700356 return;
357 }
358 if (sk->sk_state == TCP_TIME_WAIT) {
YOSHIFUJI Hideaki9469c7b2006-10-10 19:41:46 -0700359 inet_twsk_put(inet_twsk(sk));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700360 return;
361 }
362
363 bh_lock_sock(sk);
364 /* If too many ICMPs get dropped on busy
365 * servers this needs to be solved differently.
366 */
367 if (sock_owned_by_user(sk))
Pavel Emelyanovde0744a2008-07-16 20:31:16 -0700368 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369
370 if (sk->sk_state == TCP_CLOSE)
371 goto out;
372
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000373 icsk = inet_csk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700374 tp = tcp_sk(sk);
375 seq = ntohl(th->seq);
376 if (sk->sk_state != TCP_LISTEN &&
377 !between(seq, tp->snd_una, tp->snd_nxt)) {
Pavel Emelyanovde0744a2008-07-16 20:31:16 -0700378 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379 goto out;
380 }
381
382 switch (type) {
383 case ICMP_SOURCE_QUENCH:
384 /* Just silently ignore these. */
385 goto out;
386 case ICMP_PARAMETERPROB:
387 err = EPROTO;
388 break;
389 case ICMP_DEST_UNREACH:
390 if (code > NR_ICMP_UNREACH)
391 goto out;
392
393 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
394 if (!sock_owned_by_user(sk))
395 do_pmtu_discovery(sk, iph, info);
396 goto out;
397 }
398
399 err = icmp_err_convert[code].errno;
Damian Lukowskif1ecd5d2009-08-26 00:16:31 +0000400 /* check if icmp_skb allows revert of backoff
401 * (see draft-zimmermann-tcp-lcd) */
402 if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
403 break;
404 if (seq != tp->snd_una || !icsk->icsk_retransmits ||
405 !icsk->icsk_backoff)
406 break;
407
408 icsk->icsk_backoff--;
409 inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
410 icsk->icsk_backoff;
411 tcp_bound_rto(sk);
412
413 skb = tcp_write_queue_head(sk);
414 BUG_ON(!skb);
415
416 remaining = icsk->icsk_rto - min(icsk->icsk_rto,
417 tcp_time_stamp - TCP_SKB_CB(skb)->when);
418
419 if (remaining) {
420 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
421 remaining, TCP_RTO_MAX);
422 } else if (sock_owned_by_user(sk)) {
423 /* RTO revert clocked out retransmission,
424 * but socket is locked. Will defer. */
425 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
426 HZ/20, TCP_RTO_MAX);
427 } else {
428 /* RTO revert clocked out retransmission.
429 * Will retransmit now */
430 tcp_retransmit_timer(sk);
431 }
432
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433 break;
434 case ICMP_TIME_EXCEEDED:
435 err = EHOSTUNREACH;
436 break;
437 default:
438 goto out;
439 }
440
441 switch (sk->sk_state) {
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700442 struct request_sock *req, **prev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443 case TCP_LISTEN:
444 if (sock_owned_by_user(sk))
445 goto out;
446
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -0700447 req = inet_csk_search_req(sk, &prev, th->dest,
448 iph->daddr, iph->saddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700449 if (!req)
450 goto out;
451
452 /* ICMPs are not backlogged, hence we cannot get
453 an established socket here.
454 */
Ilpo Järvinen547b7922008-07-25 21:43:18 -0700455 WARN_ON(req->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700456
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700457 if (seq != tcp_rsk(req)->snt_isn) {
Pavel Emelyanovde0744a2008-07-16 20:31:16 -0700458 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459 goto out;
460 }
461
462 /*
463 * Still in SYN_RECV, just remove it silently.
464 * There is no good way to pass the error to the newly
465 * created socket, and POSIX does not want network
466 * errors returned from accept().
467 */
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -0700468 inet_csk_reqsk_queue_drop(sk, req, prev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700469 goto out;
470
471 case TCP_SYN_SENT:
472 case TCP_SYN_RECV: /* Cannot happen.
473 It can f.e. if SYNs crossed.
474 */
475 if (!sock_owned_by_user(sk)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476 sk->sk_err = err;
477
478 sk->sk_error_report(sk);
479
480 tcp_done(sk);
481 } else {
482 sk->sk_err_soft = err;
483 }
484 goto out;
485 }
486
487 /* If we've already connected we will keep trying
488 * until we time out, or the user gives up.
489 *
490 * rfc1122 4.2.3.9 allows to consider as hard errors
491 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
492 * but it is obsoleted by pmtu discovery).
493 *
494 * Note, that in modern internet, where routing is unreliable
495 * and in each dark corner broken firewalls sit, sending random
496 * errors ordered by their masters even this two messages finally lose
497 * their original sense (even Linux sends invalid PORT_UNREACHs)
498 *
499 * Now we are in compliance with RFCs.
500 * --ANK (980905)
501 */
502
503 inet = inet_sk(sk);
504 if (!sock_owned_by_user(sk) && inet->recverr) {
505 sk->sk_err = err;
506 sk->sk_error_report(sk);
507 } else { /* Only an error on timeout */
508 sk->sk_err_soft = err;
509 }
510
511out:
512 bh_unlock_sock(sk);
513 sock_put(sk);
514}
515
516/* This routine computes an IPv4 TCP checksum. */
Arnaldo Carvalho de Melo8292a172005-12-13 23:15:52 -0800517void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700518{
519 struct inet_sock *inet = inet_sk(sk);
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700520 struct tcphdr *th = tcp_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700521
Patrick McHardy84fa7932006-08-29 16:44:56 -0700522 if (skb->ip_summed == CHECKSUM_PARTIAL) {
Eric Dumazetc720c7e2009-10-15 06:30:45 +0000523 th->check = ~tcp_v4_check(len, inet->inet_saddr,
524 inet->inet_daddr, 0);
Herbert Xu663ead32007-04-09 11:59:07 -0700525 skb->csum_start = skb_transport_header(skb) - skb->head;
Al Viroff1dcad2006-11-20 18:07:29 -0800526 skb->csum_offset = offsetof(struct tcphdr, check);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527 } else {
Eric Dumazetc720c7e2009-10-15 06:30:45 +0000528 th->check = tcp_v4_check(len, inet->inet_saddr,
529 inet->inet_daddr,
Joe Perches07f07572008-11-19 15:44:53 -0800530 csum_partial(th,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700531 th->doff << 2,
532 skb->csum));
533 }
534}
535
Herbert Xua430a432006-07-08 13:34:56 -0700536int tcp_v4_gso_send_check(struct sk_buff *skb)
537{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700538 const struct iphdr *iph;
Herbert Xua430a432006-07-08 13:34:56 -0700539 struct tcphdr *th;
540
541 if (!pskb_may_pull(skb, sizeof(*th)))
542 return -EINVAL;
543
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700544 iph = ip_hdr(skb);
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700545 th = tcp_hdr(skb);
Herbert Xua430a432006-07-08 13:34:56 -0700546
547 th->check = 0;
Frederik Deweerdtba7808e2007-02-04 20:15:27 -0800548 th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
Herbert Xu663ead32007-04-09 11:59:07 -0700549 skb->csum_start = skb_transport_header(skb) - skb->head;
Al Viroff1dcad2006-11-20 18:07:29 -0800550 skb->csum_offset = offsetof(struct tcphdr, check);
Patrick McHardy84fa7932006-08-29 16:44:56 -0700551 skb->ip_summed = CHECKSUM_PARTIAL;
Herbert Xua430a432006-07-08 13:34:56 -0700552 return 0;
553}
554
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555/*
556 * This routine will send an RST to the other tcp.
557 *
558 * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
559 * for reset.
560 * Answer: if a packet caused RST, it is not for a socket
561 * existing in our system, if it is matched to a socket,
562 * it is just duplicate segment or bug in other side's TCP.
563 * So that we build reply only basing on parameters
564 * arrived with segment.
565 * Exception: precedence violation. We do not implement it in any case.
566 */
567
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800568static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700569{
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700570 struct tcphdr *th = tcp_hdr(skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800571 struct {
572 struct tcphdr th;
573#ifdef CONFIG_TCP_MD5SIG
Al Viro714e85b2006-11-14 20:51:49 -0800574 __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800575#endif
576 } rep;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700577 struct ip_reply_arg arg;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800578#ifdef CONFIG_TCP_MD5SIG
579 struct tcp_md5sig_key *key;
580#endif
Pavel Emelyanova86b1e32008-07-16 20:20:58 -0700581 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700582
583 /* Never send a reset in response to a reset. */
584 if (th->rst)
585 return;
586
Eric Dumazet511c3f92009-06-02 05:14:27 +0000587 if (skb_rtable(skb)->rt_type != RTN_LOCAL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588 return;
589
590 /* Swap the send and the receive. */
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800591 memset(&rep, 0, sizeof(rep));
592 rep.th.dest = th->source;
593 rep.th.source = th->dest;
594 rep.th.doff = sizeof(struct tcphdr) / 4;
595 rep.th.rst = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596
597 if (th->ack) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800598 rep.th.seq = th->ack_seq;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700599 } else {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800600 rep.th.ack = 1;
601 rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
602 skb->len - (th->doff << 2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603 }
604
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200605 memset(&arg, 0, sizeof(arg));
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800606 arg.iov[0].iov_base = (unsigned char *)&rep;
607 arg.iov[0].iov_len = sizeof(rep.th);
608
609#ifdef CONFIG_TCP_MD5SIG
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700610 key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800611 if (key) {
612 rep.opt[0] = htonl((TCPOPT_NOP << 24) |
613 (TCPOPT_NOP << 16) |
614 (TCPOPT_MD5SIG << 8) |
615 TCPOLEN_MD5SIG);
616 /* Update length and the length the header thinks exists */
617 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
618 rep.th.doff = arg.iov[0].iov_len / 4;
619
Adam Langley49a72df2008-07-19 00:01:42 -0700620 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
Ilpo Järvinen78e645c2008-10-09 14:37:47 -0700621 key, ip_hdr(skb)->saddr,
622 ip_hdr(skb)->daddr, &rep.th);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800623 }
624#endif
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700625 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
626 ip_hdr(skb)->saddr, /* XXX */
Ilpo Järvinen52cd5752008-10-08 11:34:06 -0700627 arg.iov[0].iov_len, IPPROTO_TCP, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700628 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
KOVACS Krisztian88ef4a52008-10-01 07:41:00 -0700629 arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630
Eric Dumazetadf30902009-06-02 05:19:30 +0000631 net = dev_net(skb_dst(skb)->dev);
Pavel Emelyanova86b1e32008-07-16 20:20:58 -0700632 ip_send_reply(net->ipv4.tcp_sock, skb,
Denis V. Lunev7feb49c2008-04-03 14:32:00 -0700633 &arg, arg.iov[0].iov_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700634
Pavel Emelyanov63231bd2008-07-16 20:22:25 -0700635 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
636 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637}
638
639/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
640 outside socket context is ugly, certainly. What can I do?
641 */
642
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900643static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
644 u32 win, u32 ts, int oif,
KOVACS Krisztian88ef4a52008-10-01 07:41:00 -0700645 struct tcp_md5sig_key *key,
646 int reply_flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647{
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700648 struct tcphdr *th = tcp_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700649 struct {
650 struct tcphdr th;
Al Viro714e85b2006-11-14 20:51:49 -0800651 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800652#ifdef CONFIG_TCP_MD5SIG
Al Viro714e85b2006-11-14 20:51:49 -0800653 + (TCPOLEN_MD5SIG_ALIGNED >> 2)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800654#endif
655 ];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700656 } rep;
657 struct ip_reply_arg arg;
Eric Dumazetadf30902009-06-02 05:19:30 +0000658 struct net *net = dev_net(skb_dst(skb)->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700659
660 memset(&rep.th, 0, sizeof(struct tcphdr));
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200661 memset(&arg, 0, sizeof(arg));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700662
663 arg.iov[0].iov_base = (unsigned char *)&rep;
664 arg.iov[0].iov_len = sizeof(rep.th);
665 if (ts) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800666 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
667 (TCPOPT_TIMESTAMP << 8) |
668 TCPOLEN_TIMESTAMP);
669 rep.opt[1] = htonl(tcp_time_stamp);
670 rep.opt[2] = htonl(ts);
Craig Schlentercb48cfe2007-01-09 00:11:15 -0800671 arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700672 }
673
674 /* Swap the send and the receive. */
675 rep.th.dest = th->source;
676 rep.th.source = th->dest;
677 rep.th.doff = arg.iov[0].iov_len / 4;
678 rep.th.seq = htonl(seq);
679 rep.th.ack_seq = htonl(ack);
680 rep.th.ack = 1;
681 rep.th.window = htons(win);
682
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800683#ifdef CONFIG_TCP_MD5SIG
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800684 if (key) {
685 int offset = (ts) ? 3 : 0;
686
687 rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
688 (TCPOPT_NOP << 16) |
689 (TCPOPT_MD5SIG << 8) |
690 TCPOLEN_MD5SIG);
691 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
692 rep.th.doff = arg.iov[0].iov_len/4;
693
Adam Langley49a72df2008-07-19 00:01:42 -0700694 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
Adam Langley90b7e112008-07-31 20:49:48 -0700695 key, ip_hdr(skb)->saddr,
696 ip_hdr(skb)->daddr, &rep.th);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800697 }
698#endif
KOVACS Krisztian88ef4a52008-10-01 07:41:00 -0700699 arg.flags = reply_flags;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700700 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
701 ip_hdr(skb)->saddr, /* XXX */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700702 arg.iov[0].iov_len, IPPROTO_TCP, 0);
703 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900704 if (oif)
705 arg.bound_dev_if = oif;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706
Pavel Emelyanova86b1e32008-07-16 20:20:58 -0700707 ip_send_reply(net->ipv4.tcp_sock, skb,
Denis V. Lunev7feb49c2008-04-03 14:32:00 -0700708 &arg, arg.iov[0].iov_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700709
Pavel Emelyanov63231bd2008-07-16 20:22:25 -0700710 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700711}
712
713static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
714{
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -0700715 struct inet_timewait_sock *tw = inet_twsk(sk);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800716 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900718 tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200719 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900720 tcptw->tw_ts_recent,
721 tw->tw_bound_dev_if,
KOVACS Krisztian88ef4a52008-10-01 07:41:00 -0700722 tcp_twsk_md5_key(tcptw),
723 tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900724 );
Linus Torvalds1da177e2005-04-16 15:20:36 -0700725
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -0700726 inet_twsk_put(tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700727}
728
Gui Jianfeng6edafaa2008-08-06 23:50:04 -0700729static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200730 struct request_sock *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731{
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900732 tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800733 tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
YOSHIFUJI Hideaki9501f972008-04-18 12:45:16 +0900734 req->ts_recent,
735 0,
KOVACS Krisztian88ef4a52008-10-01 07:41:00 -0700736 tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
737 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700738}
739
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740/*
Kris Katterjohn9bf1d832008-02-17 22:29:19 -0800741 * Send a SYN-ACK after having received a SYN.
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700742 * This still operates on a request_sock only, not on a big
Linus Torvalds1da177e2005-04-16 15:20:36 -0700743 * socket.
744 */
Denis V. Lunevfd80eb92008-02-29 11:43:03 -0800745static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
746 struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700747{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700748 const struct inet_request_sock *ireq = inet_rsk(req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700749 int err = -1;
750 struct sk_buff * skb;
751
752 /* First, grab a route. */
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -0700753 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
Denis V. Lunevfd80eb92008-02-29 11:43:03 -0800754 return -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700755
756 skb = tcp_make_synack(sk, dst, req);
757
758 if (skb) {
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -0700759 struct tcphdr *th = tcp_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700760
Frederik Deweerdtba7808e2007-02-04 20:15:27 -0800761 th->check = tcp_v4_check(skb->len,
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700762 ireq->loc_addr,
763 ireq->rmt_addr,
Joe Perches07f07572008-11-19 15:44:53 -0800764 csum_partial(th, skb->len,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765 skb->csum));
766
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700767 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
768 ireq->rmt_addr,
769 ireq->opt);
Gerrit Renkerb9df3cb2006-11-14 11:21:36 -0200770 err = net_xmit_eval(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700771 }
772
Linus Torvalds1da177e2005-04-16 15:20:36 -0700773 dst_release(dst);
774 return err;
775}
776
Denis V. Lunevfd80eb92008-02-29 11:43:03 -0800777static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req)
778{
779 return __tcp_v4_send_synack(sk, req, NULL);
780}
781
Linus Torvalds1da177e2005-04-16 15:20:36 -0700782/*
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700783 * IPv4 request_sock destructor.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700784 */
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700785static void tcp_v4_reqsk_destructor(struct request_sock *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700786{
Jesper Juhla51482b2005-11-08 09:41:34 -0800787 kfree(inet_rsk(req)->opt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788}
789
#ifdef CONFIG_SYN_COOKIES
/*
 * Log that SYN cookies are being sent for the destination port of @skb.
 * The message is suppressed until more than HZ * 60 jiffies have passed
 * since the last time it was printed.
 */
static void syn_flood_warning(struct sk_buff *skb)
{
	static unsigned long warntime;

	if (!time_after(jiffies, (warntime + HZ * 60)))
		return;

	warntime = jiffies;
	printk(KERN_INFO
	       "possible SYN flooding on port %d. Sending cookies.\n",
	       ntohs(tcp_hdr(skb)->dest));
}
#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700803
804/*
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700805 * Save and compile IPv4 options into the request_sock if needed.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700806 */
Stephen Hemminger40efc6f2006-01-03 16:03:49 -0800807static struct ip_options *tcp_v4_save_options(struct sock *sk,
808 struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700809{
810 struct ip_options *opt = &(IPCB(skb)->opt);
811 struct ip_options *dopt = NULL;
812
813 if (opt && opt->optlen) {
814 int opt_size = optlength(opt);
815 dopt = kmalloc(opt_size, GFP_ATOMIC);
816 if (dopt) {
817 if (ip_options_echo(dopt, skb)) {
818 kfree(dopt);
819 dopt = NULL;
820 }
821 }
822 }
823 return dopt;
824}
825
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800826#ifdef CONFIG_TCP_MD5SIG
827/*
828 * RFC2385 MD5 checksumming requires a mapping of
829 * IP address->MD5 Key.
830 * We need to maintain these in the sk structure.
831 */
832
833/* Find the Key structure for an address. */
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200834static struct tcp_md5sig_key *
835 tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800836{
837 struct tcp_sock *tp = tcp_sk(sk);
838 int i;
839
840 if (!tp->md5sig_info || !tp->md5sig_info->entries4)
841 return NULL;
842 for (i = 0; i < tp->md5sig_info->entries4; i++) {
843 if (tp->md5sig_info->keys4[i].addr == addr)
David S. Millerf8ab18d2007-09-28 15:18:35 -0700844 return &tp->md5sig_info->keys4[i].base;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800845 }
846 return NULL;
847}
848
849struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
850 struct sock *addr_sk)
851{
Eric Dumazetc720c7e2009-10-15 06:30:45 +0000852 return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800853}
854
855EXPORT_SYMBOL(tcp_v4_md5_lookup);
856
Adrian Bunkf5b99bc2006-11-30 17:22:29 -0800857static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
858 struct request_sock *req)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800859{
860 return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
861}
862
863/* This can be called on a newly created socket, from other files */
864int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
865 u8 *newkey, u8 newkeylen)
866{
867 /* Add Key to the list */
Matthias M. Dellwegb0a713e2007-10-29 20:55:27 -0700868 struct tcp_md5sig_key *key;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800869 struct tcp_sock *tp = tcp_sk(sk);
870 struct tcp4_md5sig_key *keys;
871
Matthias M. Dellwegb0a713e2007-10-29 20:55:27 -0700872 key = tcp_v4_md5_do_lookup(sk, addr);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800873 if (key) {
874 /* Pre-existing entry - just update that one. */
Matthias M. Dellwegb0a713e2007-10-29 20:55:27 -0700875 kfree(key->key);
876 key->key = newkey;
877 key->keylen = newkeylen;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800878 } else {
Arnaldo Carvalho de Melof6685932006-11-17 11:06:01 -0200879 struct tcp_md5sig_info *md5sig;
880
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800881 if (!tp->md5sig_info) {
Arnaldo Carvalho de Melof6685932006-11-17 11:06:01 -0200882 tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
883 GFP_ATOMIC);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800884 if (!tp->md5sig_info) {
885 kfree(newkey);
886 return -ENOMEM;
887 }
David S. Miller3d7dbea2007-06-12 14:36:42 -0700888 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800889 }
Wu Fengguangaa133072009-09-02 23:45:45 -0700890 if (tcp_alloc_md5sig_pool(sk) == NULL) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800891 kfree(newkey);
892 return -ENOMEM;
893 }
Arnaldo Carvalho de Melof6685932006-11-17 11:06:01 -0200894 md5sig = tp->md5sig_info;
895
896 if (md5sig->alloced4 == md5sig->entries4) {
897 keys = kmalloc((sizeof(*keys) *
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900898 (md5sig->entries4 + 1)), GFP_ATOMIC);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800899 if (!keys) {
900 kfree(newkey);
901 tcp_free_md5sig_pool();
902 return -ENOMEM;
903 }
904
Arnaldo Carvalho de Melof6685932006-11-17 11:06:01 -0200905 if (md5sig->entries4)
906 memcpy(keys, md5sig->keys4,
907 sizeof(*keys) * md5sig->entries4);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800908
909 /* Free old key list, and reference new one */
YOSHIFUJI Hideakia80cc202007-11-20 17:30:06 -0800910 kfree(md5sig->keys4);
Arnaldo Carvalho de Melof6685932006-11-17 11:06:01 -0200911 md5sig->keys4 = keys;
912 md5sig->alloced4++;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800913 }
Arnaldo Carvalho de Melof6685932006-11-17 11:06:01 -0200914 md5sig->entries4++;
David S. Millerf8ab18d2007-09-28 15:18:35 -0700915 md5sig->keys4[md5sig->entries4 - 1].addr = addr;
916 md5sig->keys4[md5sig->entries4 - 1].base.key = newkey;
917 md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800918 }
919 return 0;
920}
921
922EXPORT_SYMBOL(tcp_v4_md5_do_add);
923
924static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
925 u8 *newkey, u8 newkeylen)
926{
Eric Dumazetc720c7e2009-10-15 06:30:45 +0000927 return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800928 newkey, newkeylen);
929}
930
931int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
932{
933 struct tcp_sock *tp = tcp_sk(sk);
934 int i;
935
936 for (i = 0; i < tp->md5sig_info->entries4; i++) {
937 if (tp->md5sig_info->keys4[i].addr == addr) {
938 /* Free the key */
David S. Millerf8ab18d2007-09-28 15:18:35 -0700939 kfree(tp->md5sig_info->keys4[i].base.key);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800940 tp->md5sig_info->entries4--;
941
942 if (tp->md5sig_info->entries4 == 0) {
943 kfree(tp->md5sig_info->keys4);
944 tp->md5sig_info->keys4 = NULL;
Leigh Brown8228a18d2006-12-17 17:12:30 -0800945 tp->md5sig_info->alloced4 = 0;
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200946 } else if (tp->md5sig_info->entries4 != i) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800947 /* Need to do some manipulation */
YOSHIFUJI Hideaki354faf02007-11-20 17:30:31 -0800948 memmove(&tp->md5sig_info->keys4[i],
949 &tp->md5sig_info->keys4[i+1],
950 (tp->md5sig_info->entries4 - i) *
951 sizeof(struct tcp4_md5sig_key));
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800952 }
953 tcp_free_md5sig_pool();
954 return 0;
955 }
956 }
957 return -ENOENT;
958}
959
960EXPORT_SYMBOL(tcp_v4_md5_do_del);
961
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200962static void tcp_v4_clear_md5_list(struct sock *sk)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800963{
964 struct tcp_sock *tp = tcp_sk(sk);
965
966 /* Free each key, then the set of key keys,
967 * the crypto element, and then decrement our
968 * hold on the last resort crypto.
969 */
970 if (tp->md5sig_info->entries4) {
971 int i;
972 for (i = 0; i < tp->md5sig_info->entries4; i++)
David S. Millerf8ab18d2007-09-28 15:18:35 -0700973 kfree(tp->md5sig_info->keys4[i].base.key);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800974 tp->md5sig_info->entries4 = 0;
975 tcp_free_md5sig_pool();
976 }
977 if (tp->md5sig_info->keys4) {
978 kfree(tp->md5sig_info->keys4);
979 tp->md5sig_info->keys4 = NULL;
980 tp->md5sig_info->alloced4 = 0;
981 }
982}
983
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200984static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
985 int optlen)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800986{
987 struct tcp_md5sig cmd;
988 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
989 u8 *newkey;
990
991 if (optlen < sizeof(cmd))
992 return -EINVAL;
993
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -0200994 if (copy_from_user(&cmd, optval, sizeof(cmd)))
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -0800995 return -EFAULT;
996
997 if (sin->sin_family != AF_INET)
998 return -EINVAL;
999
1000 if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
1001 if (!tcp_sk(sk)->md5sig_info)
1002 return -ENOENT;
1003 return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
1004 }
1005
1006 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1007 return -EINVAL;
1008
1009 if (!tcp_sk(sk)->md5sig_info) {
1010 struct tcp_sock *tp = tcp_sk(sk);
Wu Fengguangaa133072009-09-02 23:45:45 -07001011 struct tcp_md5sig_info *p;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001012
Wu Fengguangaa133072009-09-02 23:45:45 -07001013 p = kzalloc(sizeof(*p), sk->sk_allocation);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001014 if (!p)
1015 return -EINVAL;
1016
1017 tp->md5sig_info = p;
David S. Miller3d7dbea2007-06-12 14:36:42 -07001018 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001019 }
1020
Wu Fengguangaa133072009-09-02 23:45:45 -07001021 newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001022 if (!newkey)
1023 return -ENOMEM;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001024 return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
1025 newkey, cmd.tcpm_keylen);
1026}
1027
Adam Langley49a72df2008-07-19 00:01:42 -07001028static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1029 __be32 daddr, __be32 saddr, int nbytes)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001030{
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001031 struct tcp4_pseudohdr *bp;
Adam Langley49a72df2008-07-19 00:01:42 -07001032 struct scatterlist sg;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001033
1034 bp = &hp->md5_blk.ip4;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001035
1036 /*
Adam Langley49a72df2008-07-19 00:01:42 -07001037 * 1. the TCP pseudo-header (in the order: source IP address,
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001038 * destination IP address, zero-padded protocol number, and
1039 * segment length)
1040 */
1041 bp->saddr = saddr;
1042 bp->daddr = daddr;
1043 bp->pad = 0;
YOSHIFUJI Hideaki076fb722008-04-17 12:48:12 +09001044 bp->protocol = IPPROTO_TCP;
Adam Langley49a72df2008-07-19 00:01:42 -07001045 bp->len = cpu_to_be16(nbytes);
David S. Millerc7da57a2007-10-26 00:41:21 -07001046
Adam Langley49a72df2008-07-19 00:01:42 -07001047 sg_init_one(&sg, bp, sizeof(*bp));
1048 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1049}
1050
1051static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
1052 __be32 daddr, __be32 saddr, struct tcphdr *th)
1053{
1054 struct tcp_md5sig_pool *hp;
1055 struct hash_desc *desc;
1056
1057 hp = tcp_get_md5sig_pool();
1058 if (!hp)
1059 goto clear_hash_noput;
1060 desc = &hp->md5_desc;
1061
1062 if (crypto_hash_init(desc))
1063 goto clear_hash;
1064 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1065 goto clear_hash;
1066 if (tcp_md5_hash_header(hp, th))
1067 goto clear_hash;
1068 if (tcp_md5_hash_key(hp, key))
1069 goto clear_hash;
1070 if (crypto_hash_final(desc, md5_hash))
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001071 goto clear_hash;
1072
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001073 tcp_put_md5sig_pool();
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001074 return 0;
Adam Langley49a72df2008-07-19 00:01:42 -07001075
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001076clear_hash:
1077 tcp_put_md5sig_pool();
1078clear_hash_noput:
1079 memset(md5_hash, 0, 16);
Adam Langley49a72df2008-07-19 00:01:42 -07001080 return 1;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001081}
1082
Adam Langley49a72df2008-07-19 00:01:42 -07001083int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
1084 struct sock *sk, struct request_sock *req,
1085 struct sk_buff *skb)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001086{
Adam Langley49a72df2008-07-19 00:01:42 -07001087 struct tcp_md5sig_pool *hp;
1088 struct hash_desc *desc;
1089 struct tcphdr *th = tcp_hdr(skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001090 __be32 saddr, daddr;
1091
1092 if (sk) {
Eric Dumazetc720c7e2009-10-15 06:30:45 +00001093 saddr = inet_sk(sk)->inet_saddr;
1094 daddr = inet_sk(sk)->inet_daddr;
Adam Langley49a72df2008-07-19 00:01:42 -07001095 } else if (req) {
1096 saddr = inet_rsk(req)->loc_addr;
1097 daddr = inet_rsk(req)->rmt_addr;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001098 } else {
Adam Langley49a72df2008-07-19 00:01:42 -07001099 const struct iphdr *iph = ip_hdr(skb);
1100 saddr = iph->saddr;
1101 daddr = iph->daddr;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001102 }
Adam Langley49a72df2008-07-19 00:01:42 -07001103
1104 hp = tcp_get_md5sig_pool();
1105 if (!hp)
1106 goto clear_hash_noput;
1107 desc = &hp->md5_desc;
1108
1109 if (crypto_hash_init(desc))
1110 goto clear_hash;
1111
1112 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1113 goto clear_hash;
1114 if (tcp_md5_hash_header(hp, th))
1115 goto clear_hash;
1116 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1117 goto clear_hash;
1118 if (tcp_md5_hash_key(hp, key))
1119 goto clear_hash;
1120 if (crypto_hash_final(desc, md5_hash))
1121 goto clear_hash;
1122
1123 tcp_put_md5sig_pool();
1124 return 0;
1125
1126clear_hash:
1127 tcp_put_md5sig_pool();
1128clear_hash_noput:
1129 memset(md5_hash, 0, 16);
1130 return 1;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001131}
1132
Adam Langley49a72df2008-07-19 00:01:42 -07001133EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001134
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02001135static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001136{
1137 /*
1138 * This gets called for each TCP segment that arrives
1139 * so we want to be efficient.
1140 * We have 3 drop cases:
1141 * o No MD5 hash and one expected.
1142 * o MD5 hash and we're not expecting one.
1143 * o MD5 hash and its wrong.
1144 */
1145 __u8 *hash_location = NULL;
1146 struct tcp_md5sig_key *hash_expected;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001147 const struct iphdr *iph = ip_hdr(skb);
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -07001148 struct tcphdr *th = tcp_hdr(skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001149 int genhash;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001150 unsigned char newhash[16];
1151
1152 hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
YOSHIFUJI Hideaki7d5d5522008-04-17 12:29:53 +09001153 hash_location = tcp_parse_md5sig_option(th);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001154
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001155 /* We've parsed the options - do we have a hash? */
1156 if (!hash_expected && !hash_location)
1157 return 0;
1158
1159 if (hash_expected && !hash_location) {
David S. Miller785957d2008-07-30 03:03:15 -07001160 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001161 return 1;
1162 }
1163
1164 if (!hash_expected && hash_location) {
David S. Miller785957d2008-07-30 03:03:15 -07001165 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001166 return 1;
1167 }
1168
1169 /* Okay, so this is hash_expected and hash_location -
1170 * so we need to calculate the checksum.
1171 */
Adam Langley49a72df2008-07-19 00:01:42 -07001172 genhash = tcp_v4_md5_hash_skb(newhash,
1173 hash_expected,
1174 NULL, NULL, skb);
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001175
1176 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1177 if (net_ratelimit()) {
Harvey Harrison673d57e2008-10-31 00:53:57 -07001178 printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1179 &iph->saddr, ntohs(th->source),
1180 &iph->daddr, ntohs(th->dest),
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001181 genhash ? " tcp_v4_calc_md5_hash failed" : "");
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001182 }
1183 return 1;
1184 }
1185 return 0;
1186}
1187
1188#endif
1189
Eric Dumazet72a3eff2006-11-16 02:30:37 -08001190struct request_sock_ops tcp_request_sock_ops __read_mostly = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001191 .family = PF_INET,
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001192 .obj_size = sizeof(struct tcp_request_sock),
Linus Torvalds1da177e2005-04-16 15:20:36 -07001193 .rtx_syn_ack = tcp_v4_send_synack,
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001194 .send_ack = tcp_v4_reqsk_send_ack,
1195 .destructor = tcp_v4_reqsk_destructor,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001196 .send_reset = tcp_v4_send_reset,
1197};
1198
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001199#ifdef CONFIG_TCP_MD5SIG
Stephen Hemmingerb2e4b3d2009-09-01 19:25:03 +00001200static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001201 .md5_lookup = tcp_v4_reqsk_md5_lookup,
John Dykstrae3afe7b2009-07-16 05:04:51 +00001202 .calc_md5_hash = tcp_v4_md5_hash_skb,
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001203};
Andrew Mortonb6332e62006-11-30 19:16:28 -08001204#endif
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001205
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -08001206static struct timewait_sock_ops tcp_timewait_sock_ops = {
1207 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1208 .twsk_unique = tcp_twsk_unique,
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001209 .twsk_destructor= tcp_twsk_destructor,
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -08001210};
1211
Linus Torvalds1da177e2005-04-16 15:20:36 -07001212int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1213{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001214 struct inet_request_sock *ireq;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001215 struct tcp_options_received tmp_opt;
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001216 struct request_sock *req;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001217 __be32 saddr = ip_hdr(skb)->saddr;
1218 __be32 daddr = ip_hdr(skb)->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001219 __u32 isn = TCP_SKB_CB(skb)->when;
1220 struct dst_entry *dst = NULL;
1221#ifdef CONFIG_SYN_COOKIES
1222 int want_cookie = 0;
1223#else
1224#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
1225#endif
1226
1227 /* Never answer to SYNs send to broadcast or multicast */
Eric Dumazet511c3f92009-06-02 05:14:27 +00001228 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001229 goto drop;
1230
1231 /* TW buckets are converted to open requests without
1232 * limitations, they conserve resources and peer is
1233 * evidently real one.
1234 */
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001235 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001236#ifdef CONFIG_SYN_COOKIES
1237 if (sysctl_tcp_syncookies) {
1238 want_cookie = 1;
1239 } else
1240#endif
1241 goto drop;
1242 }
1243
1244 /* Accept backlog is full. If we have already queued enough
1245 * of warm entries in syn queue, drop request. It is better than
1246 * clogging syn queue with openreqs with exponentially increasing
1247 * timeout.
1248 */
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001249 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001250 goto drop;
1251
Arnaldo Carvalho de Meloce4a7d02008-06-10 12:39:35 -07001252 req = inet_reqsk_alloc(&tcp_request_sock_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001253 if (!req)
1254 goto drop;
1255
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001256#ifdef CONFIG_TCP_MD5SIG
1257 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1258#endif
1259
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260 tcp_clear_options(&tmp_opt);
1261 tmp_opt.mss_clamp = 536;
1262 tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss;
1263
1264 tcp_parse_options(skb, &tmp_opt, 0);
1265
Florian Westphal4dfc2812008-04-10 03:12:40 -07001266 if (want_cookie && !tmp_opt.saw_tstamp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001267 tcp_clear_options(&tmp_opt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001268
Linus Torvalds1da177e2005-04-16 15:20:36 -07001269 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1270
1271 tcp_openreq_init(req, &tmp_opt, skb);
1272
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001273 ireq = inet_rsk(req);
1274 ireq->loc_addr = daddr;
1275 ireq->rmt_addr = saddr;
KOVACS Krisztian88ef4a52008-10-01 07:41:00 -07001276 ireq->no_srccheck = inet_sk(sk)->transparent;
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001277 ireq->opt = tcp_v4_save_options(sk, skb);
Paul Moore284904a2009-03-27 17:10:28 -04001278
1279 if (security_inet_conn_request(sk, skb, req))
1280 goto drop_and_free;
1281
Linus Torvalds1da177e2005-04-16 15:20:36 -07001282 if (!want_cookie)
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -07001283 TCP_ECN_create_request(req, tcp_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001284
1285 if (want_cookie) {
1286#ifdef CONFIG_SYN_COOKIES
1287 syn_flood_warning(skb);
Florian Westphal4dfc2812008-04-10 03:12:40 -07001288 req->cookie_ts = tmp_opt.tstamp_ok;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001289#endif
1290 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1291 } else if (!isn) {
1292 struct inet_peer *peer = NULL;
1293
1294 /* VJ's idea. We save last timestamp seen
1295 * from the destination in peer table, when entering
1296 * state TIME-WAIT, and check against it before
1297 * accepting new connection request.
1298 *
1299 * If "isn" is not zero, this request hit alive
1300 * timewait bucket, so that all the necessary checks
1301 * are made in the function processing timewait state.
1302 */
1303 if (tmp_opt.saw_tstamp &&
Arnaldo Carvalho de Melo295ff7e2005-08-09 20:44:40 -07001304 tcp_death_row.sysctl_tw_recycle &&
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001305 (dst = inet_csk_route_req(sk, req)) != NULL &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001306 (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1307 peer->v4daddr == saddr) {
James Morris9d729f72007-03-04 16:12:44 -08001308 if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001309 (s32)(peer->tcp_ts - req->ts_recent) >
1310 TCP_PAWS_WINDOW) {
Pavel Emelyanovde0744a2008-07-16 20:31:16 -07001311 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
Denis V. Lunev7cd04fa2008-03-03 11:59:32 -08001312 goto drop_and_release;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001313 }
1314 }
1315 /* Kill the following clause, if you dislike this way. */
1316 else if (!sysctl_tcp_syncookies &&
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001317 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
Linus Torvalds1da177e2005-04-16 15:20:36 -07001318 (sysctl_max_syn_backlog >> 2)) &&
1319 (!peer || !peer->tcp_ts_stamp) &&
1320 (!dst || !dst_metric(dst, RTAX_RTT))) {
1321 /* Without syncookies last quarter of
1322 * backlog is filled with destinations,
1323 * proven to be alive.
1324 * It means that we continue to communicate
1325 * to destinations, already remembered
1326 * to the moment of synflood.
1327 */
Harvey Harrison673d57e2008-10-31 00:53:57 -07001328 LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
1329 &saddr, ntohs(tcp_hdr(skb)->source));
Denis V. Lunev7cd04fa2008-03-03 11:59:32 -08001330 goto drop_and_release;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001331 }
1332
Gerrit Renkera94f7232006-11-10 14:06:49 -08001333 isn = tcp_v4_init_sequence(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001334 }
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001335 tcp_rsk(req)->snt_isn = isn;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001336
Denis V. Lunev7cd04fa2008-03-03 11:59:32 -08001337 if (__tcp_v4_send_synack(sk, req, dst) || want_cookie)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001338 goto drop_and_free;
1339
Denis V. Lunev7cd04fa2008-03-03 11:59:32 -08001340 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341 return 0;
1342
Denis V. Lunev7cd04fa2008-03-03 11:59:32 -08001343drop_and_release:
1344 dst_release(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001345drop_and_free:
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001346 reqsk_free(req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001347drop:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001348 return 0;
1349}
1350
1351
1352/*
1353 * The three way handshake has completed - we got a valid synack -
1354 * now create the new socket.
1355 */
1356struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001357 struct request_sock *req,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001358 struct dst_entry *dst)
1359{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001360 struct inet_request_sock *ireq;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001361 struct inet_sock *newinet;
1362 struct tcp_sock *newtp;
1363 struct sock *newsk;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001364#ifdef CONFIG_TCP_MD5SIG
1365 struct tcp_md5sig_key *key;
1366#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001367
1368 if (sk_acceptq_is_full(sk))
1369 goto exit_overflow;
1370
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001371 if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001372 goto exit;
1373
1374 newsk = tcp_create_openreq_child(sk, req, skb);
1375 if (!newsk)
1376 goto exit;
1377
Herbert Xubcd76112006-06-30 13:36:35 -07001378 newsk->sk_gso_type = SKB_GSO_TCPV4;
Arnaldo Carvalho de Melo6cbb0df2005-08-09 19:49:02 -07001379 sk_setup_caps(newsk, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380
1381 newtp = tcp_sk(newsk);
1382 newinet = inet_sk(newsk);
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001383 ireq = inet_rsk(req);
Eric Dumazetc720c7e2009-10-15 06:30:45 +00001384 newinet->inet_daddr = ireq->rmt_addr;
1385 newinet->inet_rcv_saddr = ireq->loc_addr;
1386 newinet->inet_saddr = ireq->loc_addr;
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001387 newinet->opt = ireq->opt;
1388 ireq->opt = NULL;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001389 newinet->mc_index = inet_iif(skb);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001390 newinet->mc_ttl = ip_hdr(skb)->ttl;
Arnaldo Carvalho de Melod83d8462005-12-13 23:26:10 -08001391 inet_csk(newsk)->icsk_ext_hdr_len = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001392 if (newinet->opt)
Arnaldo Carvalho de Melod83d8462005-12-13 23:26:10 -08001393 inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
Eric Dumazetc720c7e2009-10-15 06:30:45 +00001394 newinet->inet_id = newtp->write_seq ^ jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001395
John Heffner5d424d52006-03-20 17:53:41 -08001396 tcp_mtup_init(newsk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001397 tcp_sync_mss(newsk, dst_mtu(dst));
1398 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
Tom Quetchenbachf5fff5d2008-09-21 00:21:51 -07001399 if (tcp_sk(sk)->rx_opt.user_mss &&
1400 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1401 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1402
Linus Torvalds1da177e2005-04-16 15:20:36 -07001403 tcp_initialize_rcv_mss(newsk);
1404
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001405#ifdef CONFIG_TCP_MD5SIG
1406 /* Copy over the MD5 key from the original socket */
Eric Dumazetc720c7e2009-10-15 06:30:45 +00001407 key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
1408 if (key != NULL) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001409 /*
1410 * We're using one, so create a matching key
1411 * on the newsk structure. If we fail to get
1412 * memory, then we end up not copying the key
1413 * across. Shucks.
1414 */
Arnaldo Carvalho de Melof6685932006-11-17 11:06:01 -02001415 char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
1416 if (newkey != NULL)
Eric Dumazetc720c7e2009-10-15 06:30:45 +00001417 tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001418 newkey, key->keylen);
Adam Langley49a72df2008-07-19 00:01:42 -07001419 newsk->sk_route_caps &= ~NETIF_F_GSO_MASK;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001420 }
1421#endif
1422
Arnaldo Carvalho de Meloab1e0a12008-02-03 04:06:04 -08001423 __inet_hash_nolisten(newsk);
1424 __inet_inherit_port(sk, newsk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001425
1426 return newsk;
1427
1428exit_overflow:
Pavel Emelyanovde0744a2008-07-16 20:31:16 -07001429 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001430exit:
Pavel Emelyanovde0744a2008-07-16 20:31:16 -07001431 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001432 dst_release(dst);
1433 return NULL;
1434}
1435
1436static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1437{
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -07001438 struct tcphdr *th = tcp_hdr(skb);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001439 const struct iphdr *iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001440 struct sock *nsk;
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001441 struct request_sock **prev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001442 /* Find possible connection requests. */
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001443 struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1444 iph->saddr, iph->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001445 if (req)
1446 return tcp_check_req(sk, skb, req, prev);
1447
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001448 nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
Pavel Emelyanovc67499c2008-01-31 05:06:40 -08001449 th->source, iph->daddr, th->dest, inet_iif(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001450
1451 if (nsk) {
1452 if (nsk->sk_state != TCP_TIME_WAIT) {
1453 bh_lock_sock(nsk);
1454 return nsk;
1455 }
YOSHIFUJI Hideaki9469c7b2006-10-10 19:41:46 -07001456 inet_twsk_put(inet_twsk(nsk));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001457 return NULL;
1458 }
1459
1460#ifdef CONFIG_SYN_COOKIES
1461 if (!th->rst && !th->syn && th->ack)
1462 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1463#endif
1464 return sk;
1465}
1466
Al Virob51655b2006-11-14 21:40:42 -08001467static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001468{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001469 const struct iphdr *iph = ip_hdr(skb);
1470
Patrick McHardy84fa7932006-08-29 16:44:56 -07001471 if (skb->ip_summed == CHECKSUM_COMPLETE) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001472 if (!tcp_v4_check(skb->len, iph->saddr,
1473 iph->daddr, skb->csum)) {
Herbert Xufb286bb2005-11-10 13:01:24 -08001474 skb->ip_summed = CHECKSUM_UNNECESSARY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001475 return 0;
Herbert Xufb286bb2005-11-10 13:01:24 -08001476 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001477 }
Herbert Xufb286bb2005-11-10 13:01:24 -08001478
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001479 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
Herbert Xufb286bb2005-11-10 13:01:24 -08001480 skb->len, IPPROTO_TCP, 0);
1481
Linus Torvalds1da177e2005-04-16 15:20:36 -07001482 if (skb->len <= 76) {
Herbert Xufb286bb2005-11-10 13:01:24 -08001483 return __skb_checksum_complete(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001484 }
1485 return 0;
1486}
1487
1488
1489/* The socket must have it's spinlock held when we get
1490 * here.
1491 *
1492 * We have a potential double-lock case here, so even when
1493 * doing backlog processing we use the BH locking scheme.
1494 * This is because we cannot sleep with the original spinlock
1495 * held.
1496 */
1497int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1498{
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001499 struct sock *rsk;
1500#ifdef CONFIG_TCP_MD5SIG
1501 /*
1502 * We really want to reject the packet as early as possible
1503 * if:
1504 * o We're expecting an MD5'd packet and this is no MD5 tcp option
1505 * o There is an MD5 option and we're not expecting one
1506 */
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02001507 if (tcp_v4_inbound_md5_hash(sk, skb))
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001508 goto discard;
1509#endif
1510
Linus Torvalds1da177e2005-04-16 15:20:36 -07001511 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1512 TCP_CHECK_TIMER(sk);
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -07001513 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001514 rsk = sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001515 goto reset;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001516 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001517 TCP_CHECK_TIMER(sk);
1518 return 0;
1519 }
1520
Arnaldo Carvalho de Meloab6a5bb2007-03-18 17:43:48 -07001521 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001522 goto csum_err;
1523
1524 if (sk->sk_state == TCP_LISTEN) {
1525 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1526 if (!nsk)
1527 goto discard;
1528
1529 if (nsk != sk) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001530 if (tcp_child_process(sk, nsk, skb)) {
1531 rsk = nsk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001532 goto reset;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001533 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001534 return 0;
1535 }
1536 }
1537
1538 TCP_CHECK_TIMER(sk);
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -07001539 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001540 rsk = sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001541 goto reset;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001542 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001543 TCP_CHECK_TIMER(sk);
1544 return 0;
1545
1546reset:
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001547 tcp_v4_send_reset(rsk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001548discard:
1549 kfree_skb(skb);
1550 /* Be careful here. If this function gets more complicated and
1551 * gcc suffers from register pressure on the x86, sk (in %ebx)
1552 * might be destroyed here. This current version compiles correctly,
1553 * but you have been warned.
1554 */
1555 return 0;
1556
1557csum_err:
Pavel Emelyanov63231bd2008-07-16 20:22:25 -07001558 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001559 goto discard;
1560}
1561
1562/*
1563 * From tcp_input.c
1564 */
1565
1566int tcp_v4_rcv(struct sk_buff *skb)
1567{
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001568 const struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001569 struct tcphdr *th;
1570 struct sock *sk;
1571 int ret;
Pavel Emelyanova86b1e32008-07-16 20:20:58 -07001572 struct net *net = dev_net(skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001573
1574 if (skb->pkt_type != PACKET_HOST)
1575 goto discard_it;
1576
1577 /* Count it even if it's bad */
Pavel Emelyanov63231bd2008-07-16 20:22:25 -07001578 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001579
1580 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1581 goto discard_it;
1582
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -07001583 th = tcp_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001584
1585 if (th->doff < sizeof(struct tcphdr) / 4)
1586 goto bad_packet;
1587 if (!pskb_may_pull(skb, th->doff * 4))
1588 goto discard_it;
1589
1590 /* An explanation is required here, I think.
1591 * Packet length and doff are validated by header prediction,
Stephen Hemmingercaa20d9a2005-11-10 17:13:47 -08001592 * provided case of th->doff==0 is eliminated.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001593 * So, we defer the checks. */
Herbert Xu60476372007-04-09 11:59:39 -07001594 if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001595 goto bad_packet;
1596
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -07001597 th = tcp_hdr(skb);
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001598 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001599 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1600 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1601 skb->len - th->doff * 4);
1602 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1603 TCP_SKB_CB(skb)->when = 0;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001604 TCP_SKB_CB(skb)->flags = iph->tos;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001605 TCP_SKB_CB(skb)->sacked = 0;
1606
Arnaldo Carvalho de Melo9a1f27c2008-10-07 11:41:57 -07001607 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001608 if (!sk)
1609 goto no_tcp_socket;
1610
1611process:
1612 if (sk->sk_state == TCP_TIME_WAIT)
1613 goto do_time_wait;
1614
1615 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1616 goto discard_and_relse;
Patrick McHardyb59c2702006-01-06 23:06:10 -08001617 nf_reset(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001618
Dmitry Mishinfda9ef52006-08-31 15:28:39 -07001619 if (sk_filter(sk, skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001620 goto discard_and_relse;
1621
1622 skb->dev = NULL;
1623
Ingo Molnarc6366182006-07-03 00:25:13 -07001624 bh_lock_sock_nested(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001625 ret = 0;
1626 if (!sock_owned_by_user(sk)) {
Chris Leech1a2449a2006-05-23 18:05:53 -07001627#ifdef CONFIG_NET_DMA
1628 struct tcp_sock *tp = tcp_sk(sk);
1629 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
Dan Williamsf67b4592009-01-06 11:38:15 -07001630 tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
Chris Leech1a2449a2006-05-23 18:05:53 -07001631 if (tp->ucopy.dma_chan)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001632 ret = tcp_v4_do_rcv(sk, skb);
Chris Leech1a2449a2006-05-23 18:05:53 -07001633 else
1634#endif
1635 {
1636 if (!tcp_prequeue(sk, skb))
Shan Weiae8d7f82009-05-05 01:01:29 +00001637 ret = tcp_v4_do_rcv(sk, skb);
Chris Leech1a2449a2006-05-23 18:05:53 -07001638 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001639 } else
1640 sk_add_backlog(sk, skb);
1641 bh_unlock_sock(sk);
1642
1643 sock_put(sk);
1644
1645 return ret;
1646
1647no_tcp_socket:
1648 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1649 goto discard_it;
1650
1651 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1652bad_packet:
Pavel Emelyanov63231bd2008-07-16 20:22:25 -07001653 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001654 } else {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001655 tcp_v4_send_reset(NULL, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001656 }
1657
1658discard_it:
1659 /* Discard frame. */
1660 kfree_skb(skb);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001661 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001662
1663discard_and_relse:
1664 sock_put(sk);
1665 goto discard_it;
1666
1667do_time_wait:
1668 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
YOSHIFUJI Hideaki9469c7b2006-10-10 19:41:46 -07001669 inet_twsk_put(inet_twsk(sk));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001670 goto discard_it;
1671 }
1672
1673 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
Pavel Emelyanov63231bd2008-07-16 20:22:25 -07001674 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
YOSHIFUJI Hideaki9469c7b2006-10-10 19:41:46 -07001675 inet_twsk_put(inet_twsk(sk));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676 goto discard_it;
1677 }
YOSHIFUJI Hideaki9469c7b2006-10-10 19:41:46 -07001678 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001679 case TCP_TW_SYN: {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001680 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
Pavel Emelyanovc67499c2008-01-31 05:06:40 -08001681 &tcp_hashinfo,
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001682 iph->daddr, th->dest,
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001683 inet_iif(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684 if (sk2) {
YOSHIFUJI Hideaki9469c7b2006-10-10 19:41:46 -07001685 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1686 inet_twsk_put(inet_twsk(sk));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001687 sk = sk2;
1688 goto process;
1689 }
1690 /* Fall through to ACK */
1691 }
1692 case TCP_TW_ACK:
1693 tcp_v4_timewait_ack(sk, skb);
1694 break;
1695 case TCP_TW_RST:
1696 goto no_tcp_socket;
1697 case TCP_TW_SUCCESS:;
1698 }
1699 goto discard_it;
1700}
1701
Linus Torvalds1da177e2005-04-16 15:20:36 -07001702/* VJ's idea. Save last timestamp seen from this destination
1703 * and hold it at least for normal timewait interval to use for duplicate
1704 * segment detection in subsequent connections, before they enter synchronized
1705 * state.
1706 */
1707
1708int tcp_v4_remember_stamp(struct sock *sk)
1709{
1710 struct inet_sock *inet = inet_sk(sk);
1711 struct tcp_sock *tp = tcp_sk(sk);
1712 struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
1713 struct inet_peer *peer = NULL;
1714 int release_it = 0;
1715
Eric Dumazetc720c7e2009-10-15 06:30:45 +00001716 if (!rt || rt->rt_dst != inet->inet_daddr) {
1717 peer = inet_getpeer(inet->inet_daddr, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718 release_it = 1;
1719 } else {
1720 if (!rt->peer)
1721 rt_bind_peer(rt, 1);
1722 peer = rt->peer;
1723 }
1724
1725 if (peer) {
1726 if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
James Morris9d729f72007-03-04 16:12:44 -08001727 (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001728 peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
1729 peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
1730 peer->tcp_ts = tp->rx_opt.ts_recent;
1731 }
1732 if (release_it)
1733 inet_putpeer(peer);
1734 return 1;
1735 }
1736
1737 return 0;
1738}
1739
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -07001740int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001741{
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -07001742 struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001743
1744 if (peer) {
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -07001745 const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
1746
1747 if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
James Morris9d729f72007-03-04 16:12:44 -08001748 (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -07001749 peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
1750 peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
1751 peer->tcp_ts = tcptw->tw_ts_recent;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001752 }
1753 inet_putpeer(peer);
1754 return 1;
1755 }
1756
1757 return 0;
1758}
1759
Stephen Hemminger3b401a82009-09-01 19:25:04 +00001760const struct inet_connection_sock_af_ops ipv4_specific = {
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08001761 .queue_xmit = ip_queue_xmit,
1762 .send_check = tcp_v4_send_check,
1763 .rebuild_header = inet_sk_rebuild_header,
1764 .conn_request = tcp_v4_conn_request,
1765 .syn_recv_sock = tcp_v4_syn_recv_sock,
1766 .remember_stamp = tcp_v4_remember_stamp,
1767 .net_header_len = sizeof(struct iphdr),
1768 .setsockopt = ip_setsockopt,
1769 .getsockopt = ip_getsockopt,
1770 .addr2sockaddr = inet_csk_addr2sockaddr,
1771 .sockaddr_len = sizeof(struct sockaddr_in),
Arnaldo Carvalho de Meloab1e0a12008-02-03 04:06:04 -08001772 .bind_conflict = inet_csk_bind_conflict,
Dmitry Mishin3fdadf72006-03-20 22:45:21 -08001773#ifdef CONFIG_COMPAT
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08001774 .compat_setsockopt = compat_ip_setsockopt,
1775 .compat_getsockopt = compat_ip_getsockopt,
Dmitry Mishin3fdadf72006-03-20 22:45:21 -08001776#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001777};
1778
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001779#ifdef CONFIG_TCP_MD5SIG
Stephen Hemmingerb2e4b3d2009-09-01 19:25:03 +00001780static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001781 .md5_lookup = tcp_v4_md5_lookup,
Adam Langley49a72df2008-07-19 00:01:42 -07001782 .calc_md5_hash = tcp_v4_md5_hash_skb,
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001783 .md5_add = tcp_v4_md5_add_func,
1784 .md5_parse = tcp_v4_parse_md5_keys,
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001785};
Andrew Mortonb6332e62006-11-30 19:16:28 -08001786#endif
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001787
Linus Torvalds1da177e2005-04-16 15:20:36 -07001788/* NOTE: A lot of things set to zero explicitly by call to
1789 * sk_alloc() so need not be done here.
1790 */
1791static int tcp_v4_init_sock(struct sock *sk)
1792{
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03001793 struct inet_connection_sock *icsk = inet_csk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001794 struct tcp_sock *tp = tcp_sk(sk);
1795
1796 skb_queue_head_init(&tp->out_of_order_queue);
1797 tcp_init_xmit_timers(sk);
1798 tcp_prequeue_init(tp);
1799
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03001800 icsk->icsk_rto = TCP_TIMEOUT_INIT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001801 tp->mdev = TCP_TIMEOUT_INIT;
1802
1803 /* So many TCP implementations out there (incorrectly) count the
1804 * initial SYN frame in their delayed-ACK and congestion control
1805 * algorithms that we must have the following bandaid to talk
1806 * efficiently to them. -DaveM
1807 */
1808 tp->snd_cwnd = 2;
1809
1810 /* See draft-stevens-tcpca-spec-01 for discussion of the
1811 * initialization of these values.
1812 */
Ilpo Järvinen0b6a05c2009-09-15 01:30:10 -07001813 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001814 tp->snd_cwnd_clamp = ~0;
David S. Millerc1b4a7e2005-07-05 15:24:38 -07001815 tp->mss_cache = 536;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001816
1817 tp->reordering = sysctl_tcp_reordering;
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03001818 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001819
1820 sk->sk_state = TCP_CLOSE;
1821
1822 sk->sk_write_space = sk_stream_write_space;
1823 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1824
Arnaldo Carvalho de Melo8292a172005-12-13 23:15:52 -08001825 icsk->icsk_af_ops = &ipv4_specific;
Arnaldo Carvalho de Melod83d8462005-12-13 23:26:10 -08001826 icsk->icsk_sync_mss = tcp_sync_mss;
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001827#ifdef CONFIG_TCP_MD5SIG
1828 tp->af_specific = &tcp_sock_ipv4_specific;
1829#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001830
1831 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1832 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1833
Herbert Xueb4dea52008-12-29 23:04:08 -08001834 local_bh_disable();
Eric Dumazet17483762008-11-25 21:16:35 -08001835 percpu_counter_inc(&tcp_sockets_allocated);
Herbert Xueb4dea52008-12-29 23:04:08 -08001836 local_bh_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001837
1838 return 0;
1839}
1840
Brian Haley7d06b2e2008-06-14 17:04:49 -07001841void tcp_v4_destroy_sock(struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001842{
1843 struct tcp_sock *tp = tcp_sk(sk);
1844
1845 tcp_clear_xmit_timers(sk);
1846
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03001847 tcp_cleanup_congestion_control(sk);
Stephen Hemminger317a76f2005-06-23 12:19:55 -07001848
Linus Torvalds1da177e2005-04-16 15:20:36 -07001849 /* Cleanup up the write buffer. */
David S. Millerfe067e82007-03-07 12:12:44 -08001850 tcp_write_queue_purge(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001851
1852 /* Cleans up our, hopefully empty, out_of_order_queue. */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001853 __skb_queue_purge(&tp->out_of_order_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001854
YOSHIFUJI Hideakicfb6eeb2006-11-14 19:07:45 -08001855#ifdef CONFIG_TCP_MD5SIG
1856 /* Clean up the MD5 key list, if any */
1857 if (tp->md5sig_info) {
1858 tcp_v4_clear_md5_list(sk);
1859 kfree(tp->md5sig_info);
1860 tp->md5sig_info = NULL;
1861 }
1862#endif
1863
Chris Leech1a2449a2006-05-23 18:05:53 -07001864#ifdef CONFIG_NET_DMA
1865 /* Cleans up our sk_async_wait_queue */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001866 __skb_queue_purge(&sk->sk_async_wait_queue);
Chris Leech1a2449a2006-05-23 18:05:53 -07001867#endif
1868
Linus Torvalds1da177e2005-04-16 15:20:36 -07001869 /* Clean prequeue, it must be empty really */
1870 __skb_queue_purge(&tp->ucopy.prequeue);
1871
1872 /* Clean up a referenced TCP bind bucket. */
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001873 if (inet_csk(sk)->icsk_bind_hash)
Arnaldo Carvalho de Meloab1e0a12008-02-03 04:06:04 -08001874 inet_put_port(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001875
1876 /*
1877 * If sendmsg cached page exists, toss it.
1878 */
1879 if (sk->sk_sndmsg_page) {
1880 __free_page(sk->sk_sndmsg_page);
1881 sk->sk_sndmsg_page = NULL;
1882 }
1883
Eric Dumazet17483762008-11-25 21:16:35 -08001884 percpu_counter_dec(&tcp_sockets_allocated);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001885}
1886
1887EXPORT_SYMBOL(tcp_v4_destroy_sock);
1888
1889#ifdef CONFIG_PROC_FS
1890/* Proc filesystem TCP sock list dumping. */
1891
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08001892static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001893{
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08001894 return hlist_nulls_empty(head) ? NULL :
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -07001895 list_entry(head->first, struct inet_timewait_sock, tw_node);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001896}
1897
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -07001898static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001899{
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08001900 return !is_a_nulls(tw->tw_node.next) ?
1901 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001902}
1903
1904static void *listening_get_next(struct seq_file *seq, void *cur)
1905{
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001906 struct inet_connection_sock *icsk;
Eric Dumazetc25eb3b2008-11-23 17:22:55 -08001907 struct hlist_nulls_node *node;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001908 struct sock *sk = cur;
Eric Dumazet5caea4e2008-11-20 00:40:07 -08001909 struct inet_listen_hashbucket *ilb;
Jianjun Kong5799de02008-11-03 02:49:10 -08001910 struct tcp_iter_state *st = seq->private;
Denis V. Luneva4146b12008-04-13 22:11:14 -07001911 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001912
1913 if (!sk) {
1914 st->bucket = 0;
Eric Dumazet5caea4e2008-11-20 00:40:07 -08001915 ilb = &tcp_hashinfo.listening_hash[0];
1916 spin_lock_bh(&ilb->lock);
Eric Dumazetc25eb3b2008-11-23 17:22:55 -08001917 sk = sk_nulls_head(&ilb->head);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001918 goto get_sk;
1919 }
Eric Dumazet5caea4e2008-11-20 00:40:07 -08001920 ilb = &tcp_hashinfo.listening_hash[st->bucket];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001921 ++st->num;
1922
1923 if (st->state == TCP_SEQ_STATE_OPENREQ) {
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001924 struct request_sock *req = cur;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001925
Eric Dumazet72a3eff2006-11-16 02:30:37 -08001926 icsk = inet_csk(st->syn_wait_sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001927 req = req->dl_next;
1928 while (1) {
1929 while (req) {
Daniel Lezcanobdccc4c2008-07-19 00:15:13 -07001930 if (req->rsk_ops->family == st->family) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001931 cur = req;
1932 goto out;
1933 }
1934 req = req->dl_next;
1935 }
Eric Dumazet72a3eff2006-11-16 02:30:37 -08001936 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001937 break;
1938get_req:
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001939 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001940 }
1941 sk = sk_next(st->syn_wait_sk);
1942 st->state = TCP_SEQ_STATE_LISTENING;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001943 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001944 } else {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001945 icsk = inet_csk(sk);
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001946 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1947 if (reqsk_queue_len(&icsk->icsk_accept_queue))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001948 goto start_req;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001949 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001950 sk = sk_next(sk);
1951 }
1952get_sk:
Eric Dumazetc25eb3b2008-11-23 17:22:55 -08001953 sk_nulls_for_each_from(sk, node) {
YOSHIFUJI Hideaki878628f2008-03-26 03:57:35 +09001954 if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001955 cur = sk;
1956 goto out;
1957 }
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001958 icsk = inet_csk(sk);
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001959 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1960 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001961start_req:
1962 st->uid = sock_i_uid(sk);
1963 st->syn_wait_sk = sk;
1964 st->state = TCP_SEQ_STATE_OPENREQ;
1965 st->sbucket = 0;
1966 goto get_req;
1967 }
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001968 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001969 }
Eric Dumazet5caea4e2008-11-20 00:40:07 -08001970 spin_unlock_bh(&ilb->lock);
Arnaldo Carvalho de Melo0f7ff922005-08-09 19:59:44 -07001971 if (++st->bucket < INET_LHTABLE_SIZE) {
Eric Dumazet5caea4e2008-11-20 00:40:07 -08001972 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1973 spin_lock_bh(&ilb->lock);
Eric Dumazetc25eb3b2008-11-23 17:22:55 -08001974 sk = sk_nulls_head(&ilb->head);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001975 goto get_sk;
1976 }
1977 cur = NULL;
1978out:
1979 return cur;
1980}
1981
1982static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
1983{
1984 void *rc = listening_get_next(seq, NULL);
1985
1986 while (rc && *pos) {
1987 rc = listening_get_next(seq, rc);
1988 --*pos;
1989 }
1990 return rc;
1991}
1992
Andi Kleen6eac5602008-08-28 01:08:02 -07001993static inline int empty_bucket(struct tcp_iter_state *st)
1994{
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08001995 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
1996 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
Andi Kleen6eac5602008-08-28 01:08:02 -07001997}
1998
Linus Torvalds1da177e2005-04-16 15:20:36 -07001999static void *established_get_first(struct seq_file *seq)
2000{
Jianjun Kong5799de02008-11-03 02:49:10 -08002001 struct tcp_iter_state *st = seq->private;
Denis V. Luneva4146b12008-04-13 22:11:14 -07002002 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002003 void *rc = NULL;
2004
Eric Dumazetf373b532009-10-09 00:16:19 +00002005 for (st->bucket = 0; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002006 struct sock *sk;
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002007 struct hlist_nulls_node *node;
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -07002008 struct inet_timewait_sock *tw;
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002009 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002010
Andi Kleen6eac5602008-08-28 01:08:02 -07002011 /* Lockless fast path for the common case of empty buckets */
2012 if (empty_bucket(st))
2013 continue;
2014
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002015 spin_lock_bh(lock);
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002016 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
Daniel Lezcanof40c8172008-03-21 04:13:54 -07002017 if (sk->sk_family != st->family ||
YOSHIFUJI Hideaki878628f2008-03-26 03:57:35 +09002018 !net_eq(sock_net(sk), net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002019 continue;
2020 }
2021 rc = sk;
2022 goto out;
2023 }
2024 st->state = TCP_SEQ_STATE_TIME_WAIT;
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -07002025 inet_twsk_for_each(tw, node,
Eric Dumazetdbca9b2752007-02-08 14:16:46 -08002026 &tcp_hashinfo.ehash[st->bucket].twchain) {
Pavel Emelyanov28518fc2008-03-21 15:52:00 -07002027 if (tw->tw_family != st->family ||
YOSHIFUJI Hideaki878628f2008-03-26 03:57:35 +09002028 !net_eq(twsk_net(tw), net)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002029 continue;
2030 }
2031 rc = tw;
2032 goto out;
2033 }
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002034 spin_unlock_bh(lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002035 st->state = TCP_SEQ_STATE_ESTABLISHED;
2036 }
2037out:
2038 return rc;
2039}
2040
2041static void *established_get_next(struct seq_file *seq, void *cur)
2042{
2043 struct sock *sk = cur;
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -07002044 struct inet_timewait_sock *tw;
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002045 struct hlist_nulls_node *node;
Jianjun Kong5799de02008-11-03 02:49:10 -08002046 struct tcp_iter_state *st = seq->private;
Denis V. Luneva4146b12008-04-13 22:11:14 -07002047 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002048
2049 ++st->num;
2050
2051 if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2052 tw = cur;
2053 tw = tw_next(tw);
2054get_tw:
YOSHIFUJI Hideaki878628f2008-03-26 03:57:35 +09002055 while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002056 tw = tw_next(tw);
2057 }
2058 if (tw) {
2059 cur = tw;
2060 goto out;
2061 }
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002062 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002063 st->state = TCP_SEQ_STATE_ESTABLISHED;
2064
Andi Kleen6eac5602008-08-28 01:08:02 -07002065 /* Look for next non empty bucket */
Eric Dumazetf373b532009-10-09 00:16:19 +00002066 while (++st->bucket <= tcp_hashinfo.ehash_mask &&
Andi Kleen6eac5602008-08-28 01:08:02 -07002067 empty_bucket(st))
2068 ;
Eric Dumazetf373b532009-10-09 00:16:19 +00002069 if (st->bucket > tcp_hashinfo.ehash_mask)
Andi Kleen6eac5602008-08-28 01:08:02 -07002070 return NULL;
2071
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002072 spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002073 sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002074 } else
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002075 sk = sk_nulls_next(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002076
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002077 sk_nulls_for_each_from(sk, node) {
YOSHIFUJI Hideaki878628f2008-03-26 03:57:35 +09002078 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002079 goto found;
2080 }
2081
2082 st->state = TCP_SEQ_STATE_TIME_WAIT;
Eric Dumazetdbca9b2752007-02-08 14:16:46 -08002083 tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002084 goto get_tw;
2085found:
2086 cur = sk;
2087out:
2088 return cur;
2089}
2090
2091static void *established_get_idx(struct seq_file *seq, loff_t pos)
2092{
2093 void *rc = established_get_first(seq);
2094
2095 while (rc && pos) {
2096 rc = established_get_next(seq, rc);
2097 --pos;
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02002098 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002099 return rc;
2100}
2101
2102static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2103{
2104 void *rc;
Jianjun Kong5799de02008-11-03 02:49:10 -08002105 struct tcp_iter_state *st = seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002106
Linus Torvalds1da177e2005-04-16 15:20:36 -07002107 st->state = TCP_SEQ_STATE_LISTENING;
2108 rc = listening_get_idx(seq, &pos);
2109
2110 if (!rc) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002111 st->state = TCP_SEQ_STATE_ESTABLISHED;
2112 rc = established_get_idx(seq, pos);
2113 }
2114
2115 return rc;
2116}
2117
2118static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2119{
Jianjun Kong5799de02008-11-03 02:49:10 -08002120 struct tcp_iter_state *st = seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002121 st->state = TCP_SEQ_STATE_LISTENING;
2122 st->num = 0;
2123 return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2124}
2125
2126static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2127{
2128 void *rc = NULL;
Jianjun Kong5799de02008-11-03 02:49:10 -08002129 struct tcp_iter_state *st;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002130
2131 if (v == SEQ_START_TOKEN) {
2132 rc = tcp_get_idx(seq, 0);
2133 goto out;
2134 }
2135 st = seq->private;
2136
2137 switch (st->state) {
2138 case TCP_SEQ_STATE_OPENREQ:
2139 case TCP_SEQ_STATE_LISTENING:
2140 rc = listening_get_next(seq, v);
2141 if (!rc) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002142 st->state = TCP_SEQ_STATE_ESTABLISHED;
2143 rc = established_get_first(seq);
2144 }
2145 break;
2146 case TCP_SEQ_STATE_ESTABLISHED:
2147 case TCP_SEQ_STATE_TIME_WAIT:
2148 rc = established_get_next(seq, v);
2149 break;
2150 }
2151out:
2152 ++*pos;
2153 return rc;
2154}
2155
2156static void tcp_seq_stop(struct seq_file *seq, void *v)
2157{
Jianjun Kong5799de02008-11-03 02:49:10 -08002158 struct tcp_iter_state *st = seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002159
2160 switch (st->state) {
2161 case TCP_SEQ_STATE_OPENREQ:
2162 if (v) {
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002163 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2164 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002165 }
2166 case TCP_SEQ_STATE_LISTENING:
2167 if (v != SEQ_START_TOKEN)
Eric Dumazet5caea4e2008-11-20 00:40:07 -08002168 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002169 break;
2170 case TCP_SEQ_STATE_TIME_WAIT:
2171 case TCP_SEQ_STATE_ESTABLISHED:
2172 if (v)
Eric Dumazet9db66bd2008-11-20 20:39:09 -08002173 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002174 break;
2175 }
2176}
2177
2178static int tcp_seq_open(struct inode *inode, struct file *file)
2179{
2180 struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002181 struct tcp_iter_state *s;
Denis V. Lunev52d6f3f2008-04-13 22:12:41 -07002182 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002183
Denis V. Lunev52d6f3f2008-04-13 22:12:41 -07002184 err = seq_open_net(inode, file, &afinfo->seq_ops,
2185 sizeof(struct tcp_iter_state));
2186 if (err < 0)
2187 return err;
Daniel Lezcanof40c8172008-03-21 04:13:54 -07002188
Denis V. Lunev52d6f3f2008-04-13 22:12:41 -07002189 s = ((struct seq_file *)file->private_data)->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002190 s->family = afinfo->family;
Daniel Lezcanof40c8172008-03-21 04:13:54 -07002191 return 0;
2192}
2193
Daniel Lezcano6f8b13b2008-03-21 04:14:45 -07002194int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002195{
2196 int rc = 0;
2197 struct proc_dir_entry *p;
2198
Denis V. Lunev68fcadd2008-04-13 22:13:30 -07002199 afinfo->seq_fops.open = tcp_seq_open;
2200 afinfo->seq_fops.read = seq_read;
2201 afinfo->seq_fops.llseek = seq_lseek;
2202 afinfo->seq_fops.release = seq_release_net;
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02002203
Denis V. Lunev9427c4b2008-04-13 22:12:13 -07002204 afinfo->seq_ops.start = tcp_seq_start;
2205 afinfo->seq_ops.next = tcp_seq_next;
2206 afinfo->seq_ops.stop = tcp_seq_stop;
2207
Denis V. Lunev84841c32008-05-02 04:10:08 -07002208 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2209 &afinfo->seq_fops, afinfo);
2210 if (!p)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002211 rc = -ENOMEM;
2212 return rc;
2213}
2214
Daniel Lezcano6f8b13b2008-03-21 04:14:45 -07002215void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002216{
Daniel Lezcano6f8b13b2008-03-21 04:14:45 -07002217 proc_net_remove(net, afinfo->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002218}
2219
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07002220static void get_openreq4(struct sock *sk, struct request_sock *req,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002221 struct seq_file *f, int i, int uid, int *len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002222{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002223 const struct inet_request_sock *ireq = inet_rsk(req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002224 int ttd = req->expires - jiffies;
2225
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002226 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2227 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p%n",
Linus Torvalds1da177e2005-04-16 15:20:36 -07002228 i,
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002229 ireq->loc_addr,
Eric Dumazetc720c7e2009-10-15 06:30:45 +00002230 ntohs(inet_sk(sk)->inet_sport),
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002231 ireq->rmt_addr,
2232 ntohs(ireq->rmt_port),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002233 TCP_SYN_RECV,
2234 0, 0, /* could print option size, but that is af dependent. */
2235 1, /* timers active (only the expire timer) */
2236 jiffies_to_clock_t(ttd),
2237 req->retrans,
2238 uid,
2239 0, /* non standard timer */
2240 0, /* open_requests have no inode */
2241 atomic_read(&sk->sk_refcnt),
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002242 req,
2243 len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002244}
2245
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002246static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002247{
2248 int timer_active;
2249 unsigned long timer_expires;
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002250 struct tcp_sock *tp = tcp_sk(sk);
2251 const struct inet_connection_sock *icsk = inet_csk(sk);
2252 struct inet_sock *inet = inet_sk(sk);
Eric Dumazetc720c7e2009-10-15 06:30:45 +00002253 __be32 dest = inet->inet_daddr;
2254 __be32 src = inet->inet_rcv_saddr;
2255 __u16 destp = ntohs(inet->inet_dport);
2256 __u16 srcp = ntohs(inet->inet_sport);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002257
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002258 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002259 timer_active = 1;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002260 timer_expires = icsk->icsk_timeout;
2261 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002262 timer_active = 4;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002263 timer_expires = icsk->icsk_timeout;
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002264 } else if (timer_pending(&sk->sk_timer)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002265 timer_active = 2;
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002266 timer_expires = sk->sk_timer.expires;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002267 } else {
2268 timer_active = 0;
2269 timer_expires = jiffies;
2270 }
2271
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002272 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
Stephen Hemminger7be87352008-06-27 20:00:19 -07002273 "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n",
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002274 i, src, srcp, dest, destp, sk->sk_state,
Sridhar Samudrala47da8ee2006-06-27 13:29:00 -07002275 tp->write_seq - tp->snd_una,
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002276 sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02002277 (tp->rcv_nxt - tp->copied_seq),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002278 timer_active,
2279 jiffies_to_clock_t(timer_expires - jiffies),
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002280 icsk->icsk_retransmits,
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002281 sock_i_uid(sk),
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03002282 icsk->icsk_probes_out,
Ilpo Järvinencf4c6bf2007-02-22 01:13:58 -08002283 sock_i_ino(sk),
2284 atomic_read(&sk->sk_refcnt), sk,
Stephen Hemminger7be87352008-06-27 20:00:19 -07002285 jiffies_to_clock_t(icsk->icsk_rto),
2286 jiffies_to_clock_t(icsk->icsk_ack.ato),
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002287 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002288 tp->snd_cwnd,
Ilpo Järvinen0b6a05c2009-09-15 01:30:10 -07002289 tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002290 len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002291}
2292
Arnaldo Carvalho de Melo71742592006-11-17 10:57:30 -02002293static void get_timewait4_sock(struct inet_timewait_sock *tw,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002294 struct seq_file *f, int i, int *len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002295{
Al Viro23f33c22006-09-27 18:43:50 -07002296 __be32 dest, src;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002297 __u16 destp, srcp;
2298 int ttd = tw->tw_ttd - jiffies;
2299
2300 if (ttd < 0)
2301 ttd = 0;
2302
2303 dest = tw->tw_daddr;
2304 src = tw->tw_rcv_saddr;
2305 destp = ntohs(tw->tw_dport);
2306 srcp = ntohs(tw->tw_sport);
2307
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002308 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2309 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p%n",
Linus Torvalds1da177e2005-04-16 15:20:36 -07002310 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2311 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002312 atomic_read(&tw->tw_refcnt), tw, len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002313}
2314
2315#define TMPSZ 150
2316
2317static int tcp4_seq_show(struct seq_file *seq, void *v)
2318{
Jianjun Kong5799de02008-11-03 02:49:10 -08002319 struct tcp_iter_state *st;
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002320 int len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002321
2322 if (v == SEQ_START_TOKEN) {
2323 seq_printf(seq, "%-*s\n", TMPSZ - 1,
2324 " sl local_address rem_address st tx_queue "
2325 "rx_queue tr tm->when retrnsmt uid timeout "
2326 "inode");
2327 goto out;
2328 }
2329 st = seq->private;
2330
2331 switch (st->state) {
2332 case TCP_SEQ_STATE_LISTENING:
2333 case TCP_SEQ_STATE_ESTABLISHED:
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002334 get_tcp4_sock(v, seq, st->num, &len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002335 break;
2336 case TCP_SEQ_STATE_OPENREQ:
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002337 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002338 break;
2339 case TCP_SEQ_STATE_TIME_WAIT:
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002340 get_timewait4_sock(v, seq, st->num, &len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002341 break;
2342 }
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002343 seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002344out:
2345 return 0;
2346}
2347
Linus Torvalds1da177e2005-04-16 15:20:36 -07002348static struct tcp_seq_afinfo tcp4_seq_afinfo = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002349 .name = "tcp",
2350 .family = AF_INET,
Denis V. Lunev5f4472c2008-04-13 22:13:53 -07002351 .seq_fops = {
2352 .owner = THIS_MODULE,
2353 },
Denis V. Lunev9427c4b2008-04-13 22:12:13 -07002354 .seq_ops = {
2355 .show = tcp4_seq_show,
2356 },
Linus Torvalds1da177e2005-04-16 15:20:36 -07002357};
2358
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002359static int tcp4_proc_init_net(struct net *net)
2360{
2361 return tcp_proc_register(net, &tcp4_seq_afinfo);
2362}
2363
2364static void tcp4_proc_exit_net(struct net *net)
2365{
2366 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2367}
2368
2369static struct pernet_operations tcp4_net_ops = {
2370 .init = tcp4_proc_init_net,
2371 .exit = tcp4_proc_exit_net,
2372};
2373
Linus Torvalds1da177e2005-04-16 15:20:36 -07002374int __init tcp4_proc_init(void)
2375{
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002376 return register_pernet_subsys(&tcp4_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002377}
2378
2379void tcp4_proc_exit(void)
2380{
Pavel Emelyanov757764f2008-03-24 14:56:02 -07002381 unregister_pernet_subsys(&tcp4_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002382}
2383#endif /* CONFIG_PROC_FS */
2384
Herbert Xubf296b12008-12-15 23:43:36 -08002385struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2386{
Herbert Xu36e7b1b2009-04-27 05:44:45 -07002387 struct iphdr *iph = skb_gro_network_header(skb);
Herbert Xubf296b12008-12-15 23:43:36 -08002388
2389 switch (skb->ip_summed) {
2390 case CHECKSUM_COMPLETE:
Herbert Xu86911732009-01-29 14:19:50 +00002391 if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
Herbert Xubf296b12008-12-15 23:43:36 -08002392 skb->csum)) {
2393 skb->ip_summed = CHECKSUM_UNNECESSARY;
2394 break;
2395 }
2396
2397 /* fall through */
2398 case CHECKSUM_NONE:
2399 NAPI_GRO_CB(skb)->flush = 1;
2400 return NULL;
2401 }
2402
2403 return tcp_gro_receive(head, skb);
2404}
2405EXPORT_SYMBOL(tcp4_gro_receive);
2406
2407int tcp4_gro_complete(struct sk_buff *skb)
2408{
2409 struct iphdr *iph = ip_hdr(skb);
2410 struct tcphdr *th = tcp_hdr(skb);
2411
2412 th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2413 iph->saddr, iph->daddr, 0);
2414 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2415
2416 return tcp_gro_complete(skb);
2417}
2418EXPORT_SYMBOL(tcp4_gro_complete);
2419
Linus Torvalds1da177e2005-04-16 15:20:36 -07002420struct proto tcp_prot = {
2421 .name = "TCP",
2422 .owner = THIS_MODULE,
2423 .close = tcp_close,
2424 .connect = tcp_v4_connect,
2425 .disconnect = tcp_disconnect,
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002426 .accept = inet_csk_accept,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002427 .ioctl = tcp_ioctl,
2428 .init = tcp_v4_init_sock,
2429 .destroy = tcp_v4_destroy_sock,
2430 .shutdown = tcp_shutdown,
2431 .setsockopt = tcp_setsockopt,
2432 .getsockopt = tcp_getsockopt,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002433 .recvmsg = tcp_recvmsg,
2434 .backlog_rcv = tcp_v4_do_rcv,
Arnaldo Carvalho de Meloab1e0a12008-02-03 04:06:04 -08002435 .hash = inet_hash,
2436 .unhash = inet_unhash,
2437 .get_port = inet_csk_get_port,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002438 .enter_memory_pressure = tcp_enter_memory_pressure,
2439 .sockets_allocated = &tcp_sockets_allocated,
Arnaldo Carvalho de Melo0a5578c2005-08-09 20:11:41 -07002440 .orphan_count = &tcp_orphan_count,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002441 .memory_allocated = &tcp_memory_allocated,
2442 .memory_pressure = &tcp_memory_pressure,
2443 .sysctl_mem = sysctl_tcp_mem,
2444 .sysctl_wmem = sysctl_tcp_wmem,
2445 .sysctl_rmem = sysctl_tcp_rmem,
2446 .max_header = MAX_TCP_HEADER,
2447 .obj_size = sizeof(struct tcp_sock),
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002448 .slab_flags = SLAB_DESTROY_BY_RCU,
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -08002449 .twsk_prot = &tcp_timewait_sock_ops,
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07002450 .rsk_prot = &tcp_request_sock_ops,
Pavel Emelyanov39d8cda2008-03-22 16:50:58 -07002451 .h.hashinfo = &tcp_hashinfo,
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08002452#ifdef CONFIG_COMPAT
2453 .compat_setsockopt = compat_tcp_setsockopt,
2454 .compat_getsockopt = compat_tcp_getsockopt,
2455#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002456};
2457
Denis V. Lunev046ee902008-04-03 14:31:33 -07002458
2459static int __net_init tcp_sk_init(struct net *net)
2460{
2461 return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2462 PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2463}
2464
2465static void __net_exit tcp_sk_exit(struct net *net)
2466{
2467 inet_ctl_sock_destroy(net->ipv4.tcp_sock);
Daniel Lezcanod3154922008-09-08 13:17:27 -07002468 inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET);
Denis V. Lunev046ee902008-04-03 14:31:33 -07002469}
2470
2471static struct pernet_operations __net_initdata tcp_sk_ops = {
2472 .init = tcp_sk_init,
2473 .exit = tcp_sk_exit,
2474};
2475
Denis V. Lunev9b0f9762008-02-29 11:13:15 -08002476void __init tcp_v4_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002477{
Eric Dumazet5caea4e2008-11-20 00:40:07 -08002478 inet_hashinfo_init(&tcp_hashinfo);
Eric W. Biederman6a1b3052009-02-22 00:10:18 -08002479 if (register_pernet_subsys(&tcp_sk_ops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002480 panic("Failed to create the TCP control socket.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002481}
2482
/* Symbols exported from this file. */
EXPORT_SYMBOL(ipv4_specific);
EXPORT_SYMBOL(sysctl_tcp_low_latency);
EXPORT_SYMBOL(tcp_hashinfo);
EXPORT_SYMBOL(tcp_prot);
EXPORT_SYMBOL(tcp_v4_conn_request);
EXPORT_SYMBOL(tcp_v4_connect);
EXPORT_SYMBOL(tcp_v4_do_rcv);
EXPORT_SYMBOL(tcp_v4_remember_stamp);
EXPORT_SYMBOL(tcp_v4_send_check);
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(tcp_proc_register);
EXPORT_SYMBOL(tcp_proc_unregister);
#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002498