blob: 5a10d30cec4a9ad265f030a37fd77a2001dab304 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * TCP over IPv6
3 * Linux INET6 implementation
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
9 *
10 * Based on:
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
14 *
15 * Fixes:
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
18 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
19 * a single port at the same time.
20 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
21 *
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
26 */
27
28#include <linux/module.h>
29#include <linux/config.h>
30#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/jiffies.h>
36#include <linux/in.h>
37#include <linux/in6.h>
38#include <linux/netdevice.h>
39#include <linux/init.h>
40#include <linux/jhash.h>
41#include <linux/ipsec.h>
42#include <linux/times.h>
43
44#include <linux/ipv6.h>
45#include <linux/icmpv6.h>
46#include <linux/random.h>
47
48#include <net/tcp.h>
49#include <net/ndisc.h>
Arnaldo Carvalho de Melo5324a042005-08-12 09:26:18 -030050#include <net/inet6_hashtables.h>
Arnaldo Carvalho de Melo81297652005-12-13 23:15:24 -080051#include <net/inet6_connection_sock.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <net/ipv6.h>
53#include <net/transp_v6.h>
54#include <net/addrconf.h>
55#include <net/ip6_route.h>
56#include <net/ip6_checksum.h>
57#include <net/inet_ecn.h>
58#include <net/protocol.h>
59#include <net/xfrm.h>
60#include <net/addrconf.h>
61#include <net/snmp.h>
62#include <net/dsfield.h>
63
64#include <asm/uaccess.h>
65
66#include <linux/proc_fs.h>
67#include <linux/seq_file.h>
68
/*
 * Forward declarations for static functions that are referenced before
 * their definitions (e.g. from tcp6_request_sock_ops and from the connect
 * path).
 */
69static void tcp_v6_send_reset(struct sk_buff *skb);
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -070070static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
Linus Torvalds1da177e2005-04-16 15:20:36 -070071static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
72 struct sk_buff *skb);
73
74static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
75static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
76
/*
 * Tentative definitions of the two address-family operation tables.
 * tp->af_specific is switched between them in tcp_v6_connect():
 * ipv6_mapped for v4-mapped destinations, ipv6_specific otherwise.
 * NOTE(review): the initialised definitions are presumably further down
 * in this file - not visible from here.
 */
77static struct tcp_func ipv6_mapped;
78static struct tcp_func ipv6_specific;
79
Arnaldo Carvalho de Melo971af182005-12-13 23:14:47 -080080int inet6_csk_bind_conflict(const struct sock *sk,
81 const struct inet_bind_bucket *tb)
Linus Torvalds1da177e2005-04-16 15:20:36 -070082{
Arnaldo Carvalho de Melo0f7ff922005-08-09 19:59:44 -070083 const struct sock *sk2;
84 const struct hlist_node *node;
Linus Torvalds1da177e2005-04-16 15:20:36 -070085
86 /* We must walk the whole port owner list in this case. -DaveM */
87 sk_for_each_bound(sk2, node, &tb->owners) {
88 if (sk != sk2 &&
89 (!sk->sk_bound_dev_if ||
90 !sk2->sk_bound_dev_if ||
91 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
92 (!sk->sk_reuse || !sk2->sk_reuse ||
93 sk2->sk_state == TCP_LISTEN) &&
94 ipv6_rcv_saddr_equal(sk, sk2))
95 break;
96 }
97
98 return node != NULL;
99}
100
Linus Torvalds1da177e2005-04-16 15:20:36 -0700101static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
102{
Arnaldo Carvalho de Melo971af182005-12-13 23:14:47 -0800103 return inet_csk_get_port(&tcp_hashinfo, sk, snum,
104 inet6_csk_bind_conflict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700105}
106
Linus Torvalds1da177e2005-04-16 15:20:36 -0700107static void tcp_v6_hash(struct sock *sk)
108{
109 if (sk->sk_state != TCP_CLOSE) {
110 struct tcp_sock *tp = tcp_sk(sk);
111
112 if (tp->af_specific == &ipv6_mapped) {
113 tcp_prot.hash(sk);
114 return;
115 }
116 local_bh_disable();
Arnaldo Carvalho de Melo90b19d32005-12-13 23:15:01 -0800117 __inet6_hash(&tcp_hashinfo, sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700118 local_bh_enable();
119 }
120}
121
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
123 struct in6_addr *saddr,
124 struct in6_addr *daddr,
125 unsigned long base)
126{
127 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
128}
129
130static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
131{
132 if (skb->protocol == htons(ETH_P_IPV6)) {
133 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
134 skb->nh.ipv6h->saddr.s6_addr32,
135 skb->h.th->dest,
136 skb->h.th->source);
137 } else {
138 return secure_tcp_sequence_number(skb->nh.iph->daddr,
139 skb->nh.iph->saddr,
140 skb->h.th->dest,
141 skb->h.th->source);
142 }
143}
144
/*
 * Check whether the connection identity of @sk (np->rcv_saddr, @lport,
 * np->daddr, inet->dport, bound device) is already present in the
 * established/TIME-WAIT hash bucket, and if it is not, insert @sk into
 * the established chain.
 *
 * @sk:    socket being connected
 * @lport: candidate local port
 * @twp:   if non-NULL, receives the TIME-WAIT socket that was recycled
 *         (or NULL when none was); if NULL, a recycled TIME-WAIT socket
 *         is descheduled and released here instead.
 *
 * Returns 0 when @sk was hashed, -EADDRNOTAVAIL when the identity is in use.
 * The bucket write lock is taken and released inside this function.
 */
Arnaldo Carvalho de Melo505cbfc2005-08-12 09:19:38 -0300145static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700146 struct inet_timewait_sock **twp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147{
148 struct inet_sock *inet = inet_sk(sk);
Arnaldo Carvalho de Melo505cbfc2005-08-12 09:19:38 -0300149 const struct ipv6_pinfo *np = inet6_sk(sk);
	/* Named from the point of view of an incoming packet: our receive
	 * address is its destination, our peer is its source. */
150 const struct in6_addr *daddr = &np->rcv_saddr;
151 const struct in6_addr *saddr = &np->daddr;
152 const int dif = sk->sk_bound_dev_if;
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700153 const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
	/* NOTE(review): the hash uses inet->num while the match uses lport -
	 * presumably callers keep them equal; confirm against inet_bind_hash(). */
Eric Dumazet81c3d542005-10-03 14:13:38 -0700154 unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport);
155 struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700156 struct sock *sk2;
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700157 const struct hlist_node *node;
158 struct inet_timewait_sock *tw;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700159
Eric Dumazet81c3d542005-10-03 14:13:38 -0700160 prefetch(head->chain.first);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700161 write_lock(&head->lock);
162
163 /* Check TIME-WAIT sockets first. */
Arnaldo Carvalho de Melo6e04e022005-08-09 20:07:35 -0700164 sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700165 const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);
166
167 tw = inet_twsk(sk2);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700168
169 if(*((__u32 *)&(tw->tw_dport)) == ports &&
170 sk2->sk_family == PF_INET6 &&
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700171 ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
172 ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700173 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700174 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700175 struct tcp_sock *tp = tcp_sk(sk);
176
		/* A matching TIME-WAIT socket may be recycled only if it
		 * recorded a timestamp, and either the caller passed no twp
		 * or tw_reuse is enabled and the stamp is over a second old. */
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700177 if (tcptw->tw_ts_recent_stamp &&
178 (!twp ||
179 (sysctl_tcp_tw_reuse &&
180 xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181 /* See comment in tcp_ipv4.c */
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700182 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183 if (!tp->write_seq)
184 tp->write_seq = 1;
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700185 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
186 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
			/* Hold the TIME-WAIT socket; the reference is handed
			 * to the caller via *twp or dropped below. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187 sock_hold(sk2);
188 goto unique;
189 } else
190 goto not_unique;
191 }
192 }
	/* No TIME-WAIT match: make sure "unique" sees tw == NULL. */
193 tw = NULL;
194
195 /* And established part... */
196 sk_for_each(sk2, node, &head->chain) {
Eric Dumazet81c3d542005-10-03 14:13:38 -0700197 if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700198 goto not_unique;
199 }
200
201unique:
	/* Still under head->lock: link sk into the established chain. */
202 BUG_TRAP(sk_unhashed(sk));
203 __sk_add_node(sk, &head->chain);
Eric Dumazet81c3d542005-10-03 14:13:38 -0700204 sk->sk_hash = hash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700205 sock_prot_inc_use(sk->sk_prot);
206 write_unlock(&head->lock);
207
208 if (twp) {
		/* Note: the counter is bumped here even when tw is NULL. */
209 *twp = tw;
210 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
211 } else if (tw) {
212 /* Silly. Should hash-dance instead... */
Arnaldo Carvalho de Melo295ff7e2005-08-09 20:44:40 -0700213 inet_twsk_deschedule(tw, &tcp_death_row);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700214 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
215
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700216 inet_twsk_put(tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700217 }
218 return 0;
219
220not_unique:
221 write_unlock(&head->lock);
222 return -EADDRNOTAVAIL;
223}
224
225static inline u32 tcpv6_port_offset(const struct sock *sk)
226{
227 const struct inet_sock *inet = inet_sk(sk);
228 const struct ipv6_pinfo *np = inet6_sk(sk);
229
230 return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
231 np->daddr.s6_addr32,
232 inet->dport);
233}
234
/*
 * Bind (if necessary) and hash a socket that is about to connect.
 *
 * If @sk has no local port yet (inet->num == 0), walk the local port range
 * starting at a per-destination offset, and take the first port that either
 * has no bind bucket or whose connection identity is unique in the
 * established table.  If a port is already set, hash directly when @sk is
 * the only owner of its bind bucket, otherwise fall back to the
 * established-table uniqueness check.
 *
 * Returns 0 on success, -EADDRNOTAVAIL when no usable port/identity exists.
 */
235static int tcp_v6_hash_connect(struct sock *sk)
236{
237 unsigned short snum = inet_sk(sk)->num;
Arnaldo Carvalho de Melo0f7ff922005-08-09 19:59:44 -0700238 struct inet_bind_hashbucket *head;
239 struct inet_bind_bucket *tb;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700240 int ret;
241
242 if (!snum) {
243 int low = sysctl_local_port_range[0];
244 int high = sysctl_local_port_range[1];
245 int range = high - low;
246 int i;
247 int port;
	/* Shared across calls; advanced at "ok" by the number of ports tried. */
248 static u32 hint;
249 u32 offset = hint + tcpv6_port_offset(sk);
250 struct hlist_node *node;
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700251 struct inet_timewait_sock *tw = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252
	/* Bottom halves stay disabled until "out" (success) or the
	 * local_bh_enable() after the loop (failure). */
253 local_bh_disable();
254 for (i = 1; i <= range; i++) {
255 port = low + (i + offset) % range;
Arnaldo Carvalho de Melo6e04e022005-08-09 20:07:35 -0700256 head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257 spin_lock(&head->lock);
258
259 /* Does not bother with rcv_saddr checks,
260 * because the established check is already
261 * unique enough.
262 */
Arnaldo Carvalho de Melo0f7ff922005-08-09 19:59:44 -0700263 inet_bind_bucket_for_each(tb, node, &head->chain) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700264 if (tb->port == port) {
265 BUG_TRAP(!hlist_empty(&tb->owners));
			/* Buckets with fastreuse >= 0 are skipped; buckets
			 * created below are marked with fastreuse = -1. */
266 if (tb->fastreuse >= 0)
267 goto next_port;
268 if (!__tcp_v6_check_established(sk,
269 port,
270 &tw))
271 goto ok;
272 goto next_port;
273 }
274 }
275
	/* No bucket for this port yet: create one and claim it. */
Arnaldo Carvalho de Melo6e04e022005-08-09 20:07:35 -0700276 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700277 if (!tb) {
278 spin_unlock(&head->lock);
279 break;
280 }
281 tb->fastreuse = -1;
282 goto ok;
283
284 next_port:
285 spin_unlock(&head->lock);
286 }
287 local_bh_enable();
288
289 return -EADDRNOTAVAIL;
290
291ok:
292 hint += i;
293
294 /* Head lock still held and bh's disabled */
Arnaldo Carvalho de Melo2d8c4ce2005-08-09 20:07:13 -0700295 inet_bind_hash(sk, tb, port);
	/* If sk is already hashed at this point, only the bind is recorded;
	 * otherwise set the source port and hash it now. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700296 if (sk_unhashed(sk)) {
297 inet_sk(sk)->sport = htons(port);
Arnaldo Carvalho de Melo90b19d32005-12-13 23:15:01 -0800298 __inet6_hash(&tcp_hashinfo, sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700299 }
300 spin_unlock(&head->lock);
301
	/* Drop the TIME-WAIT socket handed back by the uniqueness check. */
302 if (tw) {
Arnaldo Carvalho de Melo295ff7e2005-08-09 20:44:40 -0700303 inet_twsk_deschedule(tw, &tcp_death_row);
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700304 inet_twsk_put(tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305 }
306
307 ret = 0;
	/* Jumps into the else-branch below to re-enable bottom halves. */
308 goto out;
309 }
310
Arnaldo Carvalho de Melo6e04e022005-08-09 20:07:35 -0700311 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -0700312 tb = inet_csk(sk)->icsk_bind_hash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313 spin_lock_bh(&head->lock);
314
	/* sk is the first and only owner of its bind bucket. */
315 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
Arnaldo Carvalho de Melo90b19d32005-12-13 23:15:01 -0800316 __inet6_hash(&tcp_hashinfo, sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317 spin_unlock_bh(&head->lock);
318 return 0;
319 } else {
	/* Plain spin_unlock(): bottom halves stay disabled across the
	 * established check and are re-enabled at "out". */
320 spin_unlock(&head->lock);
321 /* No definite answer... Walk to established hash table */
322 ret = __tcp_v6_check_established(sk, snum, NULL);
323out:
324 local_bh_enable();
325 return ret;
326 }
327}
328
/*
 * connect() handler for TCP over IPv6.
 *
 * @sk:       socket to connect
 * @uaddr:    destination, interpreted as struct sockaddr_in6
 * @addr_len: length of @uaddr supplied by the caller
 *
 * Validates the destination, records it in the socket, and either hands a
 * v4-mapped destination over to tcp_v4_connect() or performs the IPv6
 * route/xfrm lookup, picks the source address, hashes the socket and
 * starts the handshake with tcp_connect().
 *
 * Returns 0 on success or a negative errno.  On failure the destination
 * port and route capabilities are cleared; failures after the state change
 * additionally move the socket back to TCP_CLOSE and drop the cached route.
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700329static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
330 int addr_len)
331{
332 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
333 struct inet_sock *inet = inet_sk(sk);
334 struct ipv6_pinfo *np = inet6_sk(sk);
335 struct tcp_sock *tp = tcp_sk(sk);
336 struct in6_addr *saddr = NULL, *final_p = NULL, final;
337 struct flowi fl;
338 struct dst_entry *dst;
339 int addr_type;
340 int err;
341
342 if (addr_len < SIN6_LEN_RFC2133)
343 return -EINVAL;
344
345 if (usin->sin6_family != AF_INET6)
346 return(-EAFNOSUPPORT);
347
348 memset(&fl, 0, sizeof(fl));
349
	/* With flow-label sending enabled, take the label from the caller's
	 * sin6_flowinfo; a non-zero label must be known to the socket, and
	 * its destination then overrides the address passed in. */
350 if (np->sndflow) {
351 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
352 IP6_ECN_flow_init(fl.fl6_flowlabel);
353 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
354 struct ip6_flowlabel *flowlabel;
355 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
356 if (flowlabel == NULL)
357 return -EINVAL;
358 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
359 fl6_sock_release(flowlabel);
360 }
361 }
362
363 /*
364 * connect() to INADDR_ANY means loopback (BSD'ism).
365 */
366
	/* Setting the last byte of the all-zero address yields ::1. */
367 if(ipv6_addr_any(&usin->sin6_addr))
368 usin->sin6_addr.s6_addr[15] = 0x1;
369
370 addr_type = ipv6_addr_type(&usin->sin6_addr);
371
372 if(addr_type & IPV6_ADDR_MULTICAST)
373 return -ENETUNREACH;
374
375 if (addr_type&IPV6_ADDR_LINKLOCAL) {
	/* sin6_scope_id is only read when the caller passed a full
	 * sockaddr_in6 (addr_len may be the shorter RFC 2133 size). */
376 if (addr_len >= sizeof(struct sockaddr_in6) &&
377 usin->sin6_scope_id) {
378 /* If interface is set while binding, indices
379 * must coincide.
380 */
381 if (sk->sk_bound_dev_if &&
382 sk->sk_bound_dev_if != usin->sin6_scope_id)
383 return -EINVAL;
384
385 sk->sk_bound_dev_if = usin->sin6_scope_id;
386 }
387
388 /* Connect to link-local address requires an interface */
389 if (!sk->sk_bound_dev_if)
390 return -EINVAL;
391 }
392
	/* Timestamp state remembered for a different peer is discarded,
	 * together with the saved write sequence. */
393 if (tp->rx_opt.ts_recent_stamp &&
394 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
395 tp->rx_opt.ts_recent = 0;
396 tp->rx_opt.ts_recent_stamp = 0;
397 tp->write_seq = 0;
398 }
399
400 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
401 np->flow_label = fl.fl6_flowlabel;
402
403 /*
404 * TCP over IPv4
405 */
406
407 if (addr_type == IPV6_ADDR_MAPPED) {
	/* Saved so it can be restored if the IPv4 connect fails. */
408 u32 exthdrlen = tp->ext_header_len;
409 struct sockaddr_in sin;
410
411 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
412
413 if (__ipv6_only_sock(sk))
414 return -ENETUNREACH;
415
416 sin.sin_family = AF_INET;
417 sin.sin_port = usin->sin6_port;
418 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
419
	/* Switch the socket to the v4-mapped operations before calling
	 * into the IPv4 connect path. */
420 tp->af_specific = &ipv6_mapped;
421 sk->sk_backlog_rcv = tcp_v4_do_rcv;
422
423 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
424
425 if (err) {
	/* Undo the switch to the mapped operations. */
426 tp->ext_header_len = exthdrlen;
427 tp->af_specific = &ipv6_specific;
428 sk->sk_backlog_rcv = tcp_v6_do_rcv;
429 goto failure;
430 } else {
	/* Mirror the IPv4 addresses chosen by tcp_v4_connect() as
	 * v4-mapped IPv6 addresses (::ffff:a.b.c.d). */
431 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
432 inet->saddr);
433 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
434 inet->rcv_saddr);
435 }
436
437 return err;
438 }
439
	/* An explicitly bound receive address is used as the source. */
440 if (!ipv6_addr_any(&np->rcv_saddr))
441 saddr = &np->rcv_saddr;
442
443 fl.proto = IPPROTO_TCP;
444 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
445 ipv6_addr_copy(&fl.fl6_src,
446 (saddr ? saddr : &np->saddr));
447 fl.oif = sk->sk_bound_dev_if;
448 fl.fl_ip_dport = usin->sin6_port;
449 fl.fl_ip_sport = inet->sport;
450
	/* With a source-routing header, look the route up towards the
	 * header's first address and restore the real destination after. */
451 if (np->opt && np->opt->srcrt) {
452 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
453 ipv6_addr_copy(&final, &fl.fl6_dst);
454 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
455 final_p = &final;
456 }
457
458 err = ip6_dst_lookup(sk, &dst, &fl);
459 if (err)
460 goto failure;
461 if (final_p)
462 ipv6_addr_copy(&fl.fl6_dst, final_p);
463
Patrick McHardye104411b2005-09-08 15:11:55 -0700464 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700465 goto failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700466
	/* No bound address: adopt the source address left in the flow
	 * after the lookups. */
467 if (saddr == NULL) {
468 saddr = &fl.fl6_src;
469 ipv6_addr_copy(&np->rcv_saddr, saddr);
470 }
471
472 /* set the source address */
473 ipv6_addr_copy(&np->saddr, saddr);
474 inet->rcv_saddr = LOOPBACK4_IPV6;
475
476 ip6_dst_store(sk, dst, NULL);
	/* Device features minus IP checksum offload and TSO. */
477 sk->sk_route_caps = dst->dev->features &
478 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
479
480 tp->ext_header_len = 0;
481 if (np->opt)
482 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
483
484 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
485
486 inet->dport = usin->sin6_port;
487
488 tcp_set_state(sk, TCP_SYN_SENT);
489 err = tcp_v6_hash_connect(sk);
490 if (err)
491 goto late_failure;
492
	/* write_seq may already be set (e.g. when a TIME-WAIT socket was
	 * recycled during hashing); otherwise pick a secure initial value. */
493 if (!tp->write_seq)
494 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
495 np->daddr.s6_addr32,
496 inet->sport,
497 inet->dport);
498
499 err = tcp_connect(sk);
500 if (err)
501 goto late_failure;
502
503 return 0;
504
505late_failure:
506 tcp_set_state(sk, TCP_CLOSE);
507 __sk_dst_reset(sk);
508failure:
509 inet->dport = 0;
510 sk->sk_route_caps = 0;
511 return err;
512}
513
/*
 * ICMPv6 error handler for TCP.
 *
 * @skb:    ICMPv6 error packet; skb->data points at the quoted IPv6 header
 *          of the offending packet and the quoted TCP header is at @offset
 * @opt:    parsed IPv6 options of @skb (not used here)
 * @type:   ICMPv6 type
 * @code:   ICMPv6 code
 * @offset: offset of the quoted TCP header from skb->data
 * @info:   ICMPv6 info field (not used here)
 *
 * Looks up the socket the quoted segment belongs to, validates the quoted
 * sequence number, and then either resyncs the MSS (packet too big), drops
 * the matching connection request (listening socket), or reports the
 * converted error on the socket.
 */
514static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
515 int type, int code, int offset, __u32 info)
516{
517 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
Arnaldo Carvalho de Melo505cbfc2005-08-12 09:19:38 -0300518 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519 struct ipv6_pinfo *np;
520 struct sock *sk;
521 int err;
522 struct tcp_sock *tp;
523 __u32 seq;
524
	/* The quoted header is from a packet we sent: its destination
	 * address/port is the peer, its source is the local side. */
Arnaldo Carvalho de Melo505cbfc2005-08-12 09:19:38 -0300525 sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
526 th->source, skb->dev->ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527
528 if (sk == NULL) {
529 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
530 return;
531 }
532
	/* TIME-WAIT entries only need their lookup reference dropped. */
533 if (sk->sk_state == TCP_TIME_WAIT) {
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700534 inet_twsk_put((struct inet_timewait_sock *)sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700535 return;
536 }
537
538 bh_lock_sock(sk);
	/* Only counted here; each branch below decides for itself what to
	 * do while the socket is owned by user context. */
539 if (sock_owned_by_user(sk))
540 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
541
542 if (sk->sk_state == TCP_CLOSE)
543 goto out;
544
545 tp = tcp_sk(sk);
546 seq = ntohl(th->seq);
	/* Except for listeners, the quoted sequence number must lie within
	 * the currently unacknowledged range. */
547 if (sk->sk_state != TCP_LISTEN &&
548 !between(seq, tp->snd_una, tp->snd_nxt)) {
549 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
550 goto out;
551 }
552
553 np = inet6_sk(sk);
554
555 if (type == ICMPV6_PKT_TOOBIG) {
556 struct dst_entry *dst = NULL;
557
558 if (sock_owned_by_user(sk))
559 goto out;
560 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
561 goto out;
562
563 /* icmp should have updated the destination cache entry */
564 dst = __sk_dst_check(sk, np->dst_cookie);
565
	/* No valid cached route: look one up from the socket's addresses. */
566 if (dst == NULL) {
567 struct inet_sock *inet = inet_sk(sk);
568 struct flowi fl;
569
570 /* BUGGG_FUTURE: Again, it is not clear how
571 to handle rthdr case. Ignore this complexity
572 for now.
573 */
574 memset(&fl, 0, sizeof(fl));
575 fl.proto = IPPROTO_TCP;
576 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
577 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
578 fl.oif = sk->sk_bound_dev_if;
579 fl.fl_ip_dport = inet->dport;
580 fl.fl_ip_sport = inet->sport;
581
582 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
583 sk->sk_err_soft = -err;
584 goto out;
585 }
586
587 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
588 sk->sk_err_soft = -err;
589 goto out;
590 }
591
592 } else
	/* Take a reference so both paths can dst_release() below. */
593 dst_hold(dst);
594
595 if (tp->pmtu_cookie > dst_mtu(dst)) {
596 tcp_sync_mss(sk, dst_mtu(dst));
597 tcp_simple_retransmit(sk);
598 } /* else let the usual retransmit timer handle it */
599 dst_release(dst);
600 goto out;
601 }
602
603 icmpv6_err_convert(type, code, &err);
604
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700605 /* Might be for a request_sock */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700606 switch (sk->sk_state) {
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700607 struct request_sock *req, **prev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608 case TCP_LISTEN:
609 if (sock_owned_by_user(sk))
610 goto out;
611
Arnaldo Carvalho de Melo81297652005-12-13 23:15:24 -0800612 req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
613 &hdr->saddr, inet6_iif(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700614 if (!req)
615 goto out;
616
617 /* ICMPs are not backlogged, hence we cannot get
618 * an established socket here.
619 */
620 BUG_TRAP(req->sk == NULL);
621
	/* The error must quote the SYN-ACK we sent for this request. */
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700622 if (seq != tcp_rsk(req)->snt_isn) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
624 goto out;
625 }
626
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -0700627 inet_csk_reqsk_queue_drop(sk, req, prev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700628 goto out;
629
630 case TCP_SYN_SENT:
631 case TCP_SYN_RECV: /* Cannot happen.
632 It can, if SYNs are crossed. --ANK */
633 if (!sock_owned_by_user(sk)) {
634 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
635 sk->sk_err = err;
636 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
637
638 tcp_done(sk);
639 } else
640 sk->sk_err_soft = err;
641 goto out;
642 }
643
	/* Other states: a hard error is raised only if the socket asked for
	 * it (recverr) and is not owned by user context; otherwise the error
	 * is just recorded as a soft error. */
644 if (!sock_owned_by_user(sk) && np->recverr) {
645 sk->sk_err = err;
646 sk->sk_error_report(sk);
647 } else
648 sk->sk_err_soft = err;
649
650out:
651 bh_unlock_sock(sk);
652 sock_put(sk);
653}
654
655
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700656static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700657 struct dst_entry *dst)
658{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700659 struct tcp6_request_sock *treq = tcp6_rsk(req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700660 struct ipv6_pinfo *np = inet6_sk(sk);
661 struct sk_buff * skb;
662 struct ipv6_txoptions *opt = NULL;
663 struct in6_addr * final_p = NULL, final;
664 struct flowi fl;
665 int err = -1;
666
667 memset(&fl, 0, sizeof(fl));
668 fl.proto = IPPROTO_TCP;
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700669 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
670 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700671 fl.fl6_flowlabel = 0;
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700672 fl.oif = treq->iif;
673 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674 fl.fl_ip_sport = inet_sk(sk)->sport;
675
676 if (dst == NULL) {
677 opt = np->opt;
678 if (opt == NULL &&
YOSHIFUJI Hideaki333fad52005-09-08 09:59:17 +0900679 np->rxopt.bits.osrcrt == 2 &&
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700680 treq->pktopts) {
681 struct sk_buff *pktopts = treq->pktopts;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
683 if (rxopt->srcrt)
684 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
685 }
686
687 if (opt && opt->srcrt) {
688 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
689 ipv6_addr_copy(&final, &fl.fl6_dst);
690 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
691 final_p = &final;
692 }
693
694 err = ip6_dst_lookup(sk, &dst, &fl);
695 if (err)
696 goto done;
697 if (final_p)
698 ipv6_addr_copy(&fl.fl6_dst, final_p);
699 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
700 goto done;
701 }
702
703 skb = tcp_make_synack(sk, dst, req);
704 if (skb) {
705 struct tcphdr *th = skb->h.th;
706
707 th->check = tcp_v6_check(th, skb->len,
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700708 &treq->loc_addr, &treq->rmt_addr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700709 csum_partial((char *)th, skb->len, skb->csum));
710
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700711 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700712 err = ip6_xmit(sk, skb, &fl, opt, 0);
713 if (err == NET_XMIT_CN)
714 err = 0;
715 }
716
717done:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700718 if (opt && opt != np->opt)
719 sock_kfree_s(sk, opt, opt->tot_len);
720 return err;
721}
722
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700723static void tcp_v6_reqsk_destructor(struct request_sock *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700724{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700725 if (tcp6_rsk(req)->pktopts)
726 kfree_skb(tcp6_rsk(req)->pktopts);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700727}
728
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700729static struct request_sock_ops tcp6_request_sock_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730 .family = AF_INET6,
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700731 .obj_size = sizeof(struct tcp6_request_sock),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700732 .rtx_syn_ack = tcp_v6_send_synack,
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700733 .send_ack = tcp_v6_reqsk_send_ack,
734 .destructor = tcp_v6_reqsk_destructor,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735 .send_reset = tcp_v6_send_reset
736};
737
738static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
739{
740 struct ipv6_pinfo *np = inet6_sk(sk);
741 struct inet6_skb_parm *opt = IP6CB(skb);
742
743 if (np->rxopt.all) {
YOSHIFUJI Hideaki333fad52005-09-08 09:59:17 +0900744 if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
745 ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) ||
746 (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) ||
747 ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700748 return 1;
749 }
750 return 0;
751}
752
753
754static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
755 struct sk_buff *skb)
756{
757 struct ipv6_pinfo *np = inet6_sk(sk);
758
759 if (skb->ip_summed == CHECKSUM_HW) {
760 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
761 skb->csum = offsetof(struct tcphdr, check);
762 } else {
763 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
764 csum_partial((char *)th, th->doff<<2,
765 skb->csum));
766 }
767}
768
769
770static void tcp_v6_send_reset(struct sk_buff *skb)
771{
772 struct tcphdr *th = skb->h.th, *t1;
773 struct sk_buff *buff;
774 struct flowi fl;
775
776 if (th->rst)
777 return;
778
779 if (!ipv6_unicast_destination(skb))
780 return;
781
782 /*
783 * We need to grab some memory, and put together an RST,
784 * and then put it into the queue to be sent.
785 */
786
787 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
788 GFP_ATOMIC);
789 if (buff == NULL)
790 return;
791
792 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
793
794 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
795
796 /* Swap the send and the receive. */
797 memset(t1, 0, sizeof(*t1));
798 t1->dest = th->source;
799 t1->source = th->dest;
800 t1->doff = sizeof(*t1)/4;
801 t1->rst = 1;
802
803 if(th->ack) {
804 t1->seq = th->ack_seq;
805 } else {
806 t1->ack = 1;
807 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
808 + skb->len - (th->doff<<2));
809 }
810
811 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
812
813 memset(&fl, 0, sizeof(fl));
814 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
815 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
816
817 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
818 sizeof(*t1), IPPROTO_TCP,
819 buff->csum);
820
821 fl.proto = IPPROTO_TCP;
Arnaldo Carvalho de Melo505cbfc2005-08-12 09:19:38 -0300822 fl.oif = inet6_iif(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700823 fl.fl_ip_dport = t1->dest;
824 fl.fl_ip_sport = t1->source;
825
826 /* sk = NULL, but it is safe for now. RST socket required. */
827 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
828
Arnaldo Carvalho de Meloecc51b62005-12-12 14:38:10 -0800829 if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
830 ip6_xmit(NULL, buff, &fl, NULL, 0);
831 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
832 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700833 return;
Arnaldo Carvalho de Meloecc51b62005-12-12 14:38:10 -0800834 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700835 }
836
837 kfree_skb(buff);
838}
839
840static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
841{
842 struct tcphdr *th = skb->h.th, *t1;
843 struct sk_buff *buff;
844 struct flowi fl;
845 int tot_len = sizeof(struct tcphdr);
846
847 if (ts)
848 tot_len += 3*4;
849
850 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
851 GFP_ATOMIC);
852 if (buff == NULL)
853 return;
854
855 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
856
857 t1 = (struct tcphdr *) skb_push(buff,tot_len);
858
859 /* Swap the send and the receive. */
860 memset(t1, 0, sizeof(*t1));
861 t1->dest = th->source;
862 t1->source = th->dest;
863 t1->doff = tot_len/4;
864 t1->seq = htonl(seq);
865 t1->ack_seq = htonl(ack);
866 t1->ack = 1;
867 t1->window = htons(win);
868
869 if (ts) {
870 u32 *ptr = (u32*)(t1 + 1);
871 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
872 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
873 *ptr++ = htonl(tcp_time_stamp);
874 *ptr = htonl(ts);
875 }
876
877 buff->csum = csum_partial((char *)t1, tot_len, 0);
878
879 memset(&fl, 0, sizeof(fl));
880 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
881 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
882
883 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
884 tot_len, IPPROTO_TCP,
885 buff->csum);
886
887 fl.proto = IPPROTO_TCP;
Arnaldo Carvalho de Melo505cbfc2005-08-12 09:19:38 -0300888 fl.oif = inet6_iif(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700889 fl.fl_ip_dport = t1->dest;
890 fl.fl_ip_sport = t1->source;
891
892 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
Arnaldo Carvalho de Meloecc51b62005-12-12 14:38:10 -0800893 if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
894 ip6_xmit(NULL, buff, &fl, NULL, 0);
895 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700896 return;
Arnaldo Carvalho de Meloecc51b62005-12-12 14:38:10 -0800897 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700898 }
899
900 kfree_skb(buff);
901}
902
903static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
904{
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700905 struct inet_timewait_sock *tw = inet_twsk(sk);
906 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700907
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700908 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
909 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
910 tcptw->tw_ts_recent);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700911
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700912 inet_twsk_put(tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700913}
914
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700915static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700916{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700917 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918}
919
920
921static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
922{
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700923 struct request_sock *req, **prev;
Arnaldo Carvalho de Melo505cbfc2005-08-12 09:19:38 -0300924 const struct tcphdr *th = skb->h.th;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925 struct sock *nsk;
926
927 /* Find possible connection requests. */
Arnaldo Carvalho de Melo81297652005-12-13 23:15:24 -0800928 req = inet6_csk_search_req(sk, &prev, th->source,
929 &skb->nh.ipv6h->saddr,
930 &skb->nh.ipv6h->daddr, inet6_iif(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931 if (req)
932 return tcp_check_req(sk, skb, req, prev);
933
Arnaldo Carvalho de Melo505cbfc2005-08-12 09:19:38 -0300934 nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
935 th->source, &skb->nh.ipv6h->daddr,
936 ntohs(th->dest), inet6_iif(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937
938 if (nsk) {
939 if (nsk->sk_state != TCP_TIME_WAIT) {
940 bh_lock_sock(nsk);
941 return nsk;
942 }
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -0700943 inet_twsk_put((struct inet_timewait_sock *)nsk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944 return NULL;
945 }
946
947#if 0 /*def CONFIG_SYN_COOKIES*/
948 if (!th->rst && !th->syn && th->ack)
949 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
950#endif
951 return sk;
952}
953
Linus Torvalds1da177e2005-04-16 15:20:36 -0700954/* FIXME: this is substantially similar to the ipv4 code.
955 * Can some kind of merge be done? -- erics
956 */
957static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
958{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700959 struct tcp6_request_sock *treq;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700960 struct ipv6_pinfo *np = inet6_sk(sk);
961 struct tcp_options_received tmp_opt;
962 struct tcp_sock *tp = tcp_sk(sk);
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700963 struct request_sock *req = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964 __u32 isn = TCP_SKB_CB(skb)->when;
965
966 if (skb->protocol == htons(ETH_P_IP))
967 return tcp_v4_conn_request(sk, skb);
968
969 if (!ipv6_unicast_destination(skb))
970 goto drop;
971
972 /*
973 * There are no SYN attacks on IPv6, yet...
974 */
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -0700975 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700976 if (net_ratelimit())
977 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
978 goto drop;
979 }
980
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -0700981 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700982 goto drop;
983
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -0700984 req = reqsk_alloc(&tcp6_request_sock_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700985 if (req == NULL)
986 goto drop;
987
988 tcp_clear_options(&tmp_opt);
989 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
990 tmp_opt.user_mss = tp->rx_opt.user_mss;
991
992 tcp_parse_options(skb, &tmp_opt, 0);
993
994 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
995 tcp_openreq_init(req, &tmp_opt, skb);
996
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700997 treq = tcp6_rsk(req);
998 ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
999 ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000 TCP_ECN_create_request(req, skb->h.th);
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001001 treq->pktopts = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001002 if (ipv6_opt_accepted(sk, skb) ||
YOSHIFUJI Hideaki333fad52005-09-08 09:59:17 +09001003 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
1004 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001005 atomic_inc(&skb->users);
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001006 treq->pktopts = skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001007 }
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001008 treq->iif = sk->sk_bound_dev_if;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001009
1010 /* So that link locals have meaning */
1011 if (!sk->sk_bound_dev_if &&
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001012 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
Arnaldo Carvalho de Melo505cbfc2005-08-12 09:19:38 -03001013 treq->iif = inet6_iif(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001014
1015 if (isn == 0)
1016 isn = tcp_v6_init_sequence(sk,skb);
1017
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001018 tcp_rsk(req)->snt_isn = isn;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019
1020 if (tcp_v6_send_synack(sk, req, NULL))
1021 goto drop;
1022
Arnaldo Carvalho de Melo81297652005-12-13 23:15:24 -08001023 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024 return 0;
1025
1026drop:
1027 if (req)
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001028 reqsk_free(req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029
1030 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1031 return 0; /* don't send reset */
1032}
1033
1034static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001035 struct request_sock *req,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001036 struct dst_entry *dst)
1037{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001038 struct tcp6_request_sock *treq = tcp6_rsk(req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1040 struct tcp6_sock *newtcp6sk;
1041 struct inet_sock *newinet;
1042 struct tcp_sock *newtp;
1043 struct sock *newsk;
1044 struct ipv6_txoptions *opt;
1045
1046 if (skb->protocol == htons(ETH_P_IP)) {
1047 /*
1048 * v6 mapped
1049 */
1050
1051 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1052
1053 if (newsk == NULL)
1054 return NULL;
1055
1056 newtcp6sk = (struct tcp6_sock *)newsk;
1057 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1058
1059 newinet = inet_sk(newsk);
1060 newnp = inet6_sk(newsk);
1061 newtp = tcp_sk(newsk);
1062
1063 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1064
1065 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1066 newinet->daddr);
1067
1068 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1069 newinet->saddr);
1070
1071 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1072
1073 newtp->af_specific = &ipv6_mapped;
1074 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1075 newnp->pktoptions = NULL;
1076 newnp->opt = NULL;
Arnaldo Carvalho de Melo505cbfc2005-08-12 09:19:38 -03001077 newnp->mcast_oif = inet6_iif(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001078 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1079
Arnaldo Carvalho de Meloe6848972005-08-09 19:45:38 -07001080 /*
1081 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1082 * here, tcp_create_openreq_child now does this for us, see the comment in
1083 * that function for the gory details. -acme
Linus Torvalds1da177e2005-04-16 15:20:36 -07001084 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001085
1086 /* It is tricky place. Until this moment IPv4 tcp
1087 worked with IPv6 af_tcp.af_specific.
1088 Sync it now.
1089 */
1090 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1091
1092 return newsk;
1093 }
1094
1095 opt = np->opt;
1096
1097 if (sk_acceptq_is_full(sk))
1098 goto out_overflow;
1099
YOSHIFUJI Hideaki333fad52005-09-08 09:59:17 +09001100 if (np->rxopt.bits.osrcrt == 2 &&
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001101 opt == NULL && treq->pktopts) {
1102 struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001103 if (rxopt->srcrt)
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001104 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001105 }
1106
1107 if (dst == NULL) {
1108 struct in6_addr *final_p = NULL, final;
1109 struct flowi fl;
1110
1111 memset(&fl, 0, sizeof(fl));
1112 fl.proto = IPPROTO_TCP;
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001113 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001114 if (opt && opt->srcrt) {
1115 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1116 ipv6_addr_copy(&final, &fl.fl6_dst);
1117 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1118 final_p = &final;
1119 }
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001120 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001121 fl.oif = sk->sk_bound_dev_if;
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001122 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001123 fl.fl_ip_sport = inet_sk(sk)->sport;
1124
1125 if (ip6_dst_lookup(sk, &dst, &fl))
1126 goto out;
1127
1128 if (final_p)
1129 ipv6_addr_copy(&fl.fl6_dst, final_p);
1130
1131 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1132 goto out;
1133 }
1134
1135 newsk = tcp_create_openreq_child(sk, req, skb);
1136 if (newsk == NULL)
1137 goto out;
1138
Arnaldo Carvalho de Meloe6848972005-08-09 19:45:38 -07001139 /*
1140 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1141 * count here, tcp_create_openreq_child now does this for us, see the
1142 * comment in that function for the gory details. -acme
1143 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001144
1145 ip6_dst_store(newsk, dst, NULL);
1146 newsk->sk_route_caps = dst->dev->features &
1147 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1148
1149 newtcp6sk = (struct tcp6_sock *)newsk;
1150 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1151
1152 newtp = tcp_sk(newsk);
1153 newinet = inet_sk(newsk);
1154 newnp = inet6_sk(newsk);
1155
1156 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1157
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001158 ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1159 ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1160 ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1161 newsk->sk_bound_dev_if = treq->iif;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001162
1163 /* Now IPv6 options...
1164
1165 First: no IPv4 options.
1166 */
1167 newinet->opt = NULL;
1168
1169 /* Clone RX bits */
1170 newnp->rxopt.all = np->rxopt.all;
1171
1172 /* Clone pktoptions received with SYN */
1173 newnp->pktoptions = NULL;
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001174 if (treq->pktopts != NULL) {
1175 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1176 kfree_skb(treq->pktopts);
1177 treq->pktopts = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001178 if (newnp->pktoptions)
1179 skb_set_owner_r(newnp->pktoptions, newsk);
1180 }
1181 newnp->opt = NULL;
Arnaldo Carvalho de Melo505cbfc2005-08-12 09:19:38 -03001182 newnp->mcast_oif = inet6_iif(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001183 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1184
1185 /* Clone native IPv6 options from listening socket (if any)
1186
1187 Yes, keeping reference count would be much more clever,
1188 but we make one more one thing there: reattach optmem
1189 to newsk.
1190 */
1191 if (opt) {
1192 newnp->opt = ipv6_dup_options(newsk, opt);
1193 if (opt != np->opt)
1194 sock_kfree_s(sk, opt, opt->tot_len);
1195 }
1196
1197 newtp->ext_header_len = 0;
1198 if (newnp->opt)
1199 newtp->ext_header_len = newnp->opt->opt_nflen +
1200 newnp->opt->opt_flen;
1201
1202 tcp_sync_mss(newsk, dst_mtu(dst));
1203 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1204 tcp_initialize_rcv_mss(newsk);
1205
1206 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1207
Arnaldo Carvalho de Melo90b19d32005-12-13 23:15:01 -08001208 __inet6_hash(&tcp_hashinfo, newsk);
Arnaldo Carvalho de Melo2d8c4ce2005-08-09 20:07:13 -07001209 inet_inherit_port(&tcp_hashinfo, sk, newsk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001210
1211 return newsk;
1212
1213out_overflow:
1214 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1215out:
1216 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1217 if (opt && opt != np->opt)
1218 sock_kfree_s(sk, opt, opt->tot_len);
1219 dst_release(dst);
1220 return NULL;
1221}
1222
1223static int tcp_v6_checksum_init(struct sk_buff *skb)
1224{
1225 if (skb->ip_summed == CHECKSUM_HW) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001226 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
Herbert Xufb286bb2005-11-10 13:01:24 -08001227 &skb->nh.ipv6h->daddr,skb->csum)) {
1228 skb->ip_summed = CHECKSUM_UNNECESSARY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001229 return 0;
Herbert Xufb286bb2005-11-10 13:01:24 -08001230 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001231 }
Herbert Xufb286bb2005-11-10 13:01:24 -08001232
1233 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1234 &skb->nh.ipv6h->daddr, 0);
1235
Linus Torvalds1da177e2005-04-16 15:20:36 -07001236 if (skb->len <= 76) {
Herbert Xufb286bb2005-11-10 13:01:24 -08001237 return __skb_checksum_complete(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001238 }
1239 return 0;
1240}
1241
1242/* The socket must have it's spinlock held when we get
1243 * here.
1244 *
1245 * We have a potential double-lock case here, so even when
1246 * doing backlog processing we use the BH locking scheme.
1247 * This is because we cannot sleep with the original spinlock
1248 * held.
1249 */
1250static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1251{
1252 struct ipv6_pinfo *np = inet6_sk(sk);
1253 struct tcp_sock *tp;
1254 struct sk_buff *opt_skb = NULL;
1255
1256 /* Imagine: socket is IPv6. IPv4 packet arrives,
1257 goes to IPv4 receive handler and backlogged.
1258 From backlog it always goes here. Kerboom...
1259 Fortunately, tcp_rcv_established and rcv_established
1260 handle them correctly, but it is not case with
1261 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1262 */
1263
1264 if (skb->protocol == htons(ETH_P_IP))
1265 return tcp_v4_do_rcv(sk, skb);
1266
1267 if (sk_filter(sk, skb, 0))
1268 goto discard;
1269
1270 /*
1271 * socket locking is here for SMP purposes as backlog rcv
1272 * is currently called with bh processing disabled.
1273 */
1274
1275 /* Do Stevens' IPV6_PKTOPTIONS.
1276
1277 Yes, guys, it is the only place in our code, where we
1278 may make it not affecting IPv4.
1279 The rest of code is protocol independent,
1280 and I do not like idea to uglify IPv4.
1281
1282 Actually, all the idea behind IPV6_PKTOPTIONS
1283 looks not very well thought. For now we latch
1284 options, received in the last packet, enqueued
1285 by tcp. Feel free to propose better solution.
1286 --ANK (980728)
1287 */
1288 if (np->rxopt.all)
1289 opt_skb = skb_clone(skb, GFP_ATOMIC);
1290
1291 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1292 TCP_CHECK_TIMER(sk);
1293 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1294 goto reset;
1295 TCP_CHECK_TIMER(sk);
1296 if (opt_skb)
1297 goto ipv6_pktoptions;
1298 return 0;
1299 }
1300
1301 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1302 goto csum_err;
1303
1304 if (sk->sk_state == TCP_LISTEN) {
1305 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1306 if (!nsk)
1307 goto discard;
1308
1309 /*
1310 * Queue it on the new socket if the new socket is active,
1311 * otherwise we just shortcircuit this and continue with
1312 * the new socket..
1313 */
1314 if(nsk != sk) {
1315 if (tcp_child_process(sk, nsk, skb))
1316 goto reset;
1317 if (opt_skb)
1318 __kfree_skb(opt_skb);
1319 return 0;
1320 }
1321 }
1322
1323 TCP_CHECK_TIMER(sk);
1324 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1325 goto reset;
1326 TCP_CHECK_TIMER(sk);
1327 if (opt_skb)
1328 goto ipv6_pktoptions;
1329 return 0;
1330
1331reset:
1332 tcp_v6_send_reset(skb);
1333discard:
1334 if (opt_skb)
1335 __kfree_skb(opt_skb);
1336 kfree_skb(skb);
1337 return 0;
1338csum_err:
1339 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1340 goto discard;
1341
1342
1343ipv6_pktoptions:
1344 /* Do you ask, what is it?
1345
1346 1. skb was enqueued by tcp.
1347 2. skb is added to tail of read queue, rather than out of order.
1348 3. socket is not in passive state.
1349 4. Finally, it really contains options, which user wants to receive.
1350 */
1351 tp = tcp_sk(sk);
1352 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1353 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
YOSHIFUJI Hideaki333fad52005-09-08 09:59:17 +09001354 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
Arnaldo Carvalho de Melo505cbfc2005-08-12 09:19:38 -03001355 np->mcast_oif = inet6_iif(opt_skb);
YOSHIFUJI Hideaki333fad52005-09-08 09:59:17 +09001356 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001357 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1358 if (ipv6_opt_accepted(sk, opt_skb)) {
1359 skb_set_owner_r(opt_skb, sk);
1360 opt_skb = xchg(&np->pktoptions, opt_skb);
1361 } else {
1362 __kfree_skb(opt_skb);
1363 opt_skb = xchg(&np->pktoptions, NULL);
1364 }
1365 }
1366
1367 if (opt_skb)
1368 kfree_skb(opt_skb);
1369 return 0;
1370}
1371
1372static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1373{
1374 struct sk_buff *skb = *pskb;
1375 struct tcphdr *th;
1376 struct sock *sk;
1377 int ret;
1378
1379 if (skb->pkt_type != PACKET_HOST)
1380 goto discard_it;
1381
1382 /*
1383 * Count it even if it's bad.
1384 */
1385 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1386
1387 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1388 goto discard_it;
1389
1390 th = skb->h.th;
1391
1392 if (th->doff < sizeof(struct tcphdr)/4)
1393 goto bad_packet;
1394 if (!pskb_may_pull(skb, th->doff*4))
1395 goto discard_it;
1396
1397 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
Herbert Xufb286bb2005-11-10 13:01:24 -08001398 tcp_v6_checksum_init(skb)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001399 goto bad_packet;
1400
1401 th = skb->h.th;
1402 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1403 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1404 skb->len - th->doff*4);
1405 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1406 TCP_SKB_CB(skb)->when = 0;
1407 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1408 TCP_SKB_CB(skb)->sacked = 0;
1409
Arnaldo Carvalho de Melo505cbfc2005-08-12 09:19:38 -03001410 sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
1411 &skb->nh.ipv6h->daddr, ntohs(th->dest),
1412 inet6_iif(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001413
1414 if (!sk)
1415 goto no_tcp_socket;
1416
1417process:
1418 if (sk->sk_state == TCP_TIME_WAIT)
1419 goto do_time_wait;
1420
1421 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1422 goto discard_and_relse;
1423
1424 if (sk_filter(sk, skb, 0))
1425 goto discard_and_relse;
1426
1427 skb->dev = NULL;
1428
1429 bh_lock_sock(sk);
1430 ret = 0;
1431 if (!sock_owned_by_user(sk)) {
1432 if (!tcp_prequeue(sk, skb))
1433 ret = tcp_v6_do_rcv(sk, skb);
1434 } else
1435 sk_add_backlog(sk, skb);
1436 bh_unlock_sock(sk);
1437
1438 sock_put(sk);
1439 return ret ? -1 : 0;
1440
1441no_tcp_socket:
1442 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1443 goto discard_it;
1444
1445 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1446bad_packet:
1447 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1448 } else {
1449 tcp_v6_send_reset(skb);
1450 }
1451
1452discard_it:
1453
1454 /*
1455 * Discard frame
1456 */
1457
1458 kfree_skb(skb);
1459 return 0;
1460
1461discard_and_relse:
1462 sock_put(sk);
1463 goto discard_it;
1464
1465do_time_wait:
1466 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001467 inet_twsk_put((struct inet_timewait_sock *)sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001468 goto discard_it;
1469 }
1470
1471 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1472 TCP_INC_STATS_BH(TCP_MIB_INERRS);
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001473 inet_twsk_put((struct inet_timewait_sock *)sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001474 goto discard_it;
1475 }
1476
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001477 switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
1478 skb, th)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001479 case TCP_TW_SYN:
1480 {
1481 struct sock *sk2;
1482
Arnaldo Carvalho de Melo505cbfc2005-08-12 09:19:38 -03001483 sk2 = inet6_lookup_listener(&tcp_hashinfo,
1484 &skb->nh.ipv6h->daddr,
1485 ntohs(th->dest), inet6_iif(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001486 if (sk2 != NULL) {
Arnaldo Carvalho de Melo295ff7e2005-08-09 20:44:40 -07001487 struct inet_timewait_sock *tw = inet_twsk(sk);
1488 inet_twsk_deschedule(tw, &tcp_death_row);
1489 inet_twsk_put(tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001490 sk = sk2;
1491 goto process;
1492 }
1493 /* Fall through to ACK */
1494 }
1495 case TCP_TW_ACK:
1496 tcp_v6_timewait_ack(sk, skb);
1497 break;
1498 case TCP_TW_RST:
1499 goto no_tcp_socket;
1500 case TCP_TW_SUCCESS:;
1501 }
1502 goto discard_it;
1503}
1504
1505static int tcp_v6_rebuild_header(struct sock *sk)
1506{
1507 int err;
1508 struct dst_entry *dst;
1509 struct ipv6_pinfo *np = inet6_sk(sk);
1510
1511 dst = __sk_dst_check(sk, np->dst_cookie);
1512
1513 if (dst == NULL) {
1514 struct inet_sock *inet = inet_sk(sk);
1515 struct in6_addr *final_p = NULL, final;
1516 struct flowi fl;
1517
1518 memset(&fl, 0, sizeof(fl));
1519 fl.proto = IPPROTO_TCP;
1520 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1521 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1522 fl.fl6_flowlabel = np->flow_label;
1523 fl.oif = sk->sk_bound_dev_if;
1524 fl.fl_ip_dport = inet->dport;
1525 fl.fl_ip_sport = inet->sport;
1526
1527 if (np->opt && np->opt->srcrt) {
1528 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1529 ipv6_addr_copy(&final, &fl.fl6_dst);
1530 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1531 final_p = &final;
1532 }
1533
1534 err = ip6_dst_lookup(sk, &dst, &fl);
1535 if (err) {
1536 sk->sk_route_caps = 0;
1537 return err;
1538 }
1539 if (final_p)
1540 ipv6_addr_copy(&fl.fl6_dst, final_p);
1541
1542 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1543 sk->sk_err_soft = -err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001544 return err;
1545 }
1546
1547 ip6_dst_store(sk, dst, NULL);
1548 sk->sk_route_caps = dst->dev->features &
1549 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1550 }
1551
1552 return 0;
1553}
1554
1555static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1556{
1557 struct sock *sk = skb->sk;
1558 struct inet_sock *inet = inet_sk(sk);
1559 struct ipv6_pinfo *np = inet6_sk(sk);
1560 struct flowi fl;
1561 struct dst_entry *dst;
1562 struct in6_addr *final_p = NULL, final;
1563
1564 memset(&fl, 0, sizeof(fl));
1565 fl.proto = IPPROTO_TCP;
1566 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1567 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1568 fl.fl6_flowlabel = np->flow_label;
1569 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1570 fl.oif = sk->sk_bound_dev_if;
1571 fl.fl_ip_sport = inet->sport;
1572 fl.fl_ip_dport = inet->dport;
1573
1574 if (np->opt && np->opt->srcrt) {
1575 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1576 ipv6_addr_copy(&final, &fl.fl6_dst);
1577 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1578 final_p = &final;
1579 }
1580
1581 dst = __sk_dst_check(sk, np->dst_cookie);
1582
1583 if (dst == NULL) {
1584 int err = ip6_dst_lookup(sk, &dst, &fl);
1585
1586 if (err) {
1587 sk->sk_err_soft = -err;
1588 return err;
1589 }
1590
1591 if (final_p)
1592 ipv6_addr_copy(&fl.fl6_dst, final_p);
1593
1594 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1595 sk->sk_route_caps = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001596 return err;
1597 }
1598
1599 ip6_dst_store(sk, dst, NULL);
1600 sk->sk_route_caps = dst->dev->features &
1601 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1602 }
1603
1604 skb->dst = dst_clone(dst);
1605
1606 /* Restore final destination back after routing done */
1607 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1608
1609 return ip6_xmit(sk, skb, &fl, np->opt, 0);
1610}
1611
1612static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1613{
1614 struct ipv6_pinfo *np = inet6_sk(sk);
1615 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1616
1617 sin6->sin6_family = AF_INET6;
1618 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1619 sin6->sin6_port = inet_sk(sk)->dport;
1620 /* We do not store received flowlabel for TCP */
1621 sin6->sin6_flowinfo = 0;
1622 sin6->sin6_scope_id = 0;
1623 if (sk->sk_bound_dev_if &&
1624 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1625 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1626}
1627
/*
 * remember_stamp operation for native IPv6 sockets: not implemented,
 * always returns 0.
 */
static int tcp_v6_remember_stamp(struct sock *sk)
{
	/* Alas, not yet... */
	return 0;
}
1633
1634static struct tcp_func ipv6_specific = {
1635 .queue_xmit = tcp_v6_xmit,
1636 .send_check = tcp_v6_send_check,
1637 .rebuild_header = tcp_v6_rebuild_header,
1638 .conn_request = tcp_v6_conn_request,
1639 .syn_recv_sock = tcp_v6_syn_recv_sock,
1640 .remember_stamp = tcp_v6_remember_stamp,
1641 .net_header_len = sizeof(struct ipv6hdr),
1642
1643 .setsockopt = ipv6_setsockopt,
1644 .getsockopt = ipv6_getsockopt,
1645 .addr2sockaddr = v6_addr2sockaddr,
1646 .sockaddr_len = sizeof(struct sockaddr_in6)
1647};
1648
1649/*
1650 * TCP over IPv4 via INET6 API
1651 */
1652
1653static struct tcp_func ipv6_mapped = {
1654 .queue_xmit = ip_queue_xmit,
1655 .send_check = tcp_v4_send_check,
Arnaldo Carvalho de Melo32519f12005-08-09 19:50:02 -07001656 .rebuild_header = inet_sk_rebuild_header,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001657 .conn_request = tcp_v6_conn_request,
1658 .syn_recv_sock = tcp_v6_syn_recv_sock,
1659 .remember_stamp = tcp_v4_remember_stamp,
1660 .net_header_len = sizeof(struct iphdr),
1661
1662 .setsockopt = ipv6_setsockopt,
1663 .getsockopt = ipv6_getsockopt,
1664 .addr2sockaddr = v6_addr2sockaddr,
1665 .sockaddr_len = sizeof(struct sockaddr_in6)
1666};
1667
1668
1669
1670/* NOTE: A lot of things set to zero explicitly by call to
1671 * sk_alloc() so need not be done here.
1672 */
1673static int tcp_v6_init_sock(struct sock *sk)
1674{
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03001675 struct inet_connection_sock *icsk = inet_csk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676 struct tcp_sock *tp = tcp_sk(sk);
1677
1678 skb_queue_head_init(&tp->out_of_order_queue);
1679 tcp_init_xmit_timers(sk);
1680 tcp_prequeue_init(tp);
1681
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03001682 icsk->icsk_rto = TCP_TIMEOUT_INIT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683 tp->mdev = TCP_TIMEOUT_INIT;
1684
1685 /* So many TCP implementations out there (incorrectly) count the
1686 * initial SYN frame in their delayed-ACK and congestion control
1687 * algorithms that we must have the following bandaid to talk
1688 * efficiently to them. -DaveM
1689 */
1690 tp->snd_cwnd = 2;
1691
1692 /* See draft-stevens-tcpca-spec-01 for discussion of the
1693 * initialization of these values.
1694 */
1695 tp->snd_ssthresh = 0x7fffffff;
1696 tp->snd_cwnd_clamp = ~0;
David S. Millerc1b4a7e2005-07-05 15:24:38 -07001697 tp->mss_cache = 536;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001698
1699 tp->reordering = sysctl_tcp_reordering;
1700
1701 sk->sk_state = TCP_CLOSE;
1702
1703 tp->af_specific = &ipv6_specific;
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03001704 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001705 sk->sk_write_space = sk_stream_write_space;
1706 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1707
1708 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1709 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1710
1711 atomic_inc(&tcp_sockets_allocated);
1712
1713 return 0;
1714}
1715
/*
 * Tear down a TCP/IPv6 socket: first the TCP state (shared with IPv4),
 * then the IPv6 state.  Returns the result of inet6_destroy_sock().
 */
static int tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	return inet6_destroy_sock(sk);
}
1721
1722/* Proc filesystem TCPv6 sock list dumping. */
1723static void get_openreq6(struct seq_file *seq,
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001724 struct sock *sk, struct request_sock *req, int i, int uid)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001725{
1726 struct in6_addr *dest, *src;
1727 int ttd = req->expires - jiffies;
1728
1729 if (ttd < 0)
1730 ttd = 0;
1731
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001732 src = &tcp6_rsk(req)->loc_addr;
1733 dest = &tcp6_rsk(req)->rmt_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001734 seq_printf(seq,
1735 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1736 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1737 i,
1738 src->s6_addr32[0], src->s6_addr32[1],
1739 src->s6_addr32[2], src->s6_addr32[3],
1740 ntohs(inet_sk(sk)->sport),
1741 dest->s6_addr32[0], dest->s6_addr32[1],
1742 dest->s6_addr32[2], dest->s6_addr32[3],
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001743 ntohs(inet_rsk(req)->rmt_port),
Linus Torvalds1da177e2005-04-16 15:20:36 -07001744 TCP_SYN_RECV,
1745 0,0, /* could print option size, but that is af dependent. */
1746 1, /* timers active (only the expire timer) */
1747 jiffies_to_clock_t(ttd),
1748 req->retrans,
1749 uid,
1750 0, /* non standard timer */
1751 0, /* open_requests have no inode */
1752 0, req);
1753}
1754
1755static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1756{
1757 struct in6_addr *dest, *src;
1758 __u16 destp, srcp;
1759 int timer_active;
1760 unsigned long timer_expires;
1761 struct inet_sock *inet = inet_sk(sp);
1762 struct tcp_sock *tp = tcp_sk(sp);
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001763 const struct inet_connection_sock *icsk = inet_csk(sp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001764 struct ipv6_pinfo *np = inet6_sk(sp);
1765
1766 dest = &np->daddr;
1767 src = &np->rcv_saddr;
1768 destp = ntohs(inet->dport);
1769 srcp = ntohs(inet->sport);
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001770
1771 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001772 timer_active = 1;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001773 timer_expires = icsk->icsk_timeout;
1774 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001775 timer_active = 4;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001776 timer_expires = icsk->icsk_timeout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001777 } else if (timer_pending(&sp->sk_timer)) {
1778 timer_active = 2;
1779 timer_expires = sp->sk_timer.expires;
1780 } else {
1781 timer_active = 0;
1782 timer_expires = jiffies;
1783 }
1784
1785 seq_printf(seq,
1786 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1787 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
1788 i,
1789 src->s6_addr32[0], src->s6_addr32[1],
1790 src->s6_addr32[2], src->s6_addr32[3], srcp,
1791 dest->s6_addr32[0], dest->s6_addr32[1],
1792 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1793 sp->sk_state,
1794 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
1795 timer_active,
1796 jiffies_to_clock_t(timer_expires - jiffies),
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001797 icsk->icsk_retransmits,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001798 sock_i_uid(sp),
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03001799 icsk->icsk_probes_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001800 sock_i_ino(sp),
1801 atomic_read(&sp->sk_refcnt), sp,
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001802 icsk->icsk_rto,
1803 icsk->icsk_ack.ato,
1804 (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001805 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
1806 );
1807}
1808
1809static void get_timewait6_sock(struct seq_file *seq,
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001810 struct inet_timewait_sock *tw, int i)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001811{
1812 struct in6_addr *dest, *src;
1813 __u16 destp, srcp;
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001814 struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001815 int ttd = tw->tw_ttd - jiffies;
1816
1817 if (ttd < 0)
1818 ttd = 0;
1819
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001820 dest = &tcp6tw->tw_v6_daddr;
1821 src = &tcp6tw->tw_v6_rcv_saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001822 destp = ntohs(tw->tw_dport);
1823 srcp = ntohs(tw->tw_sport);
1824
1825 seq_printf(seq,
1826 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1827 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1828 i,
1829 src->s6_addr32[0], src->s6_addr32[1],
1830 src->s6_addr32[2], src->s6_addr32[3], srcp,
1831 dest->s6_addr32[0], dest->s6_addr32[1],
1832 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1833 tw->tw_substate, 0, 0,
1834 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
1835 atomic_read(&tw->tw_refcnt), tw);
1836}
1837
1838#ifdef CONFIG_PROC_FS
1839static int tcp6_seq_show(struct seq_file *seq, void *v)
1840{
1841 struct tcp_iter_state *st;
1842
1843 if (v == SEQ_START_TOKEN) {
1844 seq_puts(seq,
1845 " sl "
1846 "local_address "
1847 "remote_address "
1848 "st tx_queue rx_queue tr tm->when retrnsmt"
1849 " uid timeout inode\n");
1850 goto out;
1851 }
1852 st = seq->private;
1853
1854 switch (st->state) {
1855 case TCP_SEQ_STATE_LISTENING:
1856 case TCP_SEQ_STATE_ESTABLISHED:
1857 get_tcp6_sock(seq, v, st->num);
1858 break;
1859 case TCP_SEQ_STATE_OPENREQ:
1860 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
1861 break;
1862 case TCP_SEQ_STATE_TIME_WAIT:
1863 get_timewait6_sock(seq, v, st->num);
1864 break;
1865 }
1866out:
1867 return 0;
1868}
1869
1870static struct file_operations tcp6_seq_fops;
1871static struct tcp_seq_afinfo tcp6_seq_afinfo = {
1872 .owner = THIS_MODULE,
1873 .name = "tcp6",
1874 .family = AF_INET6,
1875 .seq_show = tcp6_seq_show,
1876 .seq_fops = &tcp6_seq_fops,
1877};
1878
1879int __init tcp6_proc_init(void)
1880{
1881 return tcp_proc_register(&tcp6_seq_afinfo);
1882}
1883
1884void tcp6_proc_exit(void)
1885{
1886 tcp_proc_unregister(&tcp6_seq_afinfo);
1887}
1888#endif
1889
1890struct proto tcpv6_prot = {
1891 .name = "TCPv6",
1892 .owner = THIS_MODULE,
1893 .close = tcp_close,
1894 .connect = tcp_v6_connect,
1895 .disconnect = tcp_disconnect,
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07001896 .accept = inet_csk_accept,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001897 .ioctl = tcp_ioctl,
1898 .init = tcp_v6_init_sock,
1899 .destroy = tcp_v6_destroy_sock,
1900 .shutdown = tcp_shutdown,
1901 .setsockopt = tcp_setsockopt,
1902 .getsockopt = tcp_getsockopt,
1903 .sendmsg = tcp_sendmsg,
1904 .recvmsg = tcp_recvmsg,
1905 .backlog_rcv = tcp_v6_do_rcv,
1906 .hash = tcp_v6_hash,
1907 .unhash = tcp_unhash,
1908 .get_port = tcp_v6_get_port,
1909 .enter_memory_pressure = tcp_enter_memory_pressure,
1910 .sockets_allocated = &tcp_sockets_allocated,
1911 .memory_allocated = &tcp_memory_allocated,
1912 .memory_pressure = &tcp_memory_pressure,
Arnaldo Carvalho de Melo0a5578c2005-08-09 20:11:41 -07001913 .orphan_count = &tcp_orphan_count,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001914 .sysctl_mem = sysctl_tcp_mem,
1915 .sysctl_wmem = sysctl_tcp_wmem,
1916 .sysctl_rmem = sysctl_tcp_rmem,
1917 .max_header = MAX_TCP_HEADER,
1918 .obj_size = sizeof(struct tcp6_sock),
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001919 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001920 .rsk_prot = &tcp6_request_sock_ops,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001921};
1922
1923static struct inet6_protocol tcpv6_protocol = {
1924 .handler = tcp_v6_rcv,
1925 .err_handler = tcp_v6_err,
1926 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
1927};
1928
Linus Torvalds1da177e2005-04-16 15:20:36 -07001929static struct inet_protosw tcpv6_protosw = {
1930 .type = SOCK_STREAM,
1931 .protocol = IPPROTO_TCP,
1932 .prot = &tcpv6_prot,
1933 .ops = &inet6_stream_ops,
1934 .capability = -1,
1935 .no_check = 0,
1936 .flags = INET_PROTOSW_PERMANENT,
1937};
1938
1939void __init tcpv6_init(void)
1940{
1941 /* register inet6 protocol */
1942 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
1943 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
1944 inet6_register_protosw(&tcpv6_protosw);
1945}