/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static void	tcp_v6_send_reset(struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
static void	tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
				  struct sk_buff *skb);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static int	tcp_v6_xmit(struct sk_buff *skb, int ipfragok);

static struct tcp_func ipv6_mapped;
static struct tcp_func ipv6_specific;

/* I have no idea if this is a good hash for v6 or not. -DaveM */
static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
				    struct in6_addr *faddr, u16 fport)
{
	int hashent = (lport ^ fport);

	hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
	hashent ^= hashent >> 16;
	hashent ^= hashent >> 8;
	return (hashent & (tcp_ehash_size - 1));
}

static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *laddr = &np->rcv_saddr;
	struct in6_addr *faddr = &np->daddr;
	__u16 lport = inet->num;
	__u16 fport = inet->dport;
	return tcp_v6_hashfn(laddr, lport, faddr, fport);
}

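/* Two bound sockets conflict only if they could both receive the same
 * packets: their device bindings overlap (either is unbound, or both
 * are bound to the same interface), SO_REUSEADDR does not exempt them
 * (one lacks it, or the existing owner is listening), and their local
 * addresses compare equal under ipv6_rcv_saddr_equal().
 */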
static inline int tcp_v6_bind_conflict(struct sock *sk,
				       struct tcp_bind_bucket *tb)
{
	struct sock *sk2;
	struct hlist_node *node;

	/* We must walk the whole port owner list in this case. -DaveM */
	sk_for_each_bound(sk2, node, &tb->owners) {
		if (sk != sk2 &&
		    (!sk->sk_bound_dev_if ||
		     !sk2->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
		    (!sk->sk_reuse || !sk2->sk_reuse ||
		     sk2->sk_state == TCP_LISTEN) &&
		    ipv6_rcv_saddr_equal(sk, sk2))
			break;
	}

	return node != NULL;
}

/* Grrr, addr_type already calculated by caller, but I don't want
 * to add some silly "cookie" argument to this method just for that.
 * But it doesn't matter, the recalculation is in the rarest path
 * this function ever takes.
 */
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
	struct tcp_bind_hashbucket *head;
	struct tcp_bind_bucket *tb;
	struct hlist_node *node;
	int ret;

	local_bh_disable();
	if (snum == 0) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int remaining = (high - low) + 1;
		int rover;

		spin_lock(&tcp_portalloc_lock);
		if (tcp_port_rover < low)
			rover = low;
		else
			rover = tcp_port_rover;
		do {	rover++;
			if (rover > high)
				rover = low;
			head = &tcp_bhash[tcp_bhashfn(rover)];
			spin_lock(&head->lock);
			tb_for_each(tb, node, &head->chain)
				if (tb->port == rover)
					goto next;
			break;
		next:
			spin_unlock(&head->lock);
		} while (--remaining > 0);
		tcp_port_rover = rover;
		spin_unlock(&tcp_portalloc_lock);

		/* Exhausted local port range during search?  It is not
		 * possible for us to be holding one of the bind hash
		 * locks if this test triggers, because if 'remaining'
		 * drops to zero, we broke out of the do/while loop at
		 * the top level, not from the 'break;' statement.
		 */
		ret = 1;
		if (unlikely(remaining <= 0))
			goto fail;

		/* OK, here is the one we will use. */
		snum = rover;
	} else {
		head = &tcp_bhash[tcp_bhashfn(snum)];
		spin_lock(&head->lock);
		tb_for_each(tb, node, &head->chain)
			if (tb->port == snum)
				goto tb_found;
	}
	tb = NULL;
	goto tb_not_found;
tb_found:
	if (tb && !hlist_empty(&tb->owners)) {
		if (tb->fastreuse > 0 && sk->sk_reuse &&
		    sk->sk_state != TCP_LISTEN) {
			goto success;
		} else {
			ret = 1;
			if (tcp_v6_bind_conflict(sk, tb))
				goto fail_unlock;
		}
	}
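	/* tb->fastreuse caches "every current owner of this port has
	 * SO_REUSEADDR set and is not listening", letting a later
	 * reusing bind skip the conflict walk above.  (Intent inferred
	 * from the checks below; the ipv4 twin tcp_v4_get_port is the
	 * reference.)
	 */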
tb_not_found:
	ret = 1;
	if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
		goto fail_unlock;
	if (hlist_empty(&tb->owners)) {
		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
			tb->fastreuse = 1;
		else
			tb->fastreuse = 0;
	} else if (tb->fastreuse &&
		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
		tb->fastreuse = 0;

success:
	if (!tcp_sk(sk)->bind_hash)
		tcp_bind_hash(sk, tb, snum);
	BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
	ret = 0;

fail_unlock:
	spin_unlock(&head->lock);
fail:
	local_bh_enable();
	return ret;
}

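/* Listening sockets live in tcp_listening_hash under the global
 * tcp_lhash_lock; established sockets go into the tcp_ehash bucket
 * selected by the 4-tuple hash computed above.
 */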
static __inline__ void __tcp_v6_hash(struct sock *sk)
{
	struct hlist_head *list;
	rwlock_t *lock;

	BUG_TRAP(sk_unhashed(sk));

	if (sk->sk_state == TCP_LISTEN) {
		list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
		lock = &tcp_lhash_lock;
		tcp_listen_wlock();
	} else {
		sk->sk_hashent = tcp_v6_sk_hashfn(sk);
		list = &tcp_ehash[sk->sk_hashent].chain;
		lock = &tcp_ehash[sk->sk_hashent].lock;
		write_lock(lock);
	}

	__sk_add_node(sk, list);
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(lock);
}


static void tcp_v6_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		struct tcp_sock *tp = tcp_sk(sk);

		if (tp->af_specific == &ipv6_mapped) {
			tcp_prot.hash(sk);
			return;
		}
		local_bh_disable();
		__tcp_v6_hash(sk);
		local_bh_enable();
	}
}

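/* Pick the best listener: a family/port match scores 1, an exact local
 * address match and a matching bound device score one more each, so a
 * score of 3 is a perfect hit and ends the walk early.
 */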
static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
{
	struct sock *sk;
	struct hlist_node *node;
	struct sock *result = NULL;
	int score, hiscore;

	hiscore = 0;
	read_lock(&tcp_lhash_lock);
	sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
		if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			score = 1;
			if (!ipv6_addr_any(&np->rcv_saddr)) {
				if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
					continue;
				score++;
			}
			if (sk->sk_bound_dev_if) {
				if (sk->sk_bound_dev_if != dif)
					continue;
				score++;
			}
			if (score == 3) {
				result = sk;
				break;
			}
			if (score > hiscore) {
				hiscore = score;
				result = sk;
			}
		}
	}
	if (result)
		sock_hold(result);
	read_unlock(&tcp_lhash_lock);
	return result;
}

/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
 *
 * The sockhash lock must be held as a reader here.
 */

static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
						       struct in6_addr *daddr, u16 hnum,
						       int dif)
{
	struct tcp_ehash_bucket *head;
	struct sock *sk;
	struct hlist_node *node;
	__u32 ports = TCP_COMBINED_PORTS(sport, hnum);
	int hash;

	/* Optimize here for direct hit, only listening connections can
	 * have wildcards anyways.
	 */
	hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
	head = &tcp_ehash[hash];
	read_lock(&head->lock);
	sk_for_each(sk, node, &head->chain) {
		/* For IPV6 do the cheaper port and family tests first. */
		if (TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
			goto hit; /* You sunk my battleship! */
	}
	/* Must check for a TIME_WAIT'er before going to listener hash. */
	sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
		/* FIXME: acme: check this... */
		struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

		if (*((__u32 *)&(tw->tw_dport)) == ports &&
		    sk->sk_family == PF_INET6) {
			if (ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
			    ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
			    (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
				goto hit;
		}
	}
	read_unlock(&head->lock);
	return NULL;

hit:
	sock_hold(sk);
	read_unlock(&head->lock);
	return sk;
}


static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
					   struct in6_addr *daddr, u16 hnum,
					   int dif)
{
	struct sock *sk;

	sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);

	if (sk)
		return sk;

	return tcp_v6_lookup_listener(daddr, hnum, dif);
}

inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
				  struct in6_addr *daddr, u16 dport,
				  int dif)
{
	struct sock *sk;

	local_bh_disable();
	sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
	local_bh_enable();

	return sk;
}

EXPORT_SYMBOL_GPL(tcp_v6_lookup);


/*
 * Open request hash tables.
 */

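/* Open-coded jhash over the remote address and port, keyed by the
 * per-listener random value, so an attacker cannot predict which
 * syn-queue bucket a given SYN lands in.
 */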
static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
{
	u32 a, b, c;

	a = raddr->s6_addr32[0];
	b = raddr->s6_addr32[1];
	c = raddr->s6_addr32[2];

	a += JHASH_GOLDEN_RATIO;
	b += JHASH_GOLDEN_RATIO;
	c += rnd;
	__jhash_mix(a, b, c);

	a += raddr->s6_addr32[3];
	b += (u32) rport;
	__jhash_mix(a, b, c);

	return c & (TCP_SYNQ_HSIZE - 1);
}

static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp,
					      struct request_sock ***prevp,
					      __u16 rport,
					      struct in6_addr *raddr,
					      struct in6_addr *laddr,
					      int iif)
{
	struct listen_sock *lopt = tp->accept_queue.listen_opt;
	struct request_sock *req, **prev;

	for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
	     (req = *prev) != NULL;
	     prev = &req->dl_next) {
		const struct tcp6_request_sock *treq = tcp6_rsk(req);

		if (inet_rsk(req)->rmt_port == rport &&
		    req->rsk_ops->family == AF_INET6 &&
		    ipv6_addr_equal(&treq->rmt_addr, raddr) &&
		    ipv6_addr_equal(&treq->loc_addr, laddr) &&
		    (!treq->iif || treq->iif == iif)) {
			BUG_TRAP(req->sk == NULL);
			*prevp = prev;
			return req;
		}
	}

	return NULL;
}

static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
				   struct in6_addr *saddr,
				   struct in6_addr *daddr,
				   unsigned long base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}

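/* A SYN for a v4-mapped socket arrives as an ETH_P_IP frame, so the
 * initial sequence number must be derived from the address family of
 * the packet, not of the socket.
 */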
static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IPV6)) {
		return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
						    skb->nh.ipv6h->saddr.s6_addr32,
						    skb->h.th->dest,
						    skb->h.th->source);
	} else {
		return secure_tcp_sequence_number(skb->nh.iph->daddr,
						  skb->nh.iph->saddr,
						  skb->h.th->dest,
						  skb->h.th->source);
	}
}

static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
				      struct tcp_tw_bucket **twp)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *daddr = &np->rcv_saddr;
	struct in6_addr *saddr = &np->daddr;
	int dif = sk->sk_bound_dev_if;
	u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
	int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
	struct tcp_ehash_bucket *head = &tcp_ehash[hash];
	struct sock *sk2;
	struct hlist_node *node;
	struct tcp_tw_bucket *tw;

	write_lock(&head->lock);

	/* Check TIME-WAIT sockets first. */
	sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
		tw = (struct tcp_tw_bucket *)sk2;

		if (*((__u32 *)&(tw->tw_dport)) == ports &&
		    sk2->sk_family == PF_INET6 &&
		    ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
		    ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
		    sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
			struct tcp_sock *tp = tcp_sk(sk);

			if (tw->tw_ts_recent_stamp &&
			    (!twp || (sysctl_tcp_tw_reuse &&
				      xtime.tv_sec -
				      tw->tw_ts_recent_stamp > 1))) {
				/* See comment in tcp_ipv4.c */
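				/* Stepping write_seq 65535 + 2 past the
				 * old connection's snd_nxt keeps the
				 * recycled connection's sequence space
				 * from overlapping the one still in
				 * TIME-WAIT (same reasoning as the ipv4
				 * code this comment points at).
				 */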
				tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
				if (!tp->write_seq)
					tp->write_seq = 1;
				tp->rx_opt.ts_recent = tw->tw_ts_recent;
				tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
				sock_hold(sk2);
				goto unique;
			} else
				goto not_unique;
		}
	}
	tw = NULL;

	/* And established part... */
	sk_for_each(sk2, node, &head->chain) {
		if (TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
			goto not_unique;
	}

unique:
	BUG_TRAP(sk_unhashed(sk));
	__sk_add_node(sk, &head->chain);
	sk->sk_hashent = hash;
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(&head->lock);

	if (twp) {
		*twp = tw;
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
	} else if (tw) {
		/* Silly. Should hash-dance instead... */
		tcp_tw_deschedule(tw);
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

		tcp_tw_put(tw);
	}
	return 0;

not_unique:
	write_unlock(&head->lock);
	return -EADDRNOTAVAIL;
}

static inline u32 tcpv6_port_offset(const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);

	return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
					   np->daddr.s6_addr32,
					   inet->dport);
}

static int tcp_v6_hash_connect(struct sock *sk)
{
	unsigned short snum = inet_sk(sk)->num;
	struct tcp_bind_hashbucket *head;
	struct tcp_bind_bucket *tb;
	int ret;

	if (!snum) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int range = high - low;
		int i;
		int port;
		static u32 hint;
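		/* Start the walk at a secure per-destination hash plus a
		 * rolling hint, so connects to different peers probe the
		 * port range in unrelated orders and an observer cannot
		 * predict the next ephemeral port.
		 */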
		u32 offset = hint + tcpv6_port_offset(sk);
		struct hlist_node *node;
		struct tcp_tw_bucket *tw = NULL;

		local_bh_disable();
		for (i = 1; i <= range; i++) {
			port = low + (i + offset) % range;
			head = &tcp_bhash[tcp_bhashfn(port)];
			spin_lock(&head->lock);

			/* Does not bother with rcv_saddr checks,
			 * because the established check is already
			 * unique enough.
			 */
			tb_for_each(tb, node, &head->chain) {
				if (tb->port == port) {
					BUG_TRAP(!hlist_empty(&tb->owners));
					if (tb->fastreuse >= 0)
						goto next_port;
					if (!__tcp_v6_check_established(sk,
									port,
									&tw))
						goto ok;
					goto next_port;
				}
			}

			tb = tcp_bucket_create(head, port);
			if (!tb) {
				spin_unlock(&head->lock);
				break;
			}
			tb->fastreuse = -1;
			goto ok;

		next_port:
			spin_unlock(&head->lock);
		}
		local_bh_enable();

		return -EADDRNOTAVAIL;

ok:
		hint += i;

		/* Head lock still held and bh's disabled */
		tcp_bind_hash(sk, tb, port);
		if (sk_unhashed(sk)) {
			inet_sk(sk)->sport = htons(port);
			__tcp_v6_hash(sk);
		}
		spin_unlock(&head->lock);

		if (tw) {
			tcp_tw_deschedule(tw);
			tcp_tw_put(tw);
		}

		ret = 0;
		goto out;
	}

	head = &tcp_bhash[tcp_bhashfn(snum)];
	tb = tcp_sk(sk)->bind_hash;
	spin_lock_bh(&head->lock);

	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
		__tcp_v6_hash(sk);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		ret = __tcp_v6_check_established(sk, snum, NULL);
out:
		local_bh_enable();
		return ret;
	}
}

static __inline__ int tcp_v6_iif(struct sk_buff *skb)
{
	return IP6CB(skb)->iif;
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p = NULL, final;
	struct flowi fl;
	struct dst_entry *dst;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl, 0, sizeof(fl));

	if (np->sndflow) {
		fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl.fl6_flowlabel);
		if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 * connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
	np->flow_label = fl.fl6_flowlabel;

	/*
	 * TCP over IPv4
	 */

	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = tp->ext_header_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		tp->af_specific = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			tp->ext_header_len = exthdrlen;
			tp->af_specific = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
			goto failure;
		} else {
			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
				      inet->saddr);
			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
				      inet->rcv_saddr);
		}

		return err;
	}

	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src,
		       (saddr ? saddr : &np->saddr));
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_dport = usin->sin6_port;
	fl.fl_ip_sport = inet->sport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	err = ip6_dst_lookup(sk, &dst, &fl);
	if (err)
		goto failure;
	if (final_p)
		ipv6_addr_copy(&fl.fl6_dst, final_p);

	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
		dst_release(dst);
		goto failure;
	}

	if (saddr == NULL) {
		saddr = &fl.fl6_src;
		ipv6_addr_copy(&np->rcv_saddr, saddr);
	}

	/* set the source address */
	ipv6_addr_copy(&np->saddr, saddr);
	inet->rcv_saddr = LOOPBACK4_IPV6;

	ip6_dst_store(sk, dst, NULL);
	sk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	tp->ext_header_len = 0;
	if (np->opt)
		tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = tcp_v6_hash_connect(sk);
	if (err)
		goto late_failure;

	if (!tp->write_seq)
		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
							     np->daddr.s6_addr32,
							     inet->sport,
							     inet->dport);

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	__sk_dst_reset(sk);
failure:
	inet->dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       int type, int code, int offset, __u32 info)
{
	struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data;
	struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp;
	__u32 seq;

	sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		tcp_tw_put((struct tcp_tw_bucket *)sk);
		return;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst = NULL;

		if (sock_owned_by_user(sk))
			goto out;
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
			goto out;

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct inet_sock *inet = inet_sk(sk);
			struct flowi fl;

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle rthdr case. Ignore this complexity
			   for now.
			 */
			memset(&fl, 0, sizeof(fl));
			fl.proto = IPPROTO_TCP;
			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
			fl.oif = sk->sk_bound_dev_if;
			fl.fl_ip_dport = inet->dport;
			fl.fl_ip_sport = inet->sport;

			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
				sk->sk_err_soft = -err;
				goto out;
			}

			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
				sk->sk_err_soft = -err;
				goto out;
			}

		} else
			dst_hold(dst);

		if (tp->pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */
		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for a request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
					&hdr->saddr, tcp_v6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		BUG_TRAP(req->sk == NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		tcp_synq_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, if SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}


static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
			      struct dst_entry *dst)
{
	struct tcp6_request_sock *treq = tcp6_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	struct ipv6_txoptions *opt = NULL;
	struct in6_addr *final_p = NULL, final;
	struct flowi fl;
	int err = -1;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
	fl.fl6_flowlabel = 0;
	fl.oif = treq->iif;
	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
	fl.fl_ip_sport = inet_sk(sk)->sport;

	if (dst == NULL) {
		opt = np->opt;
		if (opt == NULL &&
		    np->rxopt.bits.srcrt == 2 &&
		    treq->pktopts) {
			struct sk_buff *pktopts = treq->pktopts;
			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
			if (rxopt->srcrt)
				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(pktopts->nh.raw + rxopt->srcrt));
		}

		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err)
			goto done;
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);
		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto done;
	}

	skb = tcp_make_synack(sk, dst, req);
	if (skb) {
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v6_check(th, skb->len,
					 &treq->loc_addr, &treq->rmt_addr,
					 csum_partial((char *)th, skb->len, skb->csum));

		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		err = ip6_xmit(sk, skb, &fl, opt, 0);
		if (err == NET_XMIT_CN)
			err = 0;
	}

done:
	dst_release(dst);
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	if (tcp6_rsk(req)->pktopts)
		kfree_skb(tcp6_rsk(req)->pktopts);
}

static struct request_sock_ops tcp6_request_sock_ops = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_v6_send_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset
};

static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet6_skb_parm *opt = IP6CB(skb);

	if (np->rxopt.all) {
		if ((opt->hop && np->rxopt.bits.hopopts) ||
		    ((IPV6_FLOWINFO_MASK & *(u32 *)skb->nh.raw) &&
		     np->rxopt.bits.rxflow) ||
		    (opt->srcrt && np->rxopt.bits.srcrt) ||
		    ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
			return 1;
	}
	return 0;
}


static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
			      struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);

	if (skb->ip_summed == CHECKSUM_HW) {
		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
		skb->csum = offsetof(struct tcphdr, check);
	} else {
		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
					    csum_partial((char *)th, th->doff << 2,
							 skb->csum));
	}
}


static void tcp_v6_send_reset(struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;

	if (th->rst)
		return;

	if (!ipv6_unicast_destination(skb))
		return;

	/*
	 * We need to grab some memory, and put together an RST,
	 * and then put it into the queue to be sent.
	 */

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));

	t1 = (struct tcphdr *) skb_push(buff, sizeof(struct tcphdr));

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = sizeof(*t1)/4;
	t1->rst = 1;

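	/* Per RFC 793: if the offending segment carried an ACK, the RST
	 * takes its sequence number from that ACK; otherwise the RST
	 * must itself ACK everything the segment occupied.
	 */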
	if (th->ack) {
		t1->seq = th->ack_seq;
	} else {
		t1->ack = 1;
		t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
				    + skb->len - (th->doff << 2));
	}

	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    sizeof(*t1), IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = tcp_v6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	/* sk = NULL, but it is safe for now. RST socket required. */
	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {

		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
			dst_release(buff->dst);
			return;
		}

		ip6_xmit(NULL, buff, &fl, NULL, 0);
		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
		TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;
	int tot_len = sizeof(struct tcphdr);

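	/* A timestamp option occupies 12 bytes on the wire: two NOPs
	 * for alignment plus the 10-byte TCPOPT_TIMESTAMP, hence the
	 * 3*4 below.
	 */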
	if (ts)
		tot_len += 3*4;

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *) skb_push(buff, tot_len);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len/4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = 1;
	t1->window = htons(win);

	if (ts) {
		u32 *ptr = (u32 *)(t1 + 1);
		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
			       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*ptr++ = htonl(tcp_time_stamp);
		*ptr = htonl(ts);
	}

	buff->csum = csum_partial((char *)t1, tot_len, 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    tot_len, IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = tcp_v6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
			dst_release(buff->dst);
			return;
		}
		ip6_xmit(NULL, buff, &fl, NULL, 0);
		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
		return;
	}

	kfree_skb(buff);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

	tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
			tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);

	tcp_tw_put(tw);
}

static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
{
	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
}


static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct request_sock *req, **prev;
	struct tcphdr *th = skb->h.th;
	struct tcp_sock *tp = tcp_sk(sk);
	struct sock *nsk;

	/* Find possible connection requests. */
	req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
				&skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
					  th->source,
					  &skb->nh.ipv6h->daddr,
					  ntohs(th->dest),
					  tcp_v6_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		tcp_tw_put((struct tcp_tw_bucket *)nsk);
		return NULL;
	}

#if 0 /*def CONFIG_SYN_COOKIES*/
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct listen_sock *lopt = tp->accept_queue.listen_opt;
	u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);

	reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
	tcp_synq_added(sk);
}


/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp6_request_sock *treq;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_options_received tmp_opt;
	struct tcp_sock *tp = tcp_sk(sk);
	struct request_sock *req = NULL;
	__u32 isn = TCP_SKB_CB(skb)->when;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	/*
	 *	There are no SYN attacks on IPv6, yet...
	 */
	if (tcp_synq_is_full(sk) && !isn) {
		if (net_ratelimit())
			printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
		goto drop;
	}

	if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
		goto drop;

	req = reqsk_alloc(&tcp6_request_sock_ops);
	if (req == NULL)
		goto drop;

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	treq = tcp6_rsk(req);
	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
	TCP_ECN_create_request(req, skb->h.th);
	treq->pktopts = NULL;
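	/* Hold on to the SYN skb itself when the listener has asked for
	 * any of the received options; tcp_v6_syn_recv_sock() replays
	 * them to the child socket once the handshake completes.
	 */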
	if (ipv6_opt_accepted(sk, skb) ||
	    np->rxopt.bits.rxinfo ||
	    np->rxopt.bits.rxhlim) {
		atomic_inc(&skb->users);
		treq->pktopts = skb;
	}
	treq->iif = sk->sk_bound_dev_if;

	/* So that link locals have meaning */
	if (!sk->sk_bound_dev_if &&
	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
		treq->iif = tcp_v6_iif(skb);

	if (isn == 0)
		isn = tcp_v6_init_sequence(sk, skb);

	tcp_rsk(req)->snt_isn = isn;

	if (tcp_v6_send_synack(sk, req, NULL))
		goto drop;

	tcp_v6_synq_add(sk, req);

	return 0;

drop:
	if (req)
		reqsk_free(req);

	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
	return 0; /* don't send reset */
}

static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst)
{
	struct tcp6_request_sock *treq = tcp6_rsk(req);
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
	struct ipv6_txoptions *opt;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

		if (newsk == NULL)
			return NULL;

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
			      newinet->daddr);

		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
			      newinet->saddr);

		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

		newtp->af_specific = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = tcp_v6_iif(skb);
		newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is tricky place. Until this moment IPv4 tcp
		   worked with IPv6 af_tcp.af_specific.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, newtp->pmtu_cookie);

		return newsk;
	}

	opt = np->opt;

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (np->rxopt.bits.srcrt == 2 &&
	    opt == NULL && treq->pktopts) {
		struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
		if (rxopt->srcrt)
			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
	}

	if (dst == NULL) {
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}
		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
		fl.fl_ip_sport = inet_sk(sk)->sport;

		if (ip6_dst_lookup(sk, &dst, &fl))
			goto out;

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto out;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	ip6_dst_store(newsk, dst, NULL);
	newsk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
	newsk->sk_bound_dev_if = treq->iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->opt = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (treq->pktopts != NULL) {
		newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
		kfree_skb(treq->pktopts);
		treq->pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);
	}
	newnp->opt	  = NULL;
	newnp->mcast_oif  = tcp_v6_iif(skb);
	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

	/* Clone native IPv6 options from the listening socket (if any).

	   Yes, keeping a reference count would be much more clever,
	   but we do one more thing here: reattach the optmem
	   to newsk.
	 */
	if (opt) {
		newnp->opt = ipv6_dup_options(newsk, opt);
		if (opt != np->opt)
			sock_kfree_s(sk, opt, opt->tot_len);
	}

	newtp->ext_header_len = 0;
	if (newnp->opt)
		newtp->ext_header_len = newnp->opt->opt_nflen +
					newnp->opt->opt_flen;

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;

	__tcp_v6_hash(newsk);
	tcp_inherit_port(sk, newsk);

	return newsk;

out_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
out:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
	return NULL;
}

static int tcp_v6_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_HW) {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		if (!tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				  &skb->nh.ipv6h->daddr, skb->csum))
			return 0;
		LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
	}
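	/* For short segments it is cheaper to verify the checksum right
	 * here; longer ones only get a pseudo-header seed and are
	 * completed later when the data is copied.  The 76-byte cutoff
	 * matches the heuristic used by the IPv4 receive path.
	 */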
	if (skb->len <= 76) {
		if (tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				 &skb->nh.ipv6h->daddr, skb_checksum(skb, 0, skb->len, 0)))
			return -1;
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else {
		skb->csum = ~tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
					  &skb->nh.ipv6h->daddr, 0);
	}
	return 0;
}

/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and tcp_rcv_state_process
	   handle them correctly, but it is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	if (sk_filter(sk, skb, 0))
		goto discard;

	/*
	 * socket locking is here for SMP purposes as backlog rcv
	 * is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought out. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
	   --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, GFP_ATOMIC);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
			goto reset;
		TCP_CHECK_TIMER(sk);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket..
		 */
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
		goto reset;
	TCP_CHECK_TIMER(sk);
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS_BH(TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim)
			np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
		if (ipv6_opt_accepted(sk, opt_skb)) {
			skb_set_owner_r(opt_skb, sk);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	if (opt_skb)
		kfree_skb(opt_skb);
	return 0;
}

static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
{
	struct sk_buff *skb = *pskb;
	struct tcphdr *th;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = skb->h.th;

	if (th->doff < sizeof(struct tcphdr)/4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
	     tcp_v6_checksum_init(skb) < 0))
		goto bad_packet;

	th = skb->h.th;
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
			     &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));

	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb, 0))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock(sk);
	ret = 0;
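	/* tcp_prequeue() hands the segment to a task already blocked in
	 * recvmsg(); if no one is waiting it returns 0 and we process
	 * the segment here in softirq context instead.
	 */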
1762 if (!sock_owned_by_user(sk)) {
1763 if (!tcp_prequeue(sk, skb))
1764 ret = tcp_v6_do_rcv(sk, skb);
1765 } else
1766 sk_add_backlog(sk, skb);
1767 bh_unlock_sock(sk);
1768
1769 sock_put(sk);
1770 return ret ? -1 : 0;
1771
1772no_tcp_socket:
1773 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1774 goto discard_it;
1775
1776 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1777bad_packet:
1778 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1779 } else {
1780 tcp_v6_send_reset(skb);
1781 }
1782
1783discard_it:
1784
1785 /*
1786 * Discard frame
1787 */
1788
1789 kfree_skb(skb);
1790 return 0;
1791
1792discard_and_relse:
1793 sock_put(sk);
1794 goto discard_it;
1795
1796do_time_wait:
1797 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1798 tcp_tw_put((struct tcp_tw_bucket *) sk);
1799 goto discard_it;
1800 }
1801
1802 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1803 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1804 tcp_tw_put((struct tcp_tw_bucket *) sk);
1805 goto discard_it;
1806 }
1807
1808 switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1809 skb, th, skb->len)) {
1810 case TCP_TW_SYN:
1811 {
1812 struct sock *sk2;
1813
1814 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1815 if (sk2 != NULL) {
1816 tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1817 tcp_tw_put((struct tcp_tw_bucket *)sk);
1818 sk = sk2;
1819 goto process;
1820 }
1821 /* Fall through to ACK */
1822 }
1823 case TCP_TW_ACK:
1824 tcp_v6_timewait_ack(sk, skb);
1825 break;
1826 case TCP_TW_RST:
1827 goto no_tcp_socket;
1828 case TCP_TW_SUCCESS:;
1829 }
1830 goto discard_it;
1831}
1832
1833static int tcp_v6_rebuild_header(struct sock *sk)
1834{
1835 int err;
1836 struct dst_entry *dst;
1837 struct ipv6_pinfo *np = inet6_sk(sk);
1838
1839 dst = __sk_dst_check(sk, np->dst_cookie);
1840
1841 if (dst == NULL) {
1842 struct inet_sock *inet = inet_sk(sk);
1843 struct in6_addr *final_p = NULL, final;
1844 struct flowi fl;
1845
1846 memset(&fl, 0, sizeof(fl));
1847 fl.proto = IPPROTO_TCP;
1848 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1849 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1850 fl.fl6_flowlabel = np->flow_label;
1851 fl.oif = sk->sk_bound_dev_if;
1852 fl.fl_ip_dport = inet->dport;
1853 fl.fl_ip_sport = inet->sport;
1854
1855 if (np->opt && np->opt->srcrt) {
1856 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1857 ipv6_addr_copy(&final, &fl.fl6_dst);
1858 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1859 final_p = &final;
1860 }
1861
1862 err = ip6_dst_lookup(sk, &dst, &fl);
1863 if (err) {
1864 sk->sk_route_caps = 0;
1865 return err;
1866 }
1867 if (final_p)
1868 ipv6_addr_copy(&fl.fl6_dst, final_p);
1869
1870 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1871 sk->sk_err_soft = -err;
1872 dst_release(dst);
1873 return err;
1874 }
1875
1876 ip6_dst_store(sk, dst, NULL);
1877 sk->sk_route_caps = dst->dev->features &
1878 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1879 }
1880
1881 return 0;
1882}
1883
static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
{
	struct sock *sk = skb->sk;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct flowi fl;
	struct dst_entry *dst;
	struct in6_addr *final_p = NULL, final;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
	fl.fl6_flowlabel = np->flow_label;
	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_sport = inet->sport;
	fl.fl_ip_dport = inet->dport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		int err = ip6_dst_lookup(sk, &dst, &fl);

		if (err) {
			sk->sk_err_soft = -err;
			return err;
		}

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
			sk->sk_route_caps = 0;
			dst_release(dst);
			return err;
		}

		ip6_dst_store(sk, dst, NULL);
		sk->sk_route_caps = dst->dev->features &
			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
	}

	skb->dst = dst_clone(dst);

	/* Restore the final destination after routing is done */
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);

	return ip6_xmit(sk, skb, &fl, np->opt, 0);
}

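/* Report the peer's address and port as a sockaddr_in6. */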
static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;

	sin6->sin6_family = AF_INET6;
	ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
	sin6->sin6_port = inet_sk(sk)->dport;
	/* We do not store received flowlabel for TCP */
	sin6->sin6_flowinfo = 0;
	sin6->sin6_scope_id = 0;
	if (sk->sk_bound_dev_if &&
	    ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
		sin6->sin6_scope_id = sk->sk_bound_dev_if;
}

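/* Per-peer timestamp caching is not implemented for IPv6 yet; no-op. */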
static int tcp_v6_remember_stamp(struct sock *sk)
{
	/* Alas, not yet... */
	return 0;
}

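/* AF-specific hooks used when the peer is a native IPv6 address. */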
static struct tcp_func ipv6_specific = {
	.queue_xmit	=	tcp_v6_xmit,
	.send_check	=	tcp_v6_send_check,
	.rebuild_header	=	tcp_v6_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v6_remember_stamp,
	.net_header_len	=	sizeof(struct ipv6hdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	v6_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};

/*
 *	TCP over IPv4 via INET6 API
 */

static struct tcp_func ipv6_mapped = {
	.queue_xmit	=	ip_queue_xmit,
	.send_check	=	tcp_v4_send_check,
	.rebuild_header	=	tcp_v4_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v4_remember_stamp,
	.net_header_len	=	sizeof(struct iphdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	v6_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	tp->rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = 536;

	tp->reordering = sysctl_tcp_reordering;

	sk->sk_state = TCP_CLOSE;

	tp->af_specific = &ipv6_specific;
	tp->ca_ops = &tcp_init_congestion_ops;
	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}

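/* Release the IPv4 side of the TCP state, then the IPv6 socket state. */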
static int tcp_v6_destroy_sock(struct sock *sk)
{
	extern int tcp_v4_destroy_sock(struct sock *sk);

	tcp_v4_destroy_sock(sk);
	return inet6_destroy_sock(sk);
}

/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 struct sock *sk, struct request_sock *req, int i, int uid)
{
	struct in6_addr *dest, *src;
	int ttd = req->expires - jiffies;

	if (ttd < 0)
		ttd = 0;

	src = &tcp6_rsk(req)->loc_addr;
	dest = &tcp6_rsk(req)->rmt_addr;
	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   ntohs(inet_sk(sk)->sport),
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->rmt_port),
		   TCP_SYN_RECV,
		   0,0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->retrans,
		   uid,
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

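/* Format one established or listening TCPv6 socket as a /proc/net/tcp6 row. */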
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	struct inet_sock *inet = inet_sk(sp);
	struct tcp_sock *tp = tcp_sk(sp);
	struct ipv6_pinfo *np = inet6_sk(sp);

	dest  = &np->daddr;
	src   = &np->rcv_saddr;
	destp = ntohs(inet->dport);
	srcp  = ntohs(inet->sport);
	if (tp->pending == TCP_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= tp->timeout;
	} else if (tp->pending == TCP_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= tp->timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   sp->sk_state,
		   tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
		   timer_active,
		   jiffies_to_clock_t(timer_expires - jiffies),
		   tp->retransmits,
		   sock_i_uid(sp),
		   tp->probes_out,
		   sock_i_ino(sp),
		   atomic_read(&sp->sk_refcnt), sp,
		   tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
		   tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
		   );
}

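/* Format one TIME-WAIT TCPv6 socket as a /proc/net/tcp6 row. */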
static void get_timewait6_sock(struct seq_file *seq,
			       struct tcp_tw_bucket *tw, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		   atomic_read(&tw->tw_refcnt), tw);
}

#ifdef CONFIG_PROC_FS
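/* Emit one /proc/net/tcp6 row, dispatching on the iterator state. */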
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp6_sock(seq, v, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait6_sock(seq, v, st->num);
		break;
	}
out:
	return 0;
}

static struct file_operations tcp6_seq_fops;
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.owner		= THIS_MODULE,
	.name		= "tcp6",
	.family		= AF_INET6,
	.seq_show	= tcp6_seq_show,
	.seq_fops	= &tcp6_seq_fops,
};

int __init tcp6_proc_init(void)
{
	return tcp_proc_register(&tcp6_seq_afinfo);
}

void tcp6_proc_exit(void)
{
	tcp_proc_unregister(&tcp6_seq_afinfo);
}
#endif

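/* Transport-protocol operations exported to the AF_INET6 socket layer. */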
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= tcp_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.sendmsg		= tcp_sendmsg,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v6_do_rcv,
	.hash			= tcp_v6_hash,
	.unhash			= tcp_unhash,
	.get_port		= tcp_v6_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.rsk_prot		= &tcp6_request_sock_ops,
};

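/* Receive-side registration: IPPROTO_TCP packets are handed to tcp_v6_rcv(). */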
static struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

extern struct proto_ops inet6_stream_ops;

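/* SOCK_STREAM switch entry exposing TCP through the AF_INET6 socket API. */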
static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.capability	=	-1,
	.no_check	=	0,
	.flags		=	INET_PROTOSW_PERMANENT,
};

void __init tcpv6_init(void)
{
	/* register inet6 protocol */
	if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
		printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
	inet6_register_protosw(&tcpv6_protosw);
}