/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					to a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>


static void	tcp_v6_send_reset(struct sk_buff *skb);
static void	tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
static void	tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
				  struct sk_buff *skb);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static int	tcp_v6_xmit(struct sk_buff *skb, int ipfragok);

static struct tcp_func ipv6_mapped;
static struct tcp_func ipv6_specific;

/* I have no idea if this is a good hash for v6 or not. -DaveM */
static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
				    struct in6_addr *faddr, u16 fport)
{
	int hashent = (lport ^ fport);

	hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
	hashent ^= hashent >> 16;
	hashent ^= hashent >> 8;
	return (hashent & (tcp_ehash_size - 1));
}
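
/* A worked example of the fold above, assuming tcp_ehash_size is a
 * power of two (which the mask requires): with lport = 0x0016 and
 * fport = 0x8000, hashent starts as 0x8016; XORing in the low 32 bits
 * of both addresses and folding with the >>16 and >>8 shifts mixes
 * the high bits down, so the final & (tcp_ehash_size - 1) sees
 * entropy from the whole word, not just the low-order bits.
 */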

static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *laddr = &np->rcv_saddr;
	struct in6_addr *faddr = &np->daddr;
	__u16 lport = inet->num;
	__u16 fport = inet->dport;
	return tcp_v6_hashfn(laddr, lport, faddr, fport);
}

static inline int tcp_v6_bind_conflict(struct sock *sk,
				       struct tcp_bind_bucket *tb)
{
	struct sock *sk2;
	struct hlist_node *node;

	/* We must walk the whole port owner list in this case. -DaveM */
	sk_for_each_bound(sk2, node, &tb->owners) {
		if (sk != sk2 &&
		    (!sk->sk_bound_dev_if ||
		     !sk2->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
		    (!sk->sk_reuse || !sk2->sk_reuse ||
		     sk2->sk_state == TCP_LISTEN) &&
		    ipv6_rcv_saddr_equal(sk, sk2))
			break;
	}

	return node != NULL;
}
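
/* In other words, two sockets conflict only when their device
 * bindings overlap (either side unbound, or both bound to the same
 * ifindex), neither side can rely on SO_REUSEADDR (a listener never
 * counts as reusable), and ipv6_rcv_saddr_equal() reports the bound
 * addresses as clashing, wildcard cases included.
 */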

/* Grrr, addr_type already calculated by caller, but I don't want
 * to add some silly "cookie" argument to this method just for that.
 * But it doesn't matter, the recalculation is in the rarest path
 * this function ever takes.
 */
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
	struct tcp_bind_hashbucket *head;
	struct tcp_bind_bucket *tb;
	struct hlist_node *node;
	int ret;

	local_bh_disable();
	if (snum == 0) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int remaining = (high - low) + 1;
		int rover;

		spin_lock(&tcp_portalloc_lock);
		if (tcp_port_rover < low)
			rover = low;
		else
			rover = tcp_port_rover;
		do {	rover++;
			if (rover > high)
				rover = low;
			head = &tcp_bhash[tcp_bhashfn(rover)];
			spin_lock(&head->lock);
			tb_for_each(tb, node, &head->chain)
				if (tb->port == rover)
					goto next;
			break;
		next:
			spin_unlock(&head->lock);
		} while (--remaining > 0);
		tcp_port_rover = rover;
		spin_unlock(&tcp_portalloc_lock);

		/* Exhausted local port range during search? */
		ret = 1;
		if (remaining <= 0)
			goto fail;

		/* OK, here is the one we will use. */
		snum = rover;
	} else {
		head = &tcp_bhash[tcp_bhashfn(snum)];
		spin_lock(&head->lock);
		tb_for_each(tb, node, &head->chain)
			if (tb->port == snum)
				goto tb_found;
	}
	tb = NULL;
	goto tb_not_found;
tb_found:
	if (tb && !hlist_empty(&tb->owners)) {
		if (tb->fastreuse > 0 && sk->sk_reuse &&
		    sk->sk_state != TCP_LISTEN) {
			goto success;
		} else {
			ret = 1;
			if (tcp_v6_bind_conflict(sk, tb))
				goto fail_unlock;
		}
	}
tb_not_found:
	ret = 1;
	if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
		goto fail_unlock;
	if (hlist_empty(&tb->owners)) {
		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
			tb->fastreuse = 1;
		else
			tb->fastreuse = 0;
	} else if (tb->fastreuse &&
		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
		tb->fastreuse = 0;

success:
	if (!tcp_sk(sk)->bind_hash)
		tcp_bind_hash(sk, tb, snum);
	BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
	ret = 0;

fail_unlock:
	spin_unlock(&head->lock);
fail:
	local_bh_enable();
	return ret;
}
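
/* The snum == 0 branch above is classic rover-style ephemeral port
 * allocation: a single global cursor (tcp_port_rover) advances under
 * tcp_portalloc_lock, wraps from high back to low, and the first
 * port with no bucket in tcp_bhash wins.  "remaining" bounds the
 * walk, so an exhausted range fails with ret = 1 rather than
 * spinning forever.
 */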

static __inline__ void __tcp_v6_hash(struct sock *sk)
{
	struct hlist_head *list;
	rwlock_t *lock;

	BUG_TRAP(sk_unhashed(sk));

	if (sk->sk_state == TCP_LISTEN) {
		list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
		lock = &tcp_lhash_lock;
		tcp_listen_wlock();
	} else {
		sk->sk_hashent = tcp_v6_sk_hashfn(sk);
		list = &tcp_ehash[sk->sk_hashent].chain;
		lock = &tcp_ehash[sk->sk_hashent].lock;
		write_lock(lock);
	}

	__sk_add_node(sk, list);
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(lock);
}


static void tcp_v6_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		struct tcp_sock *tp = tcp_sk(sk);

		if (tp->af_specific == &ipv6_mapped) {
			tcp_prot.hash(sk);
			return;
		}
		local_bh_disable();
		__tcp_v6_hash(sk);
		local_bh_enable();
	}
}

static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
{
	struct sock *sk;
	struct hlist_node *node;
	struct sock *result = NULL;
	int score, hiscore;

	hiscore = 0;
	read_lock(&tcp_lhash_lock);
	sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
		if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			score = 1;
			if (!ipv6_addr_any(&np->rcv_saddr)) {
				if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
					continue;
				score++;
			}
			if (sk->sk_bound_dev_if) {
				if (sk->sk_bound_dev_if != dif)
					continue;
				score++;
			}
			if (score == 3) {
				result = sk;
				break;
			}
			if (score > hiscore) {
				hiscore = score;
				result = sk;
			}
		}
	}
	if (result)
		sock_hold(result);
	read_unlock(&tcp_lhash_lock);
	return result;
}
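
/* Scoring above: one point for the port/family match, one more for
 * an exact rcv_saddr match, one more for an exact bound-device
 * match.  A score of 3 is a fully specified listener and ends the
 * walk early; otherwise hiscore keeps the best candidate, so a
 * wildcard listener only wins when nothing more specific is bound.
 */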

/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
 *
 * The sockhash lock must be held as a reader here.
 */

static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
						       struct in6_addr *daddr, u16 hnum,
						       int dif)
{
	struct tcp_ehash_bucket *head;
	struct sock *sk;
	struct hlist_node *node;
	__u32 ports = TCP_COMBINED_PORTS(sport, hnum);
	int hash;

	/* Optimize here for direct hit, only listening connections can
	 * have wildcards anyways.
	 */
	hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
	head = &tcp_ehash[hash];
	read_lock(&head->lock);
	sk_for_each(sk, node, &head->chain) {
		/* For IPV6 do the cheaper port and family tests first. */
		if (TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
			goto hit; /* You sunk my battleship! */
	}
	/* Must check for a TIME_WAIT'er before going to listener hash. */
	sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
		/* FIXME: acme: check this... */
		struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

		if (*((__u32 *)&(tw->tw_dport)) == ports &&
		    sk->sk_family == PF_INET6) {
			if (ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
			    ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
			    (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
				goto hit;
		}
	}
	read_unlock(&head->lock);
	return NULL;

hit:
	sock_hold(sk);
	read_unlock(&head->lock);
	return sk;
}
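
/* Layout note: the TIME_WAIT buckets live in a second array of
 * tcp_ehash_size entries placed directly after the established
 * table, so &(head + tcp_ehash_size)->chain is the TIME_WAIT chain
 * sharing this hash slot.
 */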


static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
					   struct in6_addr *daddr, u16 hnum,
					   int dif)
{
	struct sock *sk;

	sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);

	if (sk)
		return sk;

	return tcp_v6_lookup_listener(daddr, hnum, dif);
}

inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
				  struct in6_addr *daddr, u16 dport,
				  int dif)
{
	struct sock *sk;

	local_bh_disable();
	sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
	local_bh_enable();

	return sk;
}

EXPORT_SYMBOL_GPL(tcp_v6_lookup);


/*
 * Open request hash tables.
 */

static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
{
	u32 a, b, c;

	a = raddr->s6_addr32[0];
	b = raddr->s6_addr32[1];
	c = raddr->s6_addr32[2];

	a += JHASH_GOLDEN_RATIO;
	b += JHASH_GOLDEN_RATIO;
	c += rnd;
	__jhash_mix(a, b, c);

	a += raddr->s6_addr32[3];
	b += (u32)rport;
	__jhash_mix(a, b, c);

	return c & (TCP_SYNQ_HSIZE - 1);
}
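
/* This is an open-coded jhash over the 128-bit address plus the
 * port, seeded with lopt->hash_rnd: two __jhash_mix() rounds cover
 * the four address words and the 16-bit port, and the low bits of
 * the final word pick one of TCP_SYNQ_HSIZE chains (assuming, as the
 * mask requires, that TCP_SYNQ_HSIZE is a power of two).
 */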

static struct open_request *tcp_v6_search_req(struct tcp_sock *tp,
					      struct open_request ***prevp,
					      __u16 rport,
					      struct in6_addr *raddr,
					      struct in6_addr *laddr,
					      int iif)
{
	struct tcp_listen_opt *lopt = tp->listen_opt;
	struct open_request *req, **prev;

	for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
	     (req = *prev) != NULL;
	     prev = &req->dl_next) {
		const struct tcp6_request_sock *treq = tcp6_rsk(req);

		if (inet_rsk(req)->rmt_port == rport &&
		    req->class->family == AF_INET6 &&
		    ipv6_addr_equal(&treq->rmt_addr, raddr) &&
		    ipv6_addr_equal(&treq->loc_addr, laddr) &&
		    (!treq->iif || treq->iif == iif)) {
			BUG_TRAP(req->sk == NULL);
			*prevp = prev;
			return req;
		}
	}

	return NULL;
}

static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
				   struct in6_addr *saddr,
				   struct in6_addr *daddr,
				   unsigned long base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}

static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IPV6)) {
		return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
						    skb->nh.ipv6h->saddr.s6_addr32,
						    skb->h.th->dest,
						    skb->h.th->source);
	} else {
		return secure_tcp_sequence_number(skb->nh.iph->daddr,
						  skb->nh.iph->saddr,
						  skb->h.th->dest,
						  skb->h.th->source);
	}
}

static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
				      struct tcp_tw_bucket **twp)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *daddr = &np->rcv_saddr;
	struct in6_addr *saddr = &np->daddr;
	int dif = sk->sk_bound_dev_if;
	u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
	int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
	struct tcp_ehash_bucket *head = &tcp_ehash[hash];
	struct sock *sk2;
	struct hlist_node *node;
	struct tcp_tw_bucket *tw;

	write_lock(&head->lock);

	/* Check TIME-WAIT sockets first. */
	sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
		tw = (struct tcp_tw_bucket *)sk2;

		if (*((__u32 *)&(tw->tw_dport)) == ports &&
		    sk2->sk_family == PF_INET6 &&
		    ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
		    ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
		    sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
			struct tcp_sock *tp = tcp_sk(sk);

			if (tw->tw_ts_recent_stamp &&
			    (!twp || (sysctl_tcp_tw_reuse &&
				      xtime.tv_sec -
				      tw->tw_ts_recent_stamp > 1))) {
				/* See comment in tcp_ipv4.c */
				tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
				if (!tp->write_seq)
					tp->write_seq = 1;
				tp->rx_opt.ts_recent = tw->tw_ts_recent;
				tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
				sock_hold(sk2);
				goto unique;
			} else
				goto not_unique;
		}
	}
	tw = NULL;

	/* And established part... */
	sk_for_each(sk2, node, &head->chain) {
		if (TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
			goto not_unique;
	}

unique:
	BUG_TRAP(sk_unhashed(sk));
	__sk_add_node(sk, &head->chain);
	sk->sk_hashent = hash;
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(&head->lock);

	if (twp) {
		*twp = tw;
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
	} else if (tw) {
		/* Silly. Should hash-dance instead... */
		tcp_tw_deschedule(tw);
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

		tcp_tw_put(tw);
	}
	return 0;

not_unique:
	write_unlock(&head->lock);
	return -EADDRNOTAVAIL;
}
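
/* TIME_WAIT recycling, mirroring tcp_ipv4.c: a bucket for the same
 * 4-tuple may be taken over when it recorded timestamps and either
 * the caller passed twp == NULL or sysctl_tcp_tw_reuse is set and
 * more than a second has passed.  Starting write_seq at
 * tw_snd_nxt + 65535 + 2 puts the new incarnation safely above the
 * old sequence space.
 */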

static inline u32 tcpv6_port_offset(const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);

	return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
					   np->daddr.s6_addr32,
					   inet->dport);
}

static int tcp_v6_hash_connect(struct sock *sk)
{
	unsigned short snum = inet_sk(sk)->num;
	struct tcp_bind_hashbucket *head;
	struct tcp_bind_bucket *tb;
	int ret;

	if (!snum) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int range = high - low;
		int i;
		int port;
		static u32 hint;
		u32 offset = hint + tcpv6_port_offset(sk);
		struct hlist_node *node;
		struct tcp_tw_bucket *tw = NULL;

		local_bh_disable();
		for (i = 1; i <= range; i++) {
			port = low + (i + offset) % range;
			head = &tcp_bhash[tcp_bhashfn(port)];
			spin_lock(&head->lock);

			/* Does not bother with rcv_saddr checks,
			 * because the established check is already
			 * unique enough.
			 */
			tb_for_each(tb, node, &head->chain) {
				if (tb->port == port) {
					BUG_TRAP(!hlist_empty(&tb->owners));
					if (tb->fastreuse >= 0)
						goto next_port;
					if (!__tcp_v6_check_established(sk,
									port,
									&tw))
						goto ok;
					goto next_port;
				}
			}

			tb = tcp_bucket_create(head, port);
			if (!tb) {
				spin_unlock(&head->lock);
				break;
			}
			tb->fastreuse = -1;
			goto ok;

		next_port:
			spin_unlock(&head->lock);
		}
		local_bh_enable();

		return -EADDRNOTAVAIL;

ok:
		hint += i;

		/* Head lock still held and bh's disabled */
		tcp_bind_hash(sk, tb, port);
		if (sk_unhashed(sk)) {
			inet_sk(sk)->sport = htons(port);
			__tcp_v6_hash(sk);
		}
		spin_unlock(&head->lock);

		if (tw) {
			tcp_tw_deschedule(tw);
			tcp_tw_put(tw);
		}

		ret = 0;
		goto out;
	}

	head = &tcp_bhash[tcp_bhashfn(snum)];
	tb = tcp_sk(sk)->bind_hash;
	spin_lock_bh(&head->lock);

	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
		__tcp_v6_hash(sk);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		ret = __tcp_v6_check_established(sk, snum, NULL);
out:
		local_bh_enable();
		return ret;
	}
}
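
/* Ephemeral port search for connect(): the probe order starts at
 * hint + tcpv6_port_offset(sk), so different destinations walk the
 * bind hash in different orders while the static hint spreads
 * successive connects.  A bucket with fastreuse >= 0 belongs to
 * bind() users and is skipped outright; a connect()-owned bucket
 * (fastreuse == -1) may still be shared if
 * __tcp_v6_check_established() proves the new 4-tuple unique.
 */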

static __inline__ int tcp_v6_iif(struct sk_buff *skb)
{
	return IP6CB(skb)->iif;
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p = NULL, final;
	struct flowi fl;
	struct dst_entry *dst;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl, 0, sizeof(fl));

	if (np->sndflow) {
		fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl.fl6_flowlabel);
		if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 * connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
	np->flow_label = fl.fl6_flowlabel;

	/*
	 * TCP over IPv4
	 */

	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = tp->ext_header_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		tp->af_specific = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			tp->ext_header_len = exthdrlen;
			tp->af_specific = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
			goto failure;
		} else {
			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
				      inet->saddr);
			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
				      inet->rcv_saddr);
		}

		return err;
	}

	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src,
		       (saddr ? saddr : &np->saddr));
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_dport = usin->sin6_port;
	fl.fl_ip_sport = inet->sport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	err = ip6_dst_lookup(sk, &dst, &fl);
	if (err)
		goto failure;
	if (final_p)
		ipv6_addr_copy(&fl.fl6_dst, final_p);

	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
		dst_release(dst);
		goto failure;
	}

	if (saddr == NULL) {
		saddr = &fl.fl6_src;
		ipv6_addr_copy(&np->rcv_saddr, saddr);
	}

	/* set the source address */
	ipv6_addr_copy(&np->saddr, saddr);
	inet->rcv_saddr = LOOPBACK4_IPV6;

	ip6_dst_store(sk, dst, NULL);
	sk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	tp->ext_header_len = 0;
	if (np->opt)
		tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = tcp_v6_hash_connect(sk);
	if (err)
		goto late_failure;

	if (!tp->write_seq)
		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
							     np->daddr.s6_addr32,
							     inet->sport,
							     inet->dport);

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	__sk_dst_reset(sk);
failure:
	inet->dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
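
/* Note the dual-stack detour above: connect() to an IPv4-mapped
 * address flips the socket onto the ipv6_mapped ops and
 * tcp_v4_connect(), rewriting saddr/rcv_saddr into mapped form only
 * on success; on failure every override is rolled back, leaving a
 * plain IPv6 socket.
 */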

static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       int type, int code, int offset, __u32 info)
{
	struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data;
	struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp;
	__u32 seq;

	sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		tcp_tw_put((struct tcp_tw_bucket *)sk);
		return;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst = NULL;

		if (sock_owned_by_user(sk))
			goto out;
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
			goto out;

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct inet_sock *inet = inet_sk(sk);
			struct flowi fl;

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle rthdr case. Ignore this complexity
			   for now.
			 */
			memset(&fl, 0, sizeof(fl));
			fl.proto = IPPROTO_TCP;
			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
			fl.oif = sk->sk_bound_dev_if;
			fl.fl_ip_dport = inet->dport;
			fl.fl_ip_sport = inet->sport;

			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
				sk->sk_err_soft = -err;
				goto out;
			}

			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
				sk->sk_err_soft = -err;
				goto out;
			}

		} else
			dst_hold(dst);

		if (tp->pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */
		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for an open_request */
	switch (sk->sk_state) {
		struct open_request *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
					&hdr->saddr, tcp_v6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		BUG_TRAP(req->sk == NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		tcp_synq_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, if SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}


static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
			      struct dst_entry *dst)
{
	struct tcp6_request_sock *treq = tcp6_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	struct ipv6_txoptions *opt = NULL;
	struct in6_addr *final_p = NULL, final;
	struct flowi fl;
	int err = -1;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
	fl.fl6_flowlabel = 0;
	fl.oif = treq->iif;
	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
	fl.fl_ip_sport = inet_sk(sk)->sport;

	if (dst == NULL) {
		opt = np->opt;
		if (opt == NULL &&
		    np->rxopt.bits.srcrt == 2 &&
		    treq->pktopts) {
			struct sk_buff *pktopts = treq->pktopts;
			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
			if (rxopt->srcrt)
				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(pktopts->nh.raw + rxopt->srcrt));
		}

		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err)
			goto done;
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);
		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto done;
	}

	skb = tcp_make_synack(sk, dst, req);
	if (skb) {
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v6_check(th, skb->len,
					 &treq->loc_addr, &treq->rmt_addr,
					 csum_partial((char *)th, skb->len, skb->csum));

		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		err = ip6_xmit(sk, skb, &fl, opt, 0);
		if (err == NET_XMIT_CN)
			err = 0;
	}

done:
	dst_release(dst);
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	return err;
}

static void tcp_v6_or_free(struct open_request *req)
{
	if (tcp6_rsk(req)->pktopts)
		kfree_skb(tcp6_rsk(req)->pktopts);
}

static struct or_calltable or_ipv6 = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_v6_send_synack,
	.send_ack	=	tcp_v6_or_send_ack,
	.destructor	=	tcp_v6_or_free,
	.send_reset	=	tcp_v6_send_reset
};

static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet6_skb_parm *opt = IP6CB(skb);

	if (np->rxopt.all) {
		if ((opt->hop && np->rxopt.bits.hopopts) ||
		    ((IPV6_FLOWINFO_MASK & *(u32 *)skb->nh.raw) &&
		     np->rxopt.bits.rxflow) ||
		    (opt->srcrt && np->rxopt.bits.srcrt) ||
		    ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
			return 1;
	}
	return 0;
}


static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
			      struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);

	if (skb->ip_summed == CHECKSUM_HW) {
		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
		skb->csum = offsetof(struct tcphdr, check);
	} else {
		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
					    csum_partial((char *)th, th->doff << 2,
							 skb->csum));
	}
}


static void tcp_v6_send_reset(struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;

	if (th->rst)
		return;

	if (!ipv6_unicast_destination(skb))
		return;

	/*
	 * We need to grab some memory, and put together an RST,
	 * and then put it into the queue to be sent.
	 */

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));

	t1 = (struct tcphdr *)skb_push(buff, sizeof(struct tcphdr));

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = sizeof(*t1) / 4;
	t1->rst = 1;

	if (th->ack) {
		t1->seq = th->ack_seq;
	} else {
		t1->ack = 1;
		t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
				    + skb->len - (th->doff << 2));
	}

	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    sizeof(*t1), IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = tcp_v6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	/* sk = NULL, but it is safe for now. RST socket required. */
	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {

		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
			dst_release(buff->dst);
			return;
		}

		ip6_xmit(NULL, buff, &fl, NULL, 0);
		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
		TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}
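
/* RST sequence selection follows RFC 793: if the offending segment
 * carried an ACK, the reset is sent with seq = its ack_seq and no
 * ACK bit; otherwise seq stays zero and ack_seq acknowledges exactly
 * the sequence space the segment consumed (data plus SYN/FIN), which
 * is what makes the reset acceptable to the peer.
 */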

static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;
	int tot_len = sizeof(struct tcphdr);

	if (ts)
		tot_len += 3 * 4;

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *)skb_push(buff, tot_len);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = 1;
	t1->window = htons(win);

	if (ts) {
		u32 *ptr = (u32 *)(t1 + 1);
		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
			       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*ptr++ = htonl(tcp_time_stamp);
		*ptr = htonl(ts);
	}

	buff->csum = csum_partial((char *)t1, tot_len, 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    tot_len, IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = tcp_v6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
			dst_release(buff->dst);
			return;
		}
		ip6_xmit(NULL, buff, &fl, NULL, 0);
		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
		return;
	}

	kfree_skb(buff);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

	tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
			tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);

	tcp_tw_put(tw);
}

static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
{
	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
}


static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct open_request *req, **prev;
	struct tcphdr *th = skb->h.th;
	struct tcp_sock *tp = tcp_sk(sk);
	struct sock *nsk;

	/* Find possible connection requests. */
	req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
				&skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
					  th->source,
					  &skb->nh.ipv6h->daddr,
					  ntohs(th->dest),
					  tcp_v6_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		tcp_tw_put((struct tcp_tw_bucket *)nsk);
		return NULL;
	}

#if 0 /*def CONFIG_SYN_COOKIES*/
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_listen_opt *lopt = tp->listen_opt;
	u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);

	req->sk = NULL;
	req->expires = jiffies + TCP_TIMEOUT_INIT;
	req->retrans = 0;
	req->dl_next = lopt->syn_table[h];

	write_lock(&tp->syn_wait_lock);
	lopt->syn_table[h] = req;
	write_unlock(&tp->syn_wait_lock);

	tcp_synq_added(sk);
}


/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp6_request_sock *treq;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_options_received tmp_opt;
	struct tcp_sock *tp = tcp_sk(sk);
	struct open_request *req = NULL;
	__u32 isn = TCP_SKB_CB(skb)->when;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	/*
	 * There are no SYN attacks on IPv6, yet...
	 */
	if (tcp_synq_is_full(sk) && !isn) {
		if (net_ratelimit())
			printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
		goto drop;
	}

	if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
		goto drop;

	req = tcp_openreq_alloc(&or_ipv6);
	if (req == NULL)
		goto drop;

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	treq = tcp6_rsk(req);
	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
	TCP_ECN_create_request(req, skb->h.th);
	treq->pktopts = NULL;
	if (ipv6_opt_accepted(sk, skb) ||
	    np->rxopt.bits.rxinfo ||
	    np->rxopt.bits.rxhlim) {
		atomic_inc(&skb->users);
		treq->pktopts = skb;
	}
	treq->iif = sk->sk_bound_dev_if;

	/* So that link locals have meaning */
	if (!sk->sk_bound_dev_if &&
	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
		treq->iif = tcp_v6_iif(skb);

	if (isn == 0)
		isn = tcp_v6_init_sequence(sk, skb);

	tcp_rsk(req)->snt_isn = isn;

	if (tcp_v6_send_synack(sk, req, NULL))
		goto drop;

	tcp_v6_synq_add(sk, req);

	return 0;

drop:
	if (req)
		tcp_openreq_free(req);

	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
	return 0; /* don't send reset */
}

static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					 struct open_request *req,
					 struct dst_entry *dst)
{
	struct tcp6_request_sock *treq = tcp6_rsk(req);
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
	struct ipv6_txoptions *opt;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 * v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

		if (newsk == NULL)
			return NULL;

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
			      newinet->daddr);

		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
			      newinet->saddr);

		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

		newtp->af_specific = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
		newnp->pktoptions = NULL;
		newnp->opt = NULL;
		newnp->mcast_oif = tcp_v6_iif(skb);
		newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

		/* Charge newly allocated IPv6 socket. Though it is mapped,
		 * it is IPv6 yet.
		 */
#ifdef INET_REFCNT_DEBUG
		atomic_inc(&inet6_sock_nr);
#endif

		/* It is a tricky place. Until this moment IPv4 tcp
		   worked with the IPv6 af_tcp.af_specific.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, newtp->pmtu_cookie);

		return newsk;
	}

	opt = np->opt;

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (np->rxopt.bits.srcrt == 2 &&
	    opt == NULL && treq->pktopts) {
		struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
		if (rxopt->srcrt)
			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
	}

	if (dst == NULL) {
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}
		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
		fl.fl_ip_sport = inet_sk(sk)->sport;

		if (ip6_dst_lookup(sk, &dst, &fl))
			goto out;

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto out;

	/* Charge newly allocated IPv6 socket */
#ifdef INET_REFCNT_DEBUG
	atomic_inc(&inet6_sock_nr);
#endif

	ip6_dst_store(newsk, dst, NULL);
	newsk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
	newsk->sk_bound_dev_if = treq->iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->opt = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (treq->pktopts != NULL) {
		newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
		kfree_skb(treq->pktopts);
		treq->pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);
	}
	newnp->opt = NULL;
	newnp->mcast_oif = tcp_v6_iif(skb);
	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

	/* Clone native IPv6 options from the listening socket (if any).

	   Yes, keeping a reference count would be much more clever, but
	   we also do one more thing here: reattach the optmem to newsk.
	 */
	if (opt) {
		newnp->opt = ipv6_dup_options(newsk, opt);
		if (opt != np->opt)
			sock_kfree_s(sk, opt, opt->tot_len);
	}

	newtp->ext_header_len = 0;
	if (newnp->opt)
		newtp->ext_header_len = newnp->opt->opt_nflen +
					newnp->opt->opt_flen;

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;

	__tcp_v6_hash(newsk);
	tcp_inherit_port(sk, newsk);

	return newsk;

out_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
out:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
	return NULL;
}

static int tcp_v6_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_HW) {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		if (!tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				  &skb->nh.ipv6h->daddr, skb->csum))
			return 0;
		LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
	}
	if (skb->len <= 76) {
		if (tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				 &skb->nh.ipv6h->daddr, skb_checksum(skb, 0, skb->len, 0)))
			return -1;
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else {
		skb->csum = ~tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
					  &skb->nh.ipv6h->daddr, 0);
	}
	return 0;
}
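
/* Checksum strategy: trust CHECKSUM_HW only when the pseudo-header
 * check passes; fully verify short packets (<= 76 bytes) right here
 * since that is cheap; for longer ones just seed skb->csum with the
 * pseudo-header so tcp_checksum_complete() can finish the job
 * lazily, typically while copying to userspace.
 */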
1576
1577/* The socket must have it's spinlock held when we get
1578 * here.
1579 *
1580 * We have a potential double-lock case here, so even when
1581 * doing backlog processing we use the BH locking scheme.
1582 * This is because we cannot sleep with the original spinlock
1583 * held.
1584 */
1585static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1586{
1587 struct ipv6_pinfo *np = inet6_sk(sk);
1588 struct tcp_sock *tp;
1589 struct sk_buff *opt_skb = NULL;
1590
1591 /* Imagine: socket is IPv6. IPv4 packet arrives,
1592 goes to IPv4 receive handler and backlogged.
1593 From backlog it always goes here. Kerboom...
1594 Fortunately, tcp_rcv_established and rcv_established
1595 handle them correctly, but it is not case with
1596 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1597 */
1598
1599 if (skb->protocol == htons(ETH_P_IP))
1600 return tcp_v4_do_rcv(sk, skb);
1601
1602 if (sk_filter(sk, skb, 0))
1603 goto discard;
1604
1605 /*
1606 * socket locking is here for SMP purposes as backlog rcv
1607 * is currently called with bh processing disabled.
1608 */
1609
1610 /* Do Stevens' IPV6_PKTOPTIONS.
1611
1612 Yes, guys, it is the only place in our code, where we
1613 may make it not affecting IPv4.
1614 The rest of code is protocol independent,
1615 and I do not like idea to uglify IPv4.
1616
1617 Actually, all the idea behind IPV6_PKTOPTIONS
1618 looks not very well thought. For now we latch
1619 options, received in the last packet, enqueued
1620 by tcp. Feel free to propose better solution.
1621 --ANK (980728)
1622 */
1623 if (np->rxopt.all)
1624 opt_skb = skb_clone(skb, GFP_ATOMIC);
1625
1626 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1627 TCP_CHECK_TIMER(sk);
1628 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1629 goto reset;
1630 TCP_CHECK_TIMER(sk);
1631 if (opt_skb)
1632 goto ipv6_pktoptions;
1633 return 0;
1634 }
1635
1636 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1637 goto csum_err;
1638
1639 if (sk->sk_state == TCP_LISTEN) {
1640 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1641 if (!nsk)
1642 goto discard;
1643
1644 /*
1645 * Queue it on the new socket if the new socket is active,
1646 * otherwise we just shortcircuit this and continue with
1647 * the new socket..
1648 */
1649 if(nsk != sk) {
1650 if (tcp_child_process(sk, nsk, skb))
1651 goto reset;
1652 if (opt_skb)
1653 __kfree_skb(opt_skb);
1654 return 0;
1655 }
1656 }
1657
1658 TCP_CHECK_TIMER(sk);
1659 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1660 goto reset;
1661 TCP_CHECK_TIMER(sk);
1662 if (opt_skb)
1663 goto ipv6_pktoptions;
1664 return 0;
1665
1666reset:
1667 tcp_v6_send_reset(skb);
1668discard:
1669 if (opt_skb)
1670 __kfree_skb(opt_skb);
1671 kfree_skb(skb);
1672 return 0;
1673csum_err:
1674 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1675 goto discard;
1676
1677
1678ipv6_pktoptions:
1679 /* Do you ask, what is it?
1680
1681 1. skb was enqueued by tcp.
1682 2. skb is added to tail of read queue, rather than out of order.
1683 3. socket is not in passive state.
1684 4. Finally, it really contains options, which user wants to receive.
1685 */
1686 tp = tcp_sk(sk);
1687 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1688 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1689 if (np->rxopt.bits.rxinfo)
1690 np->mcast_oif = tcp_v6_iif(opt_skb);
1691 if (np->rxopt.bits.rxhlim)
1692 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1693 if (ipv6_opt_accepted(sk, opt_skb)) {
1694 skb_set_owner_r(opt_skb, sk);
1695 opt_skb = xchg(&np->pktoptions, opt_skb);
1696 } else {
1697 __kfree_skb(opt_skb);
1698 opt_skb = xchg(&np->pktoptions, NULL);
1699 }
1700 }
1701
1702 if (opt_skb)
1703 kfree_skb(opt_skb);
1704 return 0;
1705}
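
/* opt_skb lifetime: a clone is taken before the segment is consumed
 * and latched into np->pktoptions via xchg() only for in-order data
 * on a non-listening socket whose options the user asked for; every
 * other path frees it, so at most one snapshot of the last segment's
 * options is held per socket.
 */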

static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
{
	struct sk_buff *skb = *pskb;
	struct tcphdr *th;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 * Count it even if it's bad.
	 */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = skb->h.th;

	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
	     tcp_v6_checksum_init(skb) < 0))
		goto bad_packet;

	th = skb->h.th;
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
			     &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));

	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb, 0))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v6_do_rcv(sk, skb);
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(skb);
	}

discard_it:

	/*
	 *	Discard frame
	 */

	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		tcp_tw_put((struct tcp_tw_bucket *)sk);
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		tcp_tw_put((struct tcp_tw_bucket *)sk);
		goto discard_it;
	}

	switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
					   skb, th, skb->len)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
		if (sk2 != NULL) {
			tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
			tcp_tw_put((struct tcp_tw_bucket *)sk);
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}

static int tcp_v6_rebuild_header(struct sock *sk)
{
	int err;
	struct dst_entry *dst;
	struct ipv6_pinfo *np = inet6_sk(sk);

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		struct inet_sock *inet = inet_sk(sk);
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
		fl.fl6_flowlabel = np->flow_label;
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet->dport;
		fl.fl_ip_sport = inet->sport;

		if (np->opt && np->opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err) {
			sk->sk_route_caps = 0;
			return err;
		}
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
			sk->sk_err_soft = -err;
			dst_release(dst);
			return err;
		}

		ip6_dst_store(sk, dst, NULL);
		sk->sk_route_caps = dst->dev->features &
			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
	}

	return 0;
}

static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
{
	struct sock *sk = skb->sk;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct flowi fl;
	struct dst_entry *dst;
	struct in6_addr *final_p = NULL, final;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
	fl.fl6_flowlabel = np->flow_label;
	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_sport = inet->sport;
	fl.fl_ip_dport = inet->dport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		int err = ip6_dst_lookup(sk, &dst, &fl);

		if (err) {
			sk->sk_err_soft = -err;
			return err;
		}

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
			sk->sk_route_caps = 0;
			dst_release(dst);
			return err;
		}

		ip6_dst_store(sk, dst, NULL);
		sk->sk_route_caps = dst->dev->features &
			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
	}

	skb->dst = dst_clone(dst);

	/* Restore final destination back after routing done */
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);

	return ip6_xmit(sk, skb, &fl, np->opt, 0);
}

static void v6_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)uaddr;

	sin6->sin6_family = AF_INET6;
	ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
	sin6->sin6_port = inet_sk(sk)->dport;
	/* We do not store received flowlabel for TCP */
	sin6->sin6_flowinfo = 0;
	sin6->sin6_scope_id = 0;
	if (sk->sk_bound_dev_if &&
	    ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
		sin6->sin6_scope_id = sk->sk_bound_dev_if;
}

static int tcp_v6_remember_stamp(struct sock *sk)
{
	/* Alas, not yet... */
	return 0;
}

static struct tcp_func ipv6_specific = {
	.queue_xmit	=	tcp_v6_xmit,
	.send_check	=	tcp_v6_send_check,
	.rebuild_header	=	tcp_v6_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v6_remember_stamp,
	.net_header_len	=	sizeof(struct ipv6hdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	v6_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};

/*
 *	TCP over IPv4 via INET6 API
 */

static struct tcp_func ipv6_mapped = {
	.queue_xmit	=	ip_queue_xmit,
	.send_check	=	tcp_v4_send_check,
	.rebuild_header	=	tcp_v4_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v4_remember_stamp,
	.net_header_len	=	sizeof(struct iphdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	v6_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};
2000
2001
2002
/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	tp->rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache_std = tp->mss_cache = 536;

	tp->reordering = sysctl_tcp_reordering;

	sk->sk_state = TCP_CLOSE;

	tp->af_specific = &ipv6_specific;

	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}

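/*
 * tcp_v4_destroy_sock() handles the af-independent TCP state despite
 * its name; inet6_destroy_sock() then cleans up the IPv6 side.
 */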
static int tcp_v6_destroy_sock(struct sock *sk)
{
	extern int tcp_v4_destroy_sock(struct sock *sk);

	tcp_v4_destroy_sock(sk);
	return inet6_destroy_sock(sk);
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 struct sock *sk, struct open_request *req, int i, int uid)
{
	struct in6_addr *dest, *src;
	int ttd = req->expires - jiffies;

	if (ttd < 0)
		ttd = 0;

	src = &tcp6_rsk(req)->loc_addr;
	dest = &tcp6_rsk(req)->rmt_addr;
	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   ntohs(inet_sk(sk)->sport),
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,    /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->retrans,
		   uid,
		   0,    /* non standard timer */
		   0,    /* open_requests have no inode */
		   0, req);
}

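/*
 * Format one full socket as a /proc/net/tcp6 row.  Addresses and ports
 * are hex, so for instance a listener on port 22 ends with ":0016" and
 * shows state 0A (TCP_LISTEN).
 */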
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	struct inet_sock *inet = inet_sk(sp);
	struct tcp_sock *tp = tcp_sk(sp);
	struct ipv6_pinfo *np = inet6_sk(sp);

	dest = &np->daddr;
	src = &np->rcv_saddr;
	destp = ntohs(inet->dport);
	srcp = ntohs(inet->sport);
	if (tp->pending == TCP_TIME_RETRANS) {
		timer_active = 1;
		timer_expires = tp->timeout;
	} else if (tp->pending == TCP_TIME_PROBE0) {
		timer_active = 4;
		timer_expires = tp->timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active = 2;
		timer_expires = sp->sk_timer.expires;
	} else {
		timer_active = 0;
		timer_expires = jiffies;
	}

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   sp->sk_state,
		   tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq,
		   timer_active,
		   jiffies_to_clock_t(timer_expires - jiffies),
		   tp->retransmits,
		   sock_i_uid(sp),
		   tp->probes_out,
		   sock_i_ino(sp),
		   atomic_read(&sp->sk_refcnt), sp,
		   tp->rto, tp->ack.ato, (tp->ack.quick << 1) | tp->ack.pingpong,
		   tp->snd_cwnd, tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh
		   );
}

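/*
 * Format a TIME_WAIT bucket as a /proc/net/tcp6 row; fields that only
 * make sense for a full socket are printed as zero.
 */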
static void get_timewait6_sock(struct seq_file *seq,
			       struct tcp_tw_bucket *tw, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest = &tw->tw_v6_daddr;
	src = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		   atomic_read(&tw->tw_refcnt), tw);
}

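/*
 * seq_file show handler: print the header line for the start token,
 * otherwise dispatch on the iterator state to a formatter above.
 */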
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp6_sock(seq, v, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait6_sock(seq, v, st->num);
		break;
	}
out:
	return 0;
}

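/* Glue for the af-independent /proc/net/tcp* seq_file machinery. */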
static struct file_operations tcp6_seq_fops;
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.owner		= THIS_MODULE,
	.name		= "tcp6",
	.family		= AF_INET6,
	.seq_show	= tcp6_seq_show,
	.seq_fops	= &tcp6_seq_fops,
};

int __init tcp6_proc_init(void)
{
	return tcp_proc_register(&tcp6_seq_afinfo);
}

void tcp6_proc_exit(void)
{
	tcp_proc_unregister(&tcp6_seq_afinfo);
}
#endif

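/*
 * The TCPv6 protocol itself: the shared TCP entry points with the
 * IPv6 flavours of connect, receive and hashing plugged in.
 */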
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= tcp_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.sendmsg		= tcp_sendmsg,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v6_do_rcv,
	.hash			= tcp_v6_hash,
	.unhash			= tcp_unhash,
	.get_port		= tcp_v6_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.rsk_prot		= &or_ipv6,
};

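/* Inet6 demux entry: delivers IPPROTO_TCP packets and ICMPv6 errors. */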
static struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

extern struct proto_ops inet6_stream_ops;

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.capability	=	-1,
	.no_check	=	0,
	.flags		=	INET_PROTOSW_PERMANENT,
};

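/* Hook TCP into the IPv6 stack at initialization time. */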
void __init tcpv6_init(void)
{
	/* register inet6 protocol */
	if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
		printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
	inet6_register_protosw(&tcpv6_protosw);
}