/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static void	tcp_v6_send_reset(struct sk_buff *skb);
static void	tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
static void	tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
				  struct sk_buff *skb);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static int	tcp_v6_xmit(struct sk_buff *skb, int ipfragok);

static struct tcp_func ipv6_mapped;
static struct tcp_func ipv6_specific;

/* I have no idea if this is a good hash for v6 or not. -DaveM */
static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
				    struct in6_addr *faddr, u16 fport)
{
	int hashent = (lport ^ fport);

	hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
	hashent ^= hashent >> 16;
	hashent ^= hashent >> 8;
	return (hashent & (tcp_ehash_size - 1));
}
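
/* Note on the hash above: only the low 32 bits of each address
 * (s6_addr32[3]) are mixed in, XORed with the port pair, then folded
 * down by the two shifts so that the high bits also influence the slot
 * kept by the mask.  The mask only yields a valid index if
 * tcp_ehash_size is a power of two.  A rough sketch of one evaluation,
 * using made-up example values:
 *
 *	laddr = ::1 (s6_addr32[3] == htonl(1)),	lport = 80
 *	faddr->s6_addr32[3] = 0x01020304,	fport = 1025
 *	hashent  = (80 ^ 1025) ^ (htonl(1) ^ 0x01020304);
 *	hashent ^= hashent >> 16;  hashent ^= hashent >> 8;
 *	slot     = hashent & (tcp_ehash_size - 1);
 */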

static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *laddr = &np->rcv_saddr;
	struct in6_addr *faddr = &np->daddr;
	__u16 lport = inet->num;
	__u16 fport = inet->dport;
	return tcp_v6_hashfn(laddr, lport, faddr, fport);
}

static inline int tcp_v6_bind_conflict(struct sock *sk,
				       struct tcp_bind_bucket *tb)
{
	struct sock *sk2;
	struct hlist_node *node;

	/* We must walk the whole port owner list in this case. -DaveM */
	sk_for_each_bound(sk2, node, &tb->owners) {
		if (sk != sk2 &&
		    (!sk->sk_bound_dev_if ||
		     !sk2->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
		    (!sk->sk_reuse || !sk2->sk_reuse ||
		     sk2->sk_state == TCP_LISTEN) &&
		    ipv6_rcv_saddr_equal(sk, sk2))
			break;
	}

	return node != NULL;
}
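
/* Two sockets conflict on a port only when all three conditions hold:
 * they are not isolated from each other by disjoint device bindings,
 * SO_REUSEADDR cannot save them (one side lacks sk_reuse, or the
 * existing owner is a listener), and their receive addresses actually
 * overlap according to ipv6_rcv_saddr_equal().
 */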

/* Grrr, addr_type already calculated by caller, but I don't want
 * to add some silly "cookie" argument to this method just for that.
 * But it doesn't matter, the recalculation is in the rarest path
 * this function ever takes.
 */
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
	struct tcp_bind_hashbucket *head;
	struct tcp_bind_bucket *tb;
	struct hlist_node *node;
	int ret;

	local_bh_disable();
	if (snum == 0) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int remaining = (high - low) + 1;
		int rover;

		spin_lock(&tcp_portalloc_lock);
		rover = tcp_port_rover;
		do {	rover++;
			if ((rover < low) || (rover > high))
				rover = low;
			head = &tcp_bhash[tcp_bhashfn(rover)];
			spin_lock(&head->lock);
			tb_for_each(tb, node, &head->chain)
				if (tb->port == rover)
					goto next;
			break;
		next:
			spin_unlock(&head->lock);
		} while (--remaining > 0);
		tcp_port_rover = rover;
		spin_unlock(&tcp_portalloc_lock);

		/* Exhausted local port range during search? */
		ret = 1;
		if (remaining <= 0)
			goto fail;

		/* OK, here is the one we will use. */
		snum = rover;
	} else {
		head = &tcp_bhash[tcp_bhashfn(snum)];
		spin_lock(&head->lock);
		tb_for_each(tb, node, &head->chain)
			if (tb->port == snum)
				goto tb_found;
	}
	tb = NULL;
	goto tb_not_found;
tb_found:
	if (tb && !hlist_empty(&tb->owners)) {
		if (tb->fastreuse > 0 && sk->sk_reuse &&
		    sk->sk_state != TCP_LISTEN) {
			goto success;
		} else {
			ret = 1;
			if (tcp_v6_bind_conflict(sk, tb))
				goto fail_unlock;
		}
	}
tb_not_found:
	ret = 1;
	if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
		goto fail_unlock;
	if (hlist_empty(&tb->owners)) {
		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
			tb->fastreuse = 1;
		else
			tb->fastreuse = 0;
	} else if (tb->fastreuse &&
		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
		tb->fastreuse = 0;

success:
	if (!tcp_sk(sk)->bind_hash)
		tcp_bind_hash(sk, tb, snum);
	BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
	ret = 0;

fail_unlock:
	spin_unlock(&head->lock);
fail:
	local_bh_enable();
	return ret;
}

static __inline__ void __tcp_v6_hash(struct sock *sk)
{
	struct hlist_head *list;
	rwlock_t *lock;

	BUG_TRAP(sk_unhashed(sk));

	if (sk->sk_state == TCP_LISTEN) {
		list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
		lock = &tcp_lhash_lock;
		tcp_listen_wlock();
	} else {
		sk->sk_hashent = tcp_v6_sk_hashfn(sk);
		list = &tcp_ehash[sk->sk_hashent].chain;
		lock = &tcp_ehash[sk->sk_hashent].lock;
		write_lock(lock);
	}

	__sk_add_node(sk, list);
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(lock);
}


static void tcp_v6_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		struct tcp_sock *tp = tcp_sk(sk);

		if (tp->af_specific == &ipv6_mapped) {
			tcp_prot.hash(sk);
			return;
		}
		local_bh_disable();
		__tcp_v6_hash(sk);
		local_bh_enable();
	}
}

static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
{
	struct sock *sk;
	struct hlist_node *node;
	struct sock *result = NULL;
	int score, hiscore;

	hiscore = 0;
	read_lock(&tcp_lhash_lock);
	sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
		if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			score = 1;
			if (!ipv6_addr_any(&np->rcv_saddr)) {
				if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
					continue;
				score++;
			}
			if (sk->sk_bound_dev_if) {
				if (sk->sk_bound_dev_if != dif)
					continue;
				score++;
			}
			if (score == 3) {
				result = sk;
				break;
			}
			if (score > hiscore) {
				hiscore = score;
				result = sk;
			}
		}
	}
	if (result)
		sock_hold(result);
	read_unlock(&tcp_lhash_lock);
	return result;
}

/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
 *
 * The sockhash lock must be held as a reader here.
 */

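/* Lookup order for inbound segments: the established hash first (exact
 * four-tuple match), then the TIME_WAIT chain that lives tcp_ehash_size
 * buckets past the established one, and only if both miss do we fall
 * back to the wildcard-capable listener table (see __tcp_v6_lookup()
 * below).
 */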
static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
						       struct in6_addr *daddr, u16 hnum,
						       int dif)
{
	struct tcp_ehash_bucket *head;
	struct sock *sk;
	struct hlist_node *node;
	__u32 ports = TCP_COMBINED_PORTS(sport, hnum);
	int hash;

	/* Optimize here for direct hit, only listening connections can
	 * have wildcards anyways.
	 */
	hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
	head = &tcp_ehash[hash];
	read_lock(&head->lock);
	sk_for_each(sk, node, &head->chain) {
		/* For IPV6 do the cheaper port and family tests first. */
		if (TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
			goto hit; /* You sunk my battleship! */
	}
	/* Must check for a TIME_WAIT'er before going to listener hash. */
	sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
		/* FIXME: acme: check this... */
		struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

		if (*((__u32 *)&(tw->tw_dport)) == ports &&
		    sk->sk_family == PF_INET6) {
			if (ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
			    ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
			    (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
				goto hit;
		}
	}
	read_unlock(&head->lock);
	return NULL;

hit:
	sock_hold(sk);
	read_unlock(&head->lock);
	return sk;
}


static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
					   struct in6_addr *daddr, u16 hnum,
					   int dif)
{
	struct sock *sk;

	sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);

	if (sk)
		return sk;

	return tcp_v6_lookup_listener(daddr, hnum, dif);
}

inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
				  struct in6_addr *daddr, u16 dport,
				  int dif)
{
	struct sock *sk;

	local_bh_disable();
	sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
	local_bh_enable();

	return sk;
}

EXPORT_SYMBOL_GPL(tcp_v6_lookup);


/*
 * Open request hash tables.
 */

static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
{
	u32 a, b, c;

	a = raddr->s6_addr32[0];
	b = raddr->s6_addr32[1];
	c = raddr->s6_addr32[2];

	a += JHASH_GOLDEN_RATIO;
	b += JHASH_GOLDEN_RATIO;
	c += rnd;
	__jhash_mix(a, b, c);

	a += raddr->s6_addr32[3];
	b += (u32)rport;
	__jhash_mix(a, b, c);

	return c & (TCP_SYNQ_HSIZE - 1);
}
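
/* The syn-queue hash above runs the Jenkins mix primitives from
 * <linux/jhash.h> over the full 128-bit peer address plus the remote
 * port, keyed with the per-listener random value (lopt->hash_rnd) so a
 * remote sender cannot aim all its SYNs at a single bucket.  The final
 * mask assumes TCP_SYNQ_HSIZE is a power of two.
 */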

static struct open_request *tcp_v6_search_req(struct tcp_sock *tp,
					      struct open_request ***prevp,
					      __u16 rport,
					      struct in6_addr *raddr,
					      struct in6_addr *laddr,
					      int iif)
{
	struct tcp_listen_opt *lopt = tp->listen_opt;
	struct open_request *req, **prev;

	for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
	     (req = *prev) != NULL;
	     prev = &req->dl_next) {
		if (req->rmt_port == rport &&
		    req->class->family == AF_INET6 &&
		    ipv6_addr_equal(&req->af.v6_req.rmt_addr, raddr) &&
		    ipv6_addr_equal(&req->af.v6_req.loc_addr, laddr) &&
		    (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
			BUG_TRAP(req->sk == NULL);
			*prevp = prev;
			return req;
		}
	}

	return NULL;
}

static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
				   struct in6_addr *saddr,
				   struct in6_addr *daddr,
				   unsigned long base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}

static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IPV6)) {
		return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
						    skb->nh.ipv6h->saddr.s6_addr32,
						    skb->h.th->dest,
						    skb->h.th->source);
	} else {
		return secure_tcp_sequence_number(skb->nh.iph->daddr,
						  skb->nh.iph->saddr,
						  skb->h.th->dest,
						  skb->h.th->source);
	}
}

static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
				      struct tcp_tw_bucket **twp)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *daddr = &np->rcv_saddr;
	struct in6_addr *saddr = &np->daddr;
	int dif = sk->sk_bound_dev_if;
	u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
	int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
	struct tcp_ehash_bucket *head = &tcp_ehash[hash];
	struct sock *sk2;
	struct hlist_node *node;
	struct tcp_tw_bucket *tw;

	write_lock(&head->lock);

	/* Check TIME-WAIT sockets first. */
	sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
		tw = (struct tcp_tw_bucket *)sk2;

		if (*((__u32 *)&(tw->tw_dport)) == ports &&
		    sk2->sk_family == PF_INET6 &&
		    ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
		    ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
		    sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
			struct tcp_sock *tp = tcp_sk(sk);

			if (tw->tw_ts_recent_stamp &&
			    (!twp || (sysctl_tcp_tw_reuse &&
				      xtime.tv_sec -
				      tw->tw_ts_recent_stamp > 1))) {
				/* See comment in tcp_ipv4.c */
				tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
				if (!tp->write_seq)
					tp->write_seq = 1;
				tp->rx_opt.ts_recent = tw->tw_ts_recent;
				tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
				sock_hold(sk2);
				goto unique;
			} else
				goto not_unique;
		}
	}
	tw = NULL;

	/* And established part... */
	sk_for_each(sk2, node, &head->chain) {
		if (TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
			goto not_unique;
	}

unique:
	BUG_TRAP(sk_unhashed(sk));
	__sk_add_node(sk, &head->chain);
	sk->sk_hashent = hash;
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(&head->lock);

	if (twp) {
		*twp = tw;
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
	} else if (tw) {
		/* Silly. Should hash-dance instead... */
		tcp_tw_deschedule(tw);
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

		tcp_tw_put(tw);
	}
	return 0;

not_unique:
	write_unlock(&head->lock);
	return -EADDRNOTAVAIL;
}
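
/* The TIME-WAIT branch above appears to implement, roughly: an old
 * bucket may be recycled only when it recorded a timestamp and either
 * the caller passes twp == NULL (conflict handled on the spot) or
 * sysctl_tcp_tw_reuse is set and more than a second has passed since
 * the last segment, so the new connection's timestamps cannot be
 * mistaken for the old one's.  write_seq restarts past tw_snd_nxt
 * (+ 65535 + 2) so the new ISN lies beyond anything the previous
 * incarnation could still have in flight.
 */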

static inline u32 tcpv6_port_offset(const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);

	return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
					   np->daddr.s6_addr32,
					   inet->dport);
}

static int tcp_v6_hash_connect(struct sock *sk)
{
	unsigned short snum = inet_sk(sk)->num;
	struct tcp_bind_hashbucket *head;
	struct tcp_bind_bucket *tb;
	int ret;

	if (!snum) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int range = high - low;
		int i;
		int port;
		static u32 hint;
		u32 offset = hint + tcpv6_port_offset(sk);
		struct hlist_node *node;
		struct tcp_tw_bucket *tw = NULL;

		local_bh_disable();
		for (i = 1; i <= range; i++) {
			port = low + (i + offset) % range;
			head = &tcp_bhash[tcp_bhashfn(port)];
			spin_lock(&head->lock);

			/* Does not bother with rcv_saddr checks,
			 * because the established check is already
			 * unique enough.
			 */
			tb_for_each(tb, node, &head->chain) {
				if (tb->port == port) {
					BUG_TRAP(!hlist_empty(&tb->owners));
					if (tb->fastreuse >= 0)
						goto next_port;
					if (!__tcp_v6_check_established(sk,
									port,
									&tw))
						goto ok;
					goto next_port;
				}
			}

			tb = tcp_bucket_create(head, port);
			if (!tb) {
				spin_unlock(&head->lock);
				break;
			}
			tb->fastreuse = -1;
			goto ok;

		next_port:
			spin_unlock(&head->lock);
		}
		local_bh_enable();

		return -EADDRNOTAVAIL;

ok:
		hint += i;

		/* Head lock still held and bh's disabled */
		tcp_bind_hash(sk, tb, port);
		if (sk_unhashed(sk)) {
			inet_sk(sk)->sport = htons(port);
			__tcp_v6_hash(sk);
		}
		spin_unlock(&head->lock);

		if (tw) {
			tcp_tw_deschedule(tw);
			tcp_tw_put(tw);
		}

		ret = 0;
		goto out;
	}

	head = &tcp_bhash[tcp_bhashfn(snum)];
	tb = tcp_sk(sk)->bind_hash;
	spin_lock_bh(&head->lock);

	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
		__tcp_v6_hash(sk);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		ret = __tcp_v6_check_established(sk, snum, NULL);
out:
		local_bh_enable();
		return ret;
	}
}
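
/* The ephemeral search above starts from a per-destination offset
 * (secure_tcpv6_port_ephemeral() keyed by the address pair and
 * destination port) plus a global hint that advances after each
 * success, so consecutive connects spread across the local port range
 * instead of retrying the same port, while the starting point for a
 * given peer stays unpredictable to outside observers.
 */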

static __inline__ int tcp_v6_iif(struct sk_buff *skb)
{
	return IP6CB(skb)->iif;
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p = NULL, final;
	struct flowi fl;
	struct dst_entry *dst;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl, 0, sizeof(fl));

	if (np->sndflow) {
		fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl.fl6_flowlabel);
		if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 * connect() to INADDR_ANY means loopback (BSD'ism).
	 */
	if (ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
	np->flow_label = fl.fl6_flowlabel;

	/*
	 * TCP over IPv4
	 */
	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = tp->ext_header_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		tp->af_specific = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			tp->ext_header_len = exthdrlen;
			tp->af_specific = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
			goto failure;
		} else {
			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
				      inet->saddr);
			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
				      inet->rcv_saddr);
		}

		return err;
	}

	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src, (saddr ? saddr : &np->saddr));
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_dport = usin->sin6_port;
	fl.fl_ip_sport = inet->sport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	err = ip6_dst_lookup(sk, &dst, &fl);
	if (err)
		goto failure;
	if (final_p)
		ipv6_addr_copy(&fl.fl6_dst, final_p);

	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
		dst_release(dst);
		goto failure;
	}

	if (saddr == NULL) {
		saddr = &fl.fl6_src;
		ipv6_addr_copy(&np->rcv_saddr, saddr);
	}

	/* set the source address */
	ipv6_addr_copy(&np->saddr, saddr);
	inet->rcv_saddr = LOOPBACK4_IPV6;

	ip6_dst_store(sk, dst, NULL);
	sk->sk_route_caps = dst->dev->features &
			    ~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	tp->ext_header_len = 0;
	if (np->opt)
		tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = tcp_v6_hash_connect(sk);
	if (err)
		goto late_failure;

	if (!tp->write_seq)
		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
							     np->daddr.s6_addr32,
							     inet->sport,
							     inet->dport);

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	__sk_dst_reset(sk);
failure:
	inet->dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
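
/* Note on the mss_clamp set above: IPV6_MIN_MTU is 1280, the minimum
 * MTU every IPv6 link must support, so the clamp works out to
 * 1280 - 40 (ipv6hdr) - 20 (tcphdr) = 1220 bytes, and the SYN never
 * advertises an MSS that could force fragmentation on a minimal path.
 */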

static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       int type, int code, int offset, __u32 info)
{
	struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data;
	struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp;
	__u32 seq;

	sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		tcp_tw_put((struct tcp_tw_bucket *)sk);
		return;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst = NULL;

		if (sock_owned_by_user(sk))
			goto out;
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
			goto out;

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct inet_sock *inet = inet_sk(sk);
			struct flowi fl;

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle rthdr case. Ignore this complexity
			   for now.
			 */
			memset(&fl, 0, sizeof(fl));
			fl.proto = IPPROTO_TCP;
			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
			fl.oif = sk->sk_bound_dev_if;
			fl.fl_ip_dport = inet->dport;
			fl.fl_ip_sport = inet->sport;

			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
				sk->sk_err_soft = -err;
				goto out;
			}

			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
				sk->sk_err_soft = -err;
				goto out;
			}

		} else
			dst_hold(dst);

		if (tp->pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */
		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for an open_request */
	switch (sk->sk_state) {
		struct open_request *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
					&hdr->saddr, tcp_v6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		BUG_TRAP(req->sk == NULL);

		if (seq != req->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		tcp_synq_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, if SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}


static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
			      struct dst_entry *dst)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	struct ipv6_txoptions *opt = NULL;
	struct in6_addr *final_p = NULL, final;
	struct flowi fl;
	int err = -1;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
	ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
	fl.fl6_flowlabel = 0;
	fl.oif = req->af.v6_req.iif;
	fl.fl_ip_dport = req->rmt_port;
	fl.fl_ip_sport = inet_sk(sk)->sport;

	if (dst == NULL) {
		opt = np->opt;
		if (opt == NULL &&
		    np->rxopt.bits.srcrt == 2 &&
		    req->af.v6_req.pktopts) {
			struct sk_buff *pktopts = req->af.v6_req.pktopts;
			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
			if (rxopt->srcrt)
				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(pktopts->nh.raw + rxopt->srcrt));
		}

		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err)
			goto done;
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);
		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto done;
	}

	skb = tcp_make_synack(sk, dst, req);
	if (skb) {
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v6_check(th, skb->len,
					 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
					 csum_partial((char *)th, skb->len, skb->csum));

		ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
		err = ip6_xmit(sk, skb, &fl, opt, 0);
		if (err == NET_XMIT_CN)
			err = 0;
	}

done:
	dst_release(dst);
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	return err;
}

static void tcp_v6_or_free(struct open_request *req)
{
	if (req->af.v6_req.pktopts)
		kfree_skb(req->af.v6_req.pktopts);
}

static struct or_calltable or_ipv6 = {
	.family		=	AF_INET6,
	.rtx_syn_ack	=	tcp_v6_send_synack,
	.send_ack	=	tcp_v6_or_send_ack,
	.destructor	=	tcp_v6_or_free,
	.send_reset	=	tcp_v6_send_reset
};

static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet6_skb_parm *opt = IP6CB(skb);

	if (np->rxopt.all) {
		if ((opt->hop && np->rxopt.bits.hopopts) ||
		    ((IPV6_FLOWINFO_MASK & *(u32 *)skb->nh.raw) &&
		     np->rxopt.bits.rxflow) ||
		    (opt->srcrt && np->rxopt.bits.srcrt) ||
		    ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
			return 1;
	}
	return 0;
}


static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
			      struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);

	if (skb->ip_summed == CHECKSUM_HW) {
		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
		skb->csum = offsetof(struct tcphdr, check);
	} else {
		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
					    csum_partial((char *)th, th->doff << 2,
							 skb->csum));
	}
}
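
/* Note on tcp_v6_send_check(): with CHECKSUM_HW the software only seeds
 * th->check with the inverted pseudo-header sum and records in
 * skb->csum the offset where the final checksum must be written; the
 * NIC folds in the TCP header and payload on transmit.  Otherwise the
 * full checksum is computed here over the pseudo-header, the TCP header
 * and the payload sum already accumulated in skb->csum.
 */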


static void tcp_v6_send_reset(struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;

	if (th->rst)
		return;

	if (!ipv6_unicast_destination(skb))
		return;

	/*
	 * We need to grab some memory, and put together an RST,
	 * and then put it into the queue to be sent.
	 */
	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));

	t1 = (struct tcphdr *)skb_push(buff, sizeof(struct tcphdr));

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = sizeof(*t1) / 4;
	t1->rst = 1;

	if (th->ack) {
		t1->seq = th->ack_seq;
	} else {
		t1->ack = 1;
		t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
				    + skb->len - (th->doff << 2));
	}

	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    sizeof(*t1), IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = tcp_v6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	/* sk = NULL, but it is safe for now. RST socket required. */
	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {

		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
			dst_release(buff->dst);
			return;
		}

		ip6_xmit(NULL, buff, &fl, NULL, 0);
		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
		TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}
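
/* The sequence numbers above follow RFC 793's reset generation rules:
 * if the offending segment carried an ACK, the RST is sent with
 * seq = its ack_seq and no ACK bit, which the peer must accept;
 * otherwise seq stays 0 and the RST ACKs everything the segment
 * occupied (payload length plus one for each of SYN and FIN), so the
 * peer can match it against what it sent.
 */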

static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;
	int tot_len = sizeof(struct tcphdr);

	if (ts)
		tot_len += 3 * 4;

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *)skb_push(buff, tot_len);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = 1;
	t1->window = htons(win);

	if (ts) {
		u32 *ptr = (u32 *)(t1 + 1);
		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
			       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*ptr++ = htonl(tcp_time_stamp);
		*ptr = htonl(ts);
	}

	buff->csum = csum_partial((char *)t1, tot_len, 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    tot_len, IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = tcp_v6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
			dst_release(buff->dst);
			return;
		}
		ip6_xmit(NULL, buff, &fl, NULL, 0);
		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
		return;
	}

	kfree_skb(buff);
}
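
/* The 3 * 4 extra bytes above are the timestamp option padded to a
 * 32-bit boundary, exactly as a live connection would emit it:
 *
 *	NOP (1), NOP (1), TIMESTAMP (8), length TCPOLEN_TIMESTAMP (10),
 *	then TSval = tcp_time_stamp and TSecr = the caller's ts.
 *
 * 2 + 10 = 12 bytes, i.e. three 32-bit words, hence tot_len += 3 * 4.
 */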

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

	tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
			tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);

	tcp_tw_put(tw);
}

static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
{
	tcp_v6_send_ack(skb, req->snt_isn + 1, req->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
}


static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct open_request *req, **prev;
	struct tcphdr *th = skb->h.th;
	struct tcp_sock *tp = tcp_sk(sk);
	struct sock *nsk;

	/* Find possible connection requests. */
	req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
				&skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
					  th->source,
					  &skb->nh.ipv6h->daddr,
					  ntohs(th->dest),
					  tcp_v6_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		tcp_tw_put((struct tcp_tw_bucket *)nsk);
		return NULL;
	}

#if 0 /*def CONFIG_SYN_COOKIES*/
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_listen_opt *lopt = tp->listen_opt;
	u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd);

	req->sk = NULL;
	req->expires = jiffies + TCP_TIMEOUT_INIT;
	req->retrans = 0;
	req->dl_next = lopt->syn_table[h];

	write_lock(&tp->syn_wait_lock);
	lopt->syn_table[h] = req;
	write_unlock(&tp->syn_wait_lock);

	tcp_synq_added(sk);
}


/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_options_received tmp_opt;
	struct tcp_sock *tp = tcp_sk(sk);
	struct open_request *req = NULL;
	__u32 isn = TCP_SKB_CB(skb)->when;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	/*
	 * There are no SYN attacks on IPv6, yet...
	 */
	if (tcp_synq_is_full(sk) && !isn) {
		if (net_ratelimit())
			printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
		goto drop;
	}

	if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
		goto drop;

	req = tcp_openreq_alloc();
	if (req == NULL)
		goto drop;

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	req->class = &or_ipv6;
	ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
	TCP_ECN_create_request(req, skb->h.th);
	req->af.v6_req.pktopts = NULL;
	if (ipv6_opt_accepted(sk, skb) ||
	    np->rxopt.bits.rxinfo ||
	    np->rxopt.bits.rxhlim) {
		atomic_inc(&skb->users);
		req->af.v6_req.pktopts = skb;
	}
	req->af.v6_req.iif = sk->sk_bound_dev_if;

	/* So that link locals have meaning */
	if (!sk->sk_bound_dev_if &&
	    ipv6_addr_type(&req->af.v6_req.rmt_addr) & IPV6_ADDR_LINKLOCAL)
		req->af.v6_req.iif = tcp_v6_iif(skb);

	if (isn == 0)
		isn = tcp_v6_init_sequence(sk, skb);

	req->snt_isn = isn;

	if (tcp_v6_send_synack(sk, req, NULL))
		goto drop;

	tcp_v6_synq_add(sk, req);

	return 0;

drop:
	if (req)
		tcp_openreq_free(req);

	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
	return 0; /* don't send reset */
}

static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					 struct open_request *req,
					 struct dst_entry *dst)
{
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
	struct ipv6_txoptions *opt;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 * v6 mapped
		 */
		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

		if (newsk == NULL)
			return NULL;

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
			      newinet->daddr);

		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
			      newinet->saddr);

		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

		newtp->af_specific = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
		newnp->pktoptions = NULL;
		newnp->opt = NULL;
		newnp->mcast_oif = tcp_v6_iif(skb);
		newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

		/* Charge newly allocated IPv6 socket. Though it is mapped,
		 * it is still an IPv6 socket.
		 */
#ifdef INET_REFCNT_DEBUG
		atomic_inc(&inet6_sock_nr);
#endif

		/* This is a tricky place. Until this moment IPv4 tcp
		   worked with IPv6 af_tcp.af_specific.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, newtp->pmtu_cookie);

		return newsk;
	}

	opt = np->opt;

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (np->rxopt.bits.srcrt == 2 &&
	    opt == NULL && req->af.v6_req.pktopts) {
		struct inet6_skb_parm *rxopt = IP6CB(req->af.v6_req.pktopts);
		if (rxopt->srcrt)
			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(req->af.v6_req.pktopts->nh.raw + rxopt->srcrt));
	}

	if (dst == NULL) {
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}
		ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = req->rmt_port;
		fl.fl_ip_sport = inet_sk(sk)->sport;

		if (ip6_dst_lookup(sk, &dst, &fl))
			goto out;

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto out;

	/* Charge newly allocated IPv6 socket */
#ifdef INET_REFCNT_DEBUG
	atomic_inc(&inet6_sock_nr);
#endif

	ip6_dst_store(newsk, dst, NULL);
	newsk->sk_route_caps = dst->dev->features &
			       ~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ipv6_addr_copy(&newnp->daddr, &req->af.v6_req.rmt_addr);
	ipv6_addr_copy(&newnp->saddr, &req->af.v6_req.loc_addr);
	ipv6_addr_copy(&newnp->rcv_saddr, &req->af.v6_req.loc_addr);
	newsk->sk_bound_dev_if = req->af.v6_req.iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->opt = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (req->af.v6_req.pktopts) {
		newnp->pktoptions = skb_clone(req->af.v6_req.pktopts,
					      GFP_ATOMIC);
		kfree_skb(req->af.v6_req.pktopts);
		req->af.v6_req.pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);
	}
	newnp->opt = NULL;
	newnp->mcast_oif = tcp_v6_iif(skb);
	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

	/* Clone native IPv6 options from the listening socket (if any).

	   Yes, keeping a reference count would be much more clever, but
	   we do one more thing here: reattach the optmem charge to
	   newsk.
	 */
	if (opt) {
		newnp->opt = ipv6_dup_options(newsk, opt);
		if (opt != np->opt)
			sock_kfree_s(sk, opt, opt->tot_len);
	}

	newtp->ext_header_len = 0;
	if (newnp->opt)
		newtp->ext_header_len = newnp->opt->opt_nflen +
					newnp->opt->opt_flen;

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;

	__tcp_v6_hash(newsk);
	tcp_inherit_port(sk, newsk);

	return newsk;

out_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
out:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
	return NULL;
}

static int tcp_v6_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_HW) {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		if (!tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				  &skb->nh.ipv6h->daddr, skb->csum))
			return 0;
		LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
	}
	if (skb->len <= 76) {
		if (tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				 &skb->nh.ipv6h->daddr, skb_checksum(skb, 0, skb->len, 0)))
			return -1;
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else {
		skb->csum = ~tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
					  &skb->nh.ipv6h->daddr, 0);
	}
	return 0;
}
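
/* Receive checksum strategy above: a hardware sum is trusted if it
 * verifies against the pseudo-header; short packets (<= 76 bytes, a
 * heuristic cut-off) are cheap enough to verify fully right away; for
 * anything longer only the pseudo-header sum is precomputed, leaving
 * full verification to the later copy-to-user or
 * tcp_checksum_complete() pass, which can fold the checksum into work
 * it must do anyway.
 */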

/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but that is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
	 */
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	if (sk_filter(sk, skb, 0))
		goto discard;

	/*
	 * socket locking is here for SMP purposes as backlog rcv
	 * is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code where we
	   can do this without affecting IPv4.
	   The rest of the code is protocol independent,
	   and I do not like the idea of uglifying IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought out. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
	                                       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, GFP_ATOMIC);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
			goto reset;
		TCP_CHECK_TIMER(sk);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket..
		 */
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
		goto reset;
	TCP_CHECK_TIMER(sk);
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS_BH(TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim)
			np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
		if (ipv6_opt_accepted(sk, opt_skb)) {
			skb_set_owner_r(opt_skb, sk);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	if (opt_skb)
		kfree_skb(opt_skb);
	return 0;
}

static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
{
	struct sk_buff *skb = *pskb;
	struct tcphdr *th;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 * Count it even if it's bad.
	 */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = skb->h.th;

	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
	     tcp_v6_checksum_init(skb) < 0))
		goto bad_packet;

	th = skb->h.th;
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
			     &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));

	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb, 0))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v6_do_rcv(sk, skb);
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(skb);
	}

discard_it:

	/*
	 *	Discard frame
	 */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		tcp_tw_put((struct tcp_tw_bucket *)sk);
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		tcp_tw_put((struct tcp_tw_bucket *)sk);
		goto discard_it;
	}

	switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
					   skb, th, skb->len)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
		if (sk2 != NULL) {
			tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
			tcp_tw_put((struct tcp_tw_bucket *)sk);
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}

static int tcp_v6_rebuild_header(struct sock *sk)
{
	int err;
	struct dst_entry *dst;
	struct ipv6_pinfo *np = inet6_sk(sk);

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		struct inet_sock *inet = inet_sk(sk);
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
		fl.fl6_flowlabel = np->flow_label;
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet->dport;
		fl.fl_ip_sport = inet->sport;

		if (np->opt && np->opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err) {
			sk->sk_route_caps = 0;
			return err;
		}
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
			sk->sk_err_soft = -err;
			dst_release(dst);
			return err;
		}

		ip6_dst_store(sk, dst, NULL);
		sk->sk_route_caps = dst->dev->features &
				    ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
	}

	return 0;
}

static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
{
	struct sock *sk = skb->sk;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct flowi fl;
	struct dst_entry *dst;
	struct in6_addr *final_p = NULL, final;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
	fl.fl6_flowlabel = np->flow_label;
	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_sport = inet->sport;
	fl.fl_ip_dport = inet->dport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		int err = ip6_dst_lookup(sk, &dst, &fl);

		if (err) {
			sk->sk_err_soft = -err;
			return err;
		}

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
			sk->sk_route_caps = 0;
			dst_release(dst);
			return err;
		}

		ip6_dst_store(sk, dst, NULL);
		sk->sk_route_caps = dst->dev->features &
				    ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
	}

	skb->dst = dst_clone(dst);

	/* Restore final destination back after routing done */
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);

	return ip6_xmit(sk, skb, &fl, np->opt, 0);
}

static void v6_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)uaddr;

	sin6->sin6_family = AF_INET6;
	ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
	sin6->sin6_port = inet_sk(sk)->dport;
	/* We do not store received flowlabel for TCP */
	sin6->sin6_flowinfo = 0;
	sin6->sin6_scope_id = 0;
	if (sk->sk_bound_dev_if &&
	    ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
		sin6->sin6_scope_id = sk->sk_bound_dev_if;
}

static int tcp_v6_remember_stamp(struct sock *sk)
{
	/* Alas, not yet... */
	return 0;
}

static struct tcp_func ipv6_specific = {
	.queue_xmit	=	tcp_v6_xmit,
	.send_check	=	tcp_v6_send_check,
	.rebuild_header	=	tcp_v6_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v6_remember_stamp,
	.net_header_len	=	sizeof(struct ipv6hdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	v6_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};

/*
 *	TCP over IPv4 via INET6 API
 */

static struct tcp_func ipv6_mapped = {
	.queue_xmit	=	ip_queue_xmit,
	.send_check	=	tcp_v4_send_check,
	.rebuild_header	=	tcp_v4_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v4_remember_stamp,
	.net_header_len	=	sizeof(struct iphdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	v6_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};



/* NOTE: A lot of things are set to zero explicitly by the call to
 *       sk_alloc(), so they need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	tp->rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them. -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache_std = tp->mss_cache = 536;

	tp->reordering = sysctl_tcp_reordering;

	sk->sk_state = TCP_CLOSE;

	tp->af_specific = &ipv6_specific;

	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}

static int tcp_v6_destroy_sock(struct sock *sk)
{
	extern int tcp_v4_destroy_sock(struct sock *sk);

	tcp_v4_destroy_sock(sk);
	return inet6_destroy_sock(sk);
}

/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 struct sock *sk, struct open_request *req, int i, int uid)
{
	struct in6_addr *dest, *src;
	int ttd = req->expires - jiffies;

	if (ttd < 0)
		ttd = 0;

	src = &req->af.v6_req.loc_addr;
	dest = &req->af.v6_req.rmt_addr;
	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   ntohs(inet_sk(sk)->sport),
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(req->rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,    /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->retrans,
		   uid,
		   0,    /* non standard timer */
		   0,    /* open_requests have no inode */
		   0, req);
}

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	struct inet_sock *inet = inet_sk(sp);
	struct tcp_sock *tp = tcp_sk(sp);
	struct ipv6_pinfo *np = inet6_sk(sp);

	dest = &np->daddr;
	src = &np->rcv_saddr;
	destp = ntohs(inet->dport);
	srcp = ntohs(inet->sport);
	if (tp->pending == TCP_TIME_RETRANS) {
		timer_active = 1;
		timer_expires = tp->timeout;
	} else if (tp->pending == TCP_TIME_PROBE0) {
		timer_active = 4;
		timer_expires = tp->timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active = 2;
		timer_expires = sp->sk_timer.expires;
	} else {
		timer_active = 0;
		timer_expires = jiffies;
	}

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   sp->sk_state,
		   tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq,
		   timer_active,
		   jiffies_to_clock_t(timer_expires - jiffies),
		   tp->retransmits,
		   sock_i_uid(sp),
		   tp->probes_out,
		   sock_i_ino(sp),
		   atomic_read(&sp->sk_refcnt), sp,
		   tp->rto, tp->ack.ato, (tp->ack.quick << 1) | tp->ack.pingpong,
		   tp->snd_cwnd, tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh
		   );
}

static void get_timewait6_sock(struct seq_file *seq,
			       struct tcp_tw_bucket *tw, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest = &tw->tw_v6_daddr;
	src = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		   atomic_read(&tw->tw_refcnt), tw);
}

#ifdef CONFIG_PROC_FS
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp6_sock(seq, v, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait6_sock(seq, v, st->num);
		break;
	}
out:
	return 0;
}

static struct file_operations tcp6_seq_fops;
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.owner		= THIS_MODULE,
	.name		= "tcp6",
	.family		= AF_INET6,
	.seq_show	= tcp6_seq_show,
	.seq_fops	= &tcp6_seq_fops,
};

int __init tcp6_proc_init(void)
{
	return tcp_proc_register(&tcp6_seq_afinfo);
}

void tcp6_proc_exit(void)
{
	tcp_proc_unregister(&tcp6_seq_afinfo);
}
#endif

struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= tcp_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.sendmsg		= tcp_sendmsg,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v6_do_rcv,
	.hash			= tcp_v6_hash,
	.unhash			= tcp_unhash,
	.get_port		= tcp_v6_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
};

static struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

extern struct proto_ops inet6_stream_ops;

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.capability	=	-1,
	.no_check	=	0,
	.flags		=	INET_PROTOSW_PERMANENT,
};

void __init tcpv6_init(void)
{
	/* register inet6 protocol */
	if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
		printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
	inet6_register_protosw(&tcpv6_protosw);
}