/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/addrconf.h>
#include <net/snmp.h>
#include <net/dsfield.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static void	tcp_v6_send_reset(struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
static void	tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
				  struct sk_buff *skb);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static int	tcp_v6_xmit(struct sk_buff *skb, int ipfragok);

static struct tcp_func ipv6_mapped;
static struct tcp_func ipv6_specific;

/* I have no idea if this is a good hash for v6 or not. -DaveM */
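/* The hash mixes the two ports with the low 32 bits of each address and
 * folds the result down before masking it to the table size; the final
 * mask assumes ehash_size is a power of two.
 */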
static inline int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport,
				const struct in6_addr *faddr, const u16 fport,
				const int ehash_size)
{
	int hashent = (lport ^ fport);

	hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
	hashent ^= hashent >> 16;
	hashent ^= hashent >> 8;
	return (hashent & (ehash_size - 1));
}

static inline int inet6_sk_ehashfn(const struct sock *sk, const int ehash_size)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	const struct in6_addr *laddr = &np->rcv_saddr;
	const struct in6_addr *faddr = &np->daddr;
	const __u16 lport = inet->num;
	const __u16 fport = inet->dport;
	return inet6_ehashfn(laddr, lport, faddr, fport, ehash_size);
}

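/* Walk every socket bound to the same port and report a conflict unless
 * the two sockets are bound to different devices, both allow address
 * reuse (and the existing one is not listening), or their local
 * addresses differ.
 */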
static inline int tcp_v6_bind_conflict(const struct sock *sk,
				       const struct inet_bind_bucket *tb)
{
	const struct sock *sk2;
	const struct hlist_node *node;

	/* We must walk the whole port owner list in this case. -DaveM */
	sk_for_each_bound(sk2, node, &tb->owners) {
		if (sk != sk2 &&
		    (!sk->sk_bound_dev_if ||
		     !sk2->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
		    (!sk->sk_reuse || !sk2->sk_reuse ||
		     sk2->sk_state == TCP_LISTEN) &&
		    ipv6_rcv_saddr_equal(sk, sk2))
			break;
	}

	return node != NULL;
}

/* Grrr, addr_type already calculated by caller, but I don't want
 * to add some silly "cookie" argument to this method just for that.
 * But it doesn't matter, the recalculation is in the rarest path
 * this function ever takes.
 */
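/* Bind the socket to a local port.  With snum == 0 an ephemeral port is
 * picked by scanning the sysctl range from the shared port rover;
 * otherwise the requested port's bind bucket is looked up and checked
 * with tcp_v6_bind_conflict() before the socket is added to it.
 */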
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	struct hlist_node *node;
	int ret;

	local_bh_disable();
	if (snum == 0) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int remaining = (high - low) + 1;
		int rover;

		spin_lock(&tcp_hashinfo.portalloc_lock);
		if (tcp_hashinfo.port_rover < low)
			rover = low;
		else
			rover = tcp_hashinfo.port_rover;
		do {	rover++;
			if (rover > high)
				rover = low;
			head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
			spin_lock(&head->lock);
			inet_bind_bucket_for_each(tb, node, &head->chain)
				if (tb->port == rover)
					goto next;
			break;
		next:
			spin_unlock(&head->lock);
		} while (--remaining > 0);
		tcp_hashinfo.port_rover = rover;
		spin_unlock(&tcp_hashinfo.portalloc_lock);

		/* Exhausted local port range during search?  It is not
		 * possible for us to be holding one of the bind hash
		 * locks if this test triggers, because if 'remaining'
		 * drops to zero, we broke out of the do/while loop at
		 * the top level, not from the 'break;' statement.
		 */
		ret = 1;
		if (unlikely(remaining <= 0))
			goto fail;

		/* OK, here is the one we will use. */
		snum = rover;
	} else {
		head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
		spin_lock(&head->lock);
		inet_bind_bucket_for_each(tb, node, &head->chain)
			if (tb->port == snum)
				goto tb_found;
	}
	tb = NULL;
	goto tb_not_found;
tb_found:
	if (tb && !hlist_empty(&tb->owners)) {
		if (tb->fastreuse > 0 && sk->sk_reuse &&
		    sk->sk_state != TCP_LISTEN) {
			goto success;
		} else {
			ret = 1;
			if (tcp_v6_bind_conflict(sk, tb))
				goto fail_unlock;
		}
	}
tb_not_found:
	ret = 1;
	if (tb == NULL) {
		tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
		if (tb == NULL)
			goto fail_unlock;
	}
	if (hlist_empty(&tb->owners)) {
		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
			tb->fastreuse = 1;
		else
			tb->fastreuse = 0;
	} else if (tb->fastreuse &&
		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
		tb->fastreuse = 0;

success:
	if (!inet_csk(sk)->icsk_bind_hash)
		inet_bind_hash(sk, tb, snum);
	BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
	ret = 0;

fail_unlock:
	spin_unlock(&head->lock);
fail:
	local_bh_enable();
	return ret;
}

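/* Insert the socket into the listening hash (for TCP_LISTEN sockets) or
 * into the established hash, whose slot is remembered in sk->sk_hashent.
 * Callers run with BHs disabled; the appropriate chain lock is taken here.
 */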
static __inline__ void __tcp_v6_hash(struct sock *sk)
{
	struct hlist_head *list;
	rwlock_t *lock;

	BUG_TRAP(sk_unhashed(sk));

	if (sk->sk_state == TCP_LISTEN) {
		list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
		lock = &tcp_hashinfo.lhash_lock;
		inet_listen_wlock(&tcp_hashinfo);
	} else {
		sk->sk_hashent = inet6_sk_ehashfn(sk, tcp_hashinfo.ehash_size);
		list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;
		lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock;
		write_lock(lock);
	}

	__sk_add_node(sk, list);
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(lock);
}


static void tcp_v6_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		struct tcp_sock *tp = tcp_sk(sk);

		if (tp->af_specific == &ipv6_mapped) {
			tcp_prot.hash(sk);
			return;
		}
		local_bh_disable();
		__tcp_v6_hash(sk);
		local_bh_enable();
	}
}

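/* Score listening sockets on how specifically they match: one point for
 * the right port and family, one for a matching bound address and one
 * for a matching bound device.  A perfect score of 3 ends the walk early.
 */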
static struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo,
					  const struct in6_addr *daddr,
					  const unsigned short hnum,
					  const int dif)
{
	struct sock *sk;
	struct hlist_node *node;
	struct sock *result = NULL;
	int score, hiscore;

	hiscore = 0;
	read_lock(&hashinfo->lhash_lock);
	sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) {
		if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			score = 1;
			if (!ipv6_addr_any(&np->rcv_saddr)) {
				if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
					continue;
				score++;
			}
			if (sk->sk_bound_dev_if) {
				if (sk->sk_bound_dev_if != dif)
					continue;
				score++;
			}
			if (score == 3) {
				result = sk;
				break;
			}
			if (score > hiscore) {
				hiscore = score;
				result = sk;
			}
		}
	}
	if (result)
		sock_hold(result);
	read_unlock(&hashinfo->lhash_lock);
	return result;
}

/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
 *
 * The sockhash lock must be held as a reader here.
 */

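/* The second half of the ehash table, at offset ehash_size, holds the
 * TIME-WAIT sockets; it is scanned only when the walk over the
 * established chain finds no match.
 */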
static inline struct sock *
	__inet6_lookup_established(struct inet_hashinfo *hashinfo,
				   const struct in6_addr *saddr,
				   const u16 sport,
				   const struct in6_addr *daddr,
				   const u16 hnum,
				   const int dif)
{
	struct sock *sk;
	const struct hlist_node *node;
	const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
	/* Optimize here for direct hit, only listening connections can
	 * have wildcards anyways.
	 */
	const int hash = inet6_ehashfn(daddr, hnum, saddr, sport,
				       hashinfo->ehash_size);
	struct inet_ehash_bucket *head = &hashinfo->ehash[hash];

	read_lock(&head->lock);
	sk_for_each(sk, node, &head->chain) {
		/* For IPV6 do the cheaper port and family tests first. */
		if (INET6_MATCH(sk, saddr, daddr, ports, dif))
			goto hit; /* You sunk my battleship! */
	}
	/* Must check for a TIME_WAIT'er before going to listener hash. */
	sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) {
		const struct inet_timewait_sock *tw = inet_twsk(sk);

		if (*((__u32 *)&(tw->tw_dport)) == ports &&
		    sk->sk_family == PF_INET6) {
			const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk);

			if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
			    ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
			    (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
				goto hit;
		}
	}
	read_unlock(&head->lock);
	return NULL;

hit:
	sock_hold(sk);
	read_unlock(&head->lock);
	return sk;
}


static inline struct sock *__inet6_lookup(struct inet_hashinfo *hashinfo,
					  const struct in6_addr *saddr,
					  const u16 sport,
					  const struct in6_addr *daddr,
					  const u16 hnum,
					  const int dif)
{
	struct sock *sk = __inet6_lookup_established(hashinfo, saddr, sport,
						     daddr, hnum, dif);
	if (sk)
		return sk;

	return inet6_lookup_listener(hashinfo, daddr, hnum, dif);
}

inline struct sock *inet6_lookup(struct inet_hashinfo *hashinfo,
				 const struct in6_addr *saddr, const u16 sport,
				 const struct in6_addr *daddr, const u16 dport,
				 const int dif)
{
	struct sock *sk;

	local_bh_disable();
	sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif);
	local_bh_enable();

	return sk;
}

EXPORT_SYMBOL_GPL(inet6_lookup);


/*
 * Open request hash tables.
 */

static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd)
{
	u32 a, b, c;

	a = raddr->s6_addr32[0];
	b = raddr->s6_addr32[1];
	c = raddr->s6_addr32[2];

	a += JHASH_GOLDEN_RATIO;
	b += JHASH_GOLDEN_RATIO;
	c += rnd;
	__jhash_mix(a, b, c);

	a += raddr->s6_addr32[3];
	b += (u32) rport;
	__jhash_mix(a, b, c);

	return c & (TCP_SYNQ_HSIZE - 1);
}

static struct request_sock *tcp_v6_search_req(const struct sock *sk,
					      struct request_sock ***prevp,
					      __u16 rport,
					      struct in6_addr *raddr,
					      struct in6_addr *laddr,
					      int iif)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
	struct request_sock *req, **prev;

	for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
	     (req = *prev) != NULL;
	     prev = &req->dl_next) {
		const struct tcp6_request_sock *treq = tcp6_rsk(req);

		if (inet_rsk(req)->rmt_port == rport &&
		    req->rsk_ops->family == AF_INET6 &&
		    ipv6_addr_equal(&treq->rmt_addr, raddr) &&
		    ipv6_addr_equal(&treq->loc_addr, laddr) &&
		    (!treq->iif || treq->iif == iif)) {
			BUG_TRAP(req->sk == NULL);
			*prevp = prev;
			return req;
		}
	}

	return NULL;
}

static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
				   struct in6_addr *saddr,
				   struct in6_addr *daddr,
				   unsigned long base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}

static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IPV6)) {
		return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
						    skb->nh.ipv6h->saddr.s6_addr32,
						    skb->h.th->dest,
						    skb->h.th->source);
	} else {
		return secure_tcp_sequence_number(skb->nh.iph->daddr,
						  skb->nh.iph->saddr,
						  skb->h.th->dest,
						  skb->h.th->source);
	}
}

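/* Check that the (saddr, lport, daddr, dport) four-tuple is not already in
 * use.  A TIME-WAIT socket with a usable timestamp may be recycled: its
 * sequence number and timestamp state are inherited so PAWS keeps working,
 * and the old entry is then removed, either here or by the caller when
 * *twp is returned.
 */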
static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
				      struct inet_timewait_sock **twp)
{
	struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	const struct in6_addr *daddr = &np->rcv_saddr;
	const struct in6_addr *saddr = &np->daddr;
	const int dif = sk->sk_bound_dev_if;
	const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
	const int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport,
				       tcp_hashinfo.ehash_size);
	struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
	struct sock *sk2;
	const struct hlist_node *node;
	struct inet_timewait_sock *tw;

	write_lock(&head->lock);

	/* Check TIME-WAIT sockets first. */
	sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
		const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);

		tw = inet_twsk(sk2);

		if (*((__u32 *)&(tw->tw_dport)) == ports &&
		    sk2->sk_family == PF_INET6 &&
		    ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
		    ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
		    sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
			const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
			struct tcp_sock *tp = tcp_sk(sk);

			if (tcptw->tw_ts_recent_stamp &&
			    (!twp ||
			     (sysctl_tcp_tw_reuse &&
			      xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
				/* See comment in tcp_ipv4.c */
				tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
				if (!tp->write_seq)
					tp->write_seq = 1;
				tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
				tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
				sock_hold(sk2);
				goto unique;
			} else
				goto not_unique;
		}
	}
	tw = NULL;

	/* And established part... */
	sk_for_each(sk2, node, &head->chain) {
		if (INET6_MATCH(sk2, saddr, daddr, ports, dif))
			goto not_unique;
	}

unique:
	BUG_TRAP(sk_unhashed(sk));
	__sk_add_node(sk, &head->chain);
	sk->sk_hashent = hash;
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(&head->lock);

	if (twp) {
		*twp = tw;
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
	} else if (tw) {
		/* Silly. Should hash-dance instead... */
		inet_twsk_deschedule(tw, &tcp_death_row);
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

		inet_twsk_put(tw);
	}
	return 0;

not_unique:
	write_unlock(&head->lock);
	return -EADDRNOTAVAIL;
}

static inline u32 tcpv6_port_offset(const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);

	return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
					   np->daddr.s6_addr32,
					   inet->dport);
}

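/* Hash a connecting socket.  If no local port is bound yet, ephemeral
 * ports are probed starting at a per-destination offset derived from
 * secure_tcpv6_port_ephemeral(), reusing a TIME-WAIT entry whenever the
 * four-tuple check above allows it.
 */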
static int tcp_v6_hash_connect(struct sock *sk)
{
	unsigned short snum = inet_sk(sk)->num;
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	int ret;

	if (!snum) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int range = high - low;
		int i;
		int port;
		static u32 hint;
		u32 offset = hint + tcpv6_port_offset(sk);
		struct hlist_node *node;
		struct inet_timewait_sock *tw = NULL;

		local_bh_disable();
		for (i = 1; i <= range; i++) {
			port = low + (i + offset) % range;
			head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
			spin_lock(&head->lock);

			/* Does not bother with rcv_saddr checks,
			 * because the established check is already
			 * unique enough.
			 */
			inet_bind_bucket_for_each(tb, node, &head->chain) {
				if (tb->port == port) {
					BUG_TRAP(!hlist_empty(&tb->owners));
					if (tb->fastreuse >= 0)
						goto next_port;
					if (!__tcp_v6_check_established(sk,
									port,
									&tw))
						goto ok;
					goto next_port;
				}
			}

			tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
			if (!tb) {
				spin_unlock(&head->lock);
				break;
			}
			tb->fastreuse = -1;
			goto ok;

		next_port:
			spin_unlock(&head->lock);
		}
		local_bh_enable();

		return -EADDRNOTAVAIL;

ok:
		hint += i;

		/* Head lock still held and bh's disabled */
		inet_bind_hash(sk, tb, port);
		if (sk_unhashed(sk)) {
			inet_sk(sk)->sport = htons(port);
			__tcp_v6_hash(sk);
		}
		spin_unlock(&head->lock);

		if (tw) {
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
		}

		ret = 0;
		goto out;
	}

	head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
	tb = inet_csk(sk)->icsk_bind_hash;
	spin_lock_bh(&head->lock);

	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
		__tcp_v6_hash(sk);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		ret = __tcp_v6_check_established(sk, snum, NULL);
out:
		local_bh_enable();
		return ret;
	}
}

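/* connect() for TCP over IPv6.  V4-mapped destinations are handed to
 * tcp_v4_connect() with the af_specific ops switched to ipv6_mapped;
 * native destinations get a flow/route lookup, a source address, an
 * initial sequence number and then tcp_connect() sends the SYN.
 */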
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p = NULL, final;
	struct flowi fl;
	struct dst_entry *dst;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return(-EAFNOSUPPORT);

	memset(&fl, 0, sizeof(fl));

	if (np->sndflow) {
		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl.fl6_flowlabel);
		if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
	np->flow_label = fl.fl6_flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = tp->ext_header_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		tp->af_specific = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			tp->ext_header_len = exthdrlen;
			tp->af_specific = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
			goto failure;
		} else {
			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
				      inet->saddr);
			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
				      inet->rcv_saddr);
		}

		return err;
	}

	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src,
		       (saddr ? saddr : &np->saddr));
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_dport = usin->sin6_port;
	fl.fl_ip_sport = inet->sport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	err = ip6_dst_lookup(sk, &dst, &fl);
	if (err)
		goto failure;
	if (final_p)
		ipv6_addr_copy(&fl.fl6_dst, final_p);

	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
		dst_release(dst);
		goto failure;
	}

	if (saddr == NULL) {
		saddr = &fl.fl6_src;
		ipv6_addr_copy(&np->rcv_saddr, saddr);
	}

	/* set the source address */
	ipv6_addr_copy(&np->saddr, saddr);
	inet->rcv_saddr = LOOPBACK4_IPV6;

	ip6_dst_store(sk, dst, NULL);
	sk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	tp->ext_header_len = 0;
	if (np->opt)
		tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = tcp_v6_hash_connect(sk);
	if (err)
		goto late_failure;

	if (!tp->write_seq)
		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
							     np->daddr.s6_addr32,
							     inet->sport,
							     inet->dport);

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	__sk_dst_reset(sk);
failure:
	inet->dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

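/* ICMPv6 error handler.  PKT_TOOBIG updates the path MTU and triggers a
 * simple retransmit; other errors are converted with icmpv6_err_convert()
 * and reported to the socket, or used to drop a pending request_sock
 * while the parent socket is still listening.
 */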
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       int type, int code, int offset, __u32 info)
{
	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp;
	__u32 seq;

	sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
			  th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		return;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst = NULL;

		if (sock_owned_by_user(sk))
			goto out;
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
			goto out;

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct inet_sock *inet = inet_sk(sk);
			struct flowi fl;

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle the rthdr case. Ignore this complexity
			   for now.
			 */
			memset(&fl, 0, sizeof(fl));
			fl.proto = IPPROTO_TCP;
			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
			fl.oif = sk->sk_bound_dev_if;
			fl.fl_ip_dport = inet->dport;
			fl.fl_ip_sport = inet->sport;

			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
				sk->sk_err_soft = -err;
				goto out;
			}

			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
				sk->sk_err_soft = -err;
				goto out;
			}

		} else
			dst_hold(dst);

		if (tp->pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */
		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for a request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr,
					&hdr->saddr, inet6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		BUG_TRAP(req->sk == NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, if SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}


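/* Build and send a SYN-ACK for a pending request.  The flow is keyed by
 * the addresses recorded in the request_sock; when a source routing
 * header is configured (or accepted from the peer and inverted), the
 * route lookup goes via its listed hop and the final destination is
 * restored before transmission.
 */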
static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
			      struct dst_entry *dst)
{
	struct tcp6_request_sock *treq = tcp6_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	struct ipv6_txoptions *opt = NULL;
	struct in6_addr *final_p = NULL, final;
	struct flowi fl;
	int err = -1;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
	fl.fl6_flowlabel = 0;
	fl.oif = treq->iif;
	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
	fl.fl_ip_sport = inet_sk(sk)->sport;

	if (dst == NULL) {
		opt = np->opt;
		if (opt == NULL &&
		    np->rxopt.bits.srcrt == 2 &&
		    treq->pktopts) {
			struct sk_buff *pktopts = treq->pktopts;
			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
			if (rxopt->srcrt)
				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(pktopts->nh.raw + rxopt->srcrt));
		}

		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err)
			goto done;
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);
		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto done;
	}

	skb = tcp_make_synack(sk, dst, req);
	if (skb) {
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v6_check(th, skb->len,
					 &treq->loc_addr, &treq->rmt_addr,
					 csum_partial((char *)th, skb->len, skb->csum));

		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		err = ip6_xmit(sk, skb, &fl, opt, 0);
		if (err == NET_XMIT_CN)
			err = 0;
	}

done:
	dst_release(dst);
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	if (tcp6_rsk(req)->pktopts)
		kfree_skb(tcp6_rsk(req)->pktopts);
}

static struct request_sock_ops tcp6_request_sock_ops = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_v6_send_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset
};

static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet6_skb_parm *opt = IP6CB(skb);

	if (np->rxopt.all) {
		if ((opt->hop && np->rxopt.bits.hopopts) ||
		    ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
		     np->rxopt.bits.rxflow) ||
		    (opt->srcrt && np->rxopt.bits.srcrt) ||
		    ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
			return 1;
	}
	return 0;
}


static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
			      struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);

	if (skb->ip_summed == CHECKSUM_HW) {
		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
		skb->csum = offsetof(struct tcphdr, check);
	} else {
		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
					    csum_partial((char *)th, th->doff<<2,
							 skb->csum));
	}
}


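/* Send a RST in response to an unwanted segment.  The reply swaps the
 * addresses and ports taken from the offending packet, acks its sequence
 * space when the original carried no ACK, and is routed without a socket.
 */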
static void tcp_v6_send_reset(struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;

	if (th->rst)
		return;

	if (!ipv6_unicast_destination(skb))
		return;

	/*
	 * We need to grab some memory, and put together an RST,
	 * and then put it into the queue to be sent.
	 */

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));

	t1 = (struct tcphdr *) skb_push(buff, sizeof(struct tcphdr));

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = sizeof(*t1)/4;
	t1->rst = 1;

	if (th->ack) {
		t1->seq = th->ack_seq;
	} else {
		t1->ack = 1;
		t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
				    + skb->len - (th->doff<<2));
	}

	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    sizeof(*t1), IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = inet6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	/* sk = NULL, but it is safe for now. RST socket required. */
	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {

		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
			dst_release(buff->dst);
			return;
		}

		ip6_xmit(NULL, buff, &fl, NULL, 0);
		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
		TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;
	int tot_len = sizeof(struct tcphdr);

	if (ts)
		tot_len += 3*4;

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *) skb_push(buff, tot_len);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len/4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = 1;
	t1->window = htons(win);

	if (ts) {
		u32 *ptr = (u32*)(t1 + 1);
		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
			       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*ptr++ = htonl(tcp_time_stamp);
		*ptr = htonl(ts);
	}

	buff->csum = csum_partial((char *)t1, tot_len, 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    tot_len, IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = inet6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
			dst_release(buff->dst);
			return;
		}
		ip6_xmit(NULL, buff, &fl, NULL, 0);
		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
		return;
	}

	kfree_skb(buff);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent);

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
{
	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
}


static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct request_sock *req, **prev;
	const struct tcphdr *th = skb->h.th;
	struct sock *nsk;

	/* Find possible connection requests. */
	req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr,
				&skb->nh.ipv6h->daddr, inet6_iif(skb));
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
					 th->source, &skb->nh.ipv6h->daddr,
					 ntohs(th->dest), inet6_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put((struct inet_timewait_sock *)nsk);
		return NULL;
	}

#if 0 /*def CONFIG_SYN_COOKIES*/
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
	const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);

	reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT);
	inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT);
}


/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
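/* Handle an incoming SYN: allocate a request_sock, record the peer's
 * addresses, interface and any IPv6 options to echo later, pick an
 * initial sequence number and answer with tcp_v6_send_synack() before
 * queueing the request on the listener's SYN table.
 */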
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp6_request_sock *treq;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_options_received tmp_opt;
	struct tcp_sock *tp = tcp_sk(sk);
	struct request_sock *req = NULL;
	__u32 isn = TCP_SKB_CB(skb)->when;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	/*
	 *	There are no SYN attacks on IPv6, yet...
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		if (net_ratelimit())
			printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
		goto drop;
	}

	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = reqsk_alloc(&tcp6_request_sock_ops);
	if (req == NULL)
		goto drop;

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	treq = tcp6_rsk(req);
	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
	TCP_ECN_create_request(req, skb->h.th);
	treq->pktopts = NULL;
	if (ipv6_opt_accepted(sk, skb) ||
	    np->rxopt.bits.rxinfo ||
	    np->rxopt.bits.rxhlim) {
		atomic_inc(&skb->users);
		treq->pktopts = skb;
	}
	treq->iif = sk->sk_bound_dev_if;

	/* So that link locals have meaning */
	if (!sk->sk_bound_dev_if &&
	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
		treq->iif = inet6_iif(skb);

	if (isn == 0)
		isn = tcp_v6_init_sequence(sk, skb);

	tcp_rsk(req)->snt_isn = isn;

	if (tcp_v6_send_synack(sk, req, NULL))
		goto drop;

	tcp_v6_synq_add(sk, req);

	return 0;

drop:
	if (req)
		reqsk_free(req);

	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
	return 0; /* don't send reset */
}

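/* Create the child socket once the three-way handshake completes.
 * V4-mapped children are built by tcp_v4_syn_recv_sock() and then given
 * their IPv6 state; native children copy the listener's IPv6 info, clone
 * any pktoptions received with the SYN and duplicate the IPv6 txoptions.
 */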
1377static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07001378 struct request_sock *req,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001379 struct dst_entry *dst)
1380{
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001381 struct tcp6_request_sock *treq = tcp6_rsk(req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001382 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1383 struct tcp6_sock *newtcp6sk;
1384 struct inet_sock *newinet;
1385 struct tcp_sock *newtp;
1386 struct sock *newsk;
1387 struct ipv6_txoptions *opt;
1388
1389 if (skb->protocol == htons(ETH_P_IP)) {
1390 /*
1391 * v6 mapped
1392 */
1393
1394 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1395
1396 if (newsk == NULL)
1397 return NULL;
1398
1399 newtcp6sk = (struct tcp6_sock *)newsk;
1400 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1401
1402 newinet = inet_sk(newsk);
1403 newnp = inet6_sk(newsk);
1404 newtp = tcp_sk(newsk);
1405
1406 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1407
1408 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1409 newinet->daddr);
1410
1411 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1412 newinet->saddr);
1413
1414 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1415
1416 newtp->af_specific = &ipv6_mapped;
1417 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1418 newnp->pktoptions = NULL;
1419 newnp->opt = NULL;
Arnaldo Carvalho de Melo505cbfc2005-08-12 09:19:38 -03001420 newnp->mcast_oif = inet6_iif(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001421 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1422
Arnaldo Carvalho de Meloe6848972005-08-09 19:45:38 -07001423 /*
1424 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1425 * here, tcp_create_openreq_child now does this for us, see the comment in
1426 * that function for the gory details. -acme
Linus Torvalds1da177e2005-04-16 15:20:36 -07001427 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001428
1429 /* It is tricky place. Until this moment IPv4 tcp
1430 worked with IPv6 af_tcp.af_specific.
1431 Sync it now.
1432 */
1433 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1434
1435 return newsk;
1436 }
1437
1438 opt = np->opt;
1439
1440 if (sk_acceptq_is_full(sk))
1441 goto out_overflow;
1442
1443 if (np->rxopt.bits.srcrt == 2 &&
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001444 opt == NULL && treq->pktopts) {
1445 struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001446 if (rxopt->srcrt)
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001447 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001448 }
1449
1450 if (dst == NULL) {
1451 struct in6_addr *final_p = NULL, final;
1452 struct flowi fl;
1453
1454 memset(&fl, 0, sizeof(fl));
1455 fl.proto = IPPROTO_TCP;
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001456 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001457 if (opt && opt->srcrt) {
1458 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1459 ipv6_addr_copy(&final, &fl.fl6_dst);
1460 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1461 final_p = &final;
1462 }
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001463 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001464 fl.oif = sk->sk_bound_dev_if;
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001465 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001466 fl.fl_ip_sport = inet_sk(sk)->sport;
1467
1468 if (ip6_dst_lookup(sk, &dst, &fl))
1469 goto out;
1470
1471 if (final_p)
1472 ipv6_addr_copy(&fl.fl6_dst, final_p);
1473
1474 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1475 goto out;
1476 }
1477
1478 newsk = tcp_create_openreq_child(sk, req, skb);
1479 if (newsk == NULL)
1480 goto out;
1481
Arnaldo Carvalho de Meloe6848972005-08-09 19:45:38 -07001482 /*
1483 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1484 * count here, tcp_create_openreq_child now does this for us, see the
1485 * comment in that function for the gory details. -acme
1486 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001487
1488 ip6_dst_store(newsk, dst, NULL);
1489 newsk->sk_route_caps = dst->dev->features &
1490 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1491
1492 newtcp6sk = (struct tcp6_sock *)newsk;
1493 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1494
1495 newtp = tcp_sk(newsk);
1496 newinet = inet_sk(newsk);
1497 newnp = inet6_sk(newsk);
1498
1499 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1500
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001501 ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1502 ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1503 ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1504 newsk->sk_bound_dev_if = treq->iif;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001505
1506 /* Now IPv6 options...
1507
1508 First: no IPv4 options.
1509 */
1510 newinet->opt = NULL;
1511
1512 /* Clone RX bits */
1513 newnp->rxopt.all = np->rxopt.all;
1514
1515 /* Clone pktoptions received with SYN */
1516 newnp->pktoptions = NULL;
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001517 if (treq->pktopts != NULL) {
1518 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1519 kfree_skb(treq->pktopts);
1520 treq->pktopts = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001521 if (newnp->pktoptions)
1522 skb_set_owner_r(newnp->pktoptions, newsk);
1523 }
1524 newnp->opt = NULL;
Arnaldo Carvalho de Melo505cbfc2005-08-12 09:19:38 -03001525 newnp->mcast_oif = inet6_iif(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001526 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1527
1528 /* Clone native IPv6 options from listening socket (if any)
1529
1530 Yes, keeping reference count would be much more clever,
1531 but we make one more one thing there: reattach optmem
1532 to newsk.
1533 */
1534 if (opt) {
1535 newnp->opt = ipv6_dup_options(newsk, opt);
1536 if (opt != np->opt)
1537 sock_kfree_s(sk, opt, opt->tot_len);
1538 }
1539
1540 newtp->ext_header_len = 0;
1541 if (newnp->opt)
1542 newtp->ext_header_len = newnp->opt->opt_nflen +
1543 newnp->opt->opt_flen;
1544
1545 tcp_sync_mss(newsk, dst_mtu(dst));
1546 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1547 tcp_initialize_rcv_mss(newsk);
1548
1549 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1550
1551 __tcp_v6_hash(newsk);
Arnaldo Carvalho de Melo2d8c4ce2005-08-09 20:07:13 -07001552 inet_inherit_port(&tcp_hashinfo, sk, newsk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001553
1554 return newsk;
1555
1556out_overflow:
1557 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1558out:
1559 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1560 if (opt && opt != np->opt)
1561 sock_kfree_s(sk, opt, opt->tot_len);
1562 dst_release(dst);
1563 return NULL;
1564}
1565
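/* Checksum strategy for incoming segments: a hardware-verified checksum is
 * accepted directly; short segments (<= 76 bytes) are fully verified in
 * software right away; for anything longer we only seed skb->csum with the
 * pseudo-header sum so that tcp_checksum_complete() can finish the job later,
 * once the data is actually touched. */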
1566static int tcp_v6_checksum_init(struct sk_buff *skb)
1567{
1568 if (skb->ip_summed == CHECKSUM_HW) {
1569 skb->ip_summed = CHECKSUM_UNNECESSARY;
1570 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1571 &skb->nh.ipv6h->daddr,skb->csum))
1572 return 0;
Patrick McHardy64ce2072005-08-09 20:50:53 -07001573 LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v6 csum failed\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001574 }
1575 if (skb->len <= 76) {
1576 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1577 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1578 return -1;
1579 skb->ip_summed = CHECKSUM_UNNECESSARY;
1580 } else {
1581 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1582 &skb->nh.ipv6h->daddr,0);
1583 }
1584 return 0;
1585}
1586
1587/* The socket must have its spinlock held when we get
1588 * here.
1589 *
1590 * We have a potential double-lock case here, so even when
1591 * doing backlog processing we use the BH locking scheme.
1592 * This is because we cannot sleep with the original spinlock
1593 * held.
1594 */
1595static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1596{
1597 struct ipv6_pinfo *np = inet6_sk(sk);
1598 struct tcp_sock *tp;
1599 struct sk_buff *opt_skb = NULL;
1600
1601	/* Imagine: the socket is IPv6 and an IPv4 packet arrives,
1602	   goes to the IPv4 receive handler and is backlogged.
1603	   From the backlog it always ends up here. Kerboom...
1604	   Fortunately, tcp_rcv_established and rcv_established
1605	   handle this correctly, but that is not the case with
1606	   tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1607 */
1608
1609 if (skb->protocol == htons(ETH_P_IP))
1610 return tcp_v4_do_rcv(sk, skb);
1611
1612 if (sk_filter(sk, skb, 0))
1613 goto discard;
1614
1615 /*
1616 * socket locking is here for SMP purposes as backlog rcv
1617 * is currently called with bh processing disabled.
1618 */
1619
1620 /* Do Stevens' IPV6_PKTOPTIONS.
1621
1622	   Yes, guys, this is the only place in our code where we
1623	   can make it not affect IPv4.
1624	   The rest of the code is protocol independent,
1625	   and I do not like the idea of uglifying IPv4.
1626	
1627	   Actually, the whole idea behind IPV6_PKTOPTIONS does not
1628	   look very well thought out. For now we latch the options
1629	   received in the last packet enqueued by TCP. Feel free to
1630	   propose a better solution.
1631 --ANK (980728)
1632 */
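	/* Concretely: np->pktoptions ends up holding a clone of the most
	 * recent in-sequence segment that carried options the user asked for
	 * (or is cleared if the latest segment did not); the actual latching
	 * happens at the ipv6_pktoptions label below. */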
1633 if (np->rxopt.all)
1634 opt_skb = skb_clone(skb, GFP_ATOMIC);
1635
1636 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1637 TCP_CHECK_TIMER(sk);
1638 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1639 goto reset;
1640 TCP_CHECK_TIMER(sk);
1641 if (opt_skb)
1642 goto ipv6_pktoptions;
1643 return 0;
1644 }
1645
1646 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1647 goto csum_err;
1648
1649 if (sk->sk_state == TCP_LISTEN) {
1650 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1651 if (!nsk)
1652 goto discard;
1653
1654 /*
1655	 * Queue it on the new socket if the new socket is active,
1656	 * otherwise we just short-circuit this and continue with
1657	 * the new socket.
1658 */
1659 if(nsk != sk) {
1660 if (tcp_child_process(sk, nsk, skb))
1661 goto reset;
1662 if (opt_skb)
1663 __kfree_skb(opt_skb);
1664 return 0;
1665 }
1666 }
1667
1668 TCP_CHECK_TIMER(sk);
1669 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1670 goto reset;
1671 TCP_CHECK_TIMER(sk);
1672 if (opt_skb)
1673 goto ipv6_pktoptions;
1674 return 0;
1675
1676reset:
1677 tcp_v6_send_reset(skb);
1678discard:
1679 if (opt_skb)
1680 __kfree_skb(opt_skb);
1681 kfree_skb(skb);
1682 return 0;
1683csum_err:
1684 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1685 goto discard;
1686
1687
1688ipv6_pktoptions:
1689	/* What is this about? We latch the received options only when:
1690	
1691	   1. the skb was enqueued by TCP.
1692	   2. the skb was added to the tail of the read queue, not out of order.
1693	   3. the socket is not in a passive state.
1694	   4. finally, it really contains options that the user wants to receive.
1695 */
1696 tp = tcp_sk(sk);
1697 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1698 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1699 if (np->rxopt.bits.rxinfo)
Arnaldo Carvalho de Melo505cbfc2005-08-12 09:19:38 -03001700 np->mcast_oif = inet6_iif(opt_skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001701 if (np->rxopt.bits.rxhlim)
1702 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1703 if (ipv6_opt_accepted(sk, opt_skb)) {
1704 skb_set_owner_r(opt_skb, sk);
1705 opt_skb = xchg(&np->pktoptions, opt_skb);
1706 } else {
1707 __kfree_skb(opt_skb);
1708 opt_skb = xchg(&np->pktoptions, NULL);
1709 }
1710 }
1711
1712 if (opt_skb)
1713 kfree_skb(opt_skb);
1714 return 0;
1715}
1716
1717static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1718{
1719 struct sk_buff *skb = *pskb;
1720 struct tcphdr *th;
1721 struct sock *sk;
1722 int ret;
1723
1724 if (skb->pkt_type != PACKET_HOST)
1725 goto discard_it;
1726
1727 /*
1728 * Count it even if it's bad.
1729 */
1730 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1731
1732 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1733 goto discard_it;
1734
1735 th = skb->h.th;
1736
1737 if (th->doff < sizeof(struct tcphdr)/4)
1738 goto bad_packet;
1739 if (!pskb_may_pull(skb, th->doff*4))
1740 goto discard_it;
1741
1742 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1743 tcp_v6_checksum_init(skb) < 0))
1744 goto bad_packet;
1745
1746 th = skb->h.th;
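	/* SYN and FIN each consume one unit of sequence space, hence the
	 * th->syn + th->fin terms added to the payload length in end_seq. */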
1747 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1748 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1749 skb->len - th->doff*4);
1750 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1751 TCP_SKB_CB(skb)->when = 0;
1752 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1753 TCP_SKB_CB(skb)->sacked = 0;
1754
Arnaldo Carvalho de Melo505cbfc2005-08-12 09:19:38 -03001755 sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
1756 &skb->nh.ipv6h->daddr, ntohs(th->dest),
1757 inet6_iif(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001758
1759 if (!sk)
1760 goto no_tcp_socket;
1761
1762process:
1763 if (sk->sk_state == TCP_TIME_WAIT)
1764 goto do_time_wait;
1765
1766 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1767 goto discard_and_relse;
1768
1769 if (sk_filter(sk, skb, 0))
1770 goto discard_and_relse;
1771
1772 skb->dev = NULL;
1773
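	/* If no user context owns the socket we try the prequeue first (so a
	 * process blocked in recvmsg() can do the work); failing that, full
	 * receive processing runs here in softirq context.  If the socket is
	 * owned, the segment goes on the backlog and is processed when the
	 * owner releases the socket. */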
1774 bh_lock_sock(sk);
1775 ret = 0;
1776 if (!sock_owned_by_user(sk)) {
1777 if (!tcp_prequeue(sk, skb))
1778 ret = tcp_v6_do_rcv(sk, skb);
1779 } else
1780 sk_add_backlog(sk, skb);
1781 bh_unlock_sock(sk);
1782
1783 sock_put(sk);
1784 return ret ? -1 : 0;
1785
1786no_tcp_socket:
1787 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1788 goto discard_it;
1789
1790 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1791bad_packet:
1792 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1793 } else {
1794 tcp_v6_send_reset(skb);
1795 }
1796
1797discard_it:
1798
1799 /*
1800 * Discard frame
1801 */
1802
1803 kfree_skb(skb);
1804 return 0;
1805
1806discard_and_relse:
1807 sock_put(sk);
1808 goto discard_it;
1809
1810do_time_wait:
1811 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -07001812 inet_twsk_put((struct inet_timewait_sock *)sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001813 goto discard_it;
1814 }
1815
1816 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1817 TCP_INC_STATS_BH(TCP_MIB_INERRS);
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -07001818 inet_twsk_put((struct inet_timewait_sock *)sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001819 goto discard_it;
1820 }
1821
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -07001822 switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
1823 skb, th)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001824 case TCP_TW_SYN:
1825 {
1826 struct sock *sk2;
1827
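		/* A new SYN hit a TIME_WAIT socket: if a matching listener
		 * exists, retire the timewait sock and let the listener handle
		 * the SYN as a fresh connection attempt. */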
Arnaldo Carvalho de Melo505cbfc2005-08-12 09:19:38 -03001828 sk2 = inet6_lookup_listener(&tcp_hashinfo,
1829 &skb->nh.ipv6h->daddr,
1830 ntohs(th->dest), inet6_iif(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001831 if (sk2 != NULL) {
Arnaldo Carvalho de Melo295ff7e2005-08-09 20:44:40 -07001832 struct inet_timewait_sock *tw = inet_twsk(sk);
1833 inet_twsk_deschedule(tw, &tcp_death_row);
1834 inet_twsk_put(tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001835 sk = sk2;
1836 goto process;
1837 }
1838 /* Fall through to ACK */
1839 }
1840 case TCP_TW_ACK:
1841 tcp_v6_timewait_ack(sk, skb);
1842 break;
1843 case TCP_TW_RST:
1844 goto no_tcp_socket;
1845 case TCP_TW_SUCCESS:;
1846 }
1847 goto discard_it;
1848}
1849
1850static int tcp_v6_rebuild_header(struct sock *sk)
1851{
1852 int err;
1853 struct dst_entry *dst;
1854 struct ipv6_pinfo *np = inet6_sk(sk);
1855
1856 dst = __sk_dst_check(sk, np->dst_cookie);
1857
1858 if (dst == NULL) {
1859 struct inet_sock *inet = inet_sk(sk);
1860 struct in6_addr *final_p = NULL, final;
1861 struct flowi fl;
1862
1863 memset(&fl, 0, sizeof(fl));
1864 fl.proto = IPPROTO_TCP;
1865 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1866 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1867 fl.fl6_flowlabel = np->flow_label;
1868 fl.oif = sk->sk_bound_dev_if;
1869 fl.fl_ip_dport = inet->dport;
1870 fl.fl_ip_sport = inet->sport;
1871
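		/* With a type 0 routing header we must route towards the first
		 * intermediate hop (rt0->addr) and only restore the real final
		 * destination once the lookup is done. */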
1872 if (np->opt && np->opt->srcrt) {
1873 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1874 ipv6_addr_copy(&final, &fl.fl6_dst);
1875 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1876 final_p = &final;
1877 }
1878
1879 err = ip6_dst_lookup(sk, &dst, &fl);
1880 if (err) {
1881 sk->sk_route_caps = 0;
1882 return err;
1883 }
1884 if (final_p)
1885 ipv6_addr_copy(&fl.fl6_dst, final_p);
1886
1887 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1888 sk->sk_err_soft = -err;
1889 dst_release(dst);
1890 return err;
1891 }
1892
1893 ip6_dst_store(sk, dst, NULL);
1894 sk->sk_route_caps = dst->dev->features &
1895 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1896 }
1897
1898 return 0;
1899}
1900
1901static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1902{
1903 struct sock *sk = skb->sk;
1904 struct inet_sock *inet = inet_sk(sk);
1905 struct ipv6_pinfo *np = inet6_sk(sk);
1906 struct flowi fl;
1907 struct dst_entry *dst;
1908 struct in6_addr *final_p = NULL, final;
1909
1910 memset(&fl, 0, sizeof(fl));
1911 fl.proto = IPPROTO_TCP;
1912 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1913 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1914 fl.fl6_flowlabel = np->flow_label;
1915 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1916 fl.oif = sk->sk_bound_dev_if;
1917 fl.fl_ip_sport = inet->sport;
1918 fl.fl_ip_dport = inet->dport;
1919
1920 if (np->opt && np->opt->srcrt) {
1921 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1922 ipv6_addr_copy(&final, &fl.fl6_dst);
1923 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1924 final_p = &final;
1925 }
1926
1927 dst = __sk_dst_check(sk, np->dst_cookie);
1928
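	/* The cached route is validated against the socket's dst cookie; if it
	 * has been invalidated we redo the lookup (and xfrm resolution) and
	 * store the fresh dst on the socket. */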
1929 if (dst == NULL) {
1930 int err = ip6_dst_lookup(sk, &dst, &fl);
1931
1932 if (err) {
1933 sk->sk_err_soft = -err;
1934 return err;
1935 }
1936
1937 if (final_p)
1938 ipv6_addr_copy(&fl.fl6_dst, final_p);
1939
1940 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1941 sk->sk_route_caps = 0;
1942 dst_release(dst);
1943 return err;
1944 }
1945
1946 ip6_dst_store(sk, dst, NULL);
1947 sk->sk_route_caps = dst->dev->features &
1948 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1949 }
1950
1951 skb->dst = dst_clone(dst);
1952
1953	/* Restore the final destination after routing is done */
1954 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1955
1956 return ip6_xmit(sk, skb, &fl, np->opt, 0);
1957}
1958
1959static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1960{
1961 struct ipv6_pinfo *np = inet6_sk(sk);
1962 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1963
1964 sin6->sin6_family = AF_INET6;
1965 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1966 sin6->sin6_port = inet_sk(sk)->dport;
1967 /* We do not store received flowlabel for TCP */
1968 sin6->sin6_flowinfo = 0;
1969 sin6->sin6_scope_id = 0;
1970 if (sk->sk_bound_dev_if &&
1971 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1972 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1973}
1974
1975static int tcp_v6_remember_stamp(struct sock *sk)
1976{
1977 /* Alas, not yet... */
1978 return 0;
1979}
1980
1981static struct tcp_func ipv6_specific = {
1982 .queue_xmit = tcp_v6_xmit,
1983 .send_check = tcp_v6_send_check,
1984 .rebuild_header = tcp_v6_rebuild_header,
1985 .conn_request = tcp_v6_conn_request,
1986 .syn_recv_sock = tcp_v6_syn_recv_sock,
1987 .remember_stamp = tcp_v6_remember_stamp,
1988 .net_header_len = sizeof(struct ipv6hdr),
1989
1990 .setsockopt = ipv6_setsockopt,
1991 .getsockopt = ipv6_getsockopt,
1992 .addr2sockaddr = v6_addr2sockaddr,
1993 .sockaddr_len = sizeof(struct sockaddr_in6)
1994};
1995
1996/*
1997 * TCP over IPv4 via INET6 API
1998 */
1999
2000static struct tcp_func ipv6_mapped = {
2001 .queue_xmit = ip_queue_xmit,
2002 .send_check = tcp_v4_send_check,
Arnaldo Carvalho de Melo32519f12005-08-09 19:50:02 -07002003 .rebuild_header = inet_sk_rebuild_header,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002004 .conn_request = tcp_v6_conn_request,
2005 .syn_recv_sock = tcp_v6_syn_recv_sock,
2006 .remember_stamp = tcp_v4_remember_stamp,
2007 .net_header_len = sizeof(struct iphdr),
2008
2009 .setsockopt = ipv6_setsockopt,
2010 .getsockopt = ipv6_getsockopt,
2011 .addr2sockaddr = v6_addr2sockaddr,
2012 .sockaddr_len = sizeof(struct sockaddr_in6)
2013};
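/* ipv6_specific drives genuine IPv6 connections, while ipv6_mapped is used
 * once a socket talks to a v4-mapped peer: transmission and header rebuilding
 * then follow the IPv4 paths, but connection setup and the sockaddr
 * conversion stay with the AF_INET6 versions. */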
2014
2015
2016
2017/* NOTE: A lot of things are set to zero explicitly by the call to
2018 * sk_alloc(), so they need not be done here.
2019 */
2020static int tcp_v6_init_sock(struct sock *sk)
2021{
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03002022 struct inet_connection_sock *icsk = inet_csk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002023 struct tcp_sock *tp = tcp_sk(sk);
2024
2025 skb_queue_head_init(&tp->out_of_order_queue);
2026 tcp_init_xmit_timers(sk);
2027 tcp_prequeue_init(tp);
2028
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03002029 icsk->icsk_rto = TCP_TIMEOUT_INIT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002030 tp->mdev = TCP_TIMEOUT_INIT;
2031
2032 /* So many TCP implementations out there (incorrectly) count the
2033 * initial SYN frame in their delayed-ACK and congestion control
2034 * algorithms that we must have the following bandaid to talk
2035 * efficiently to them. -DaveM
2036 */
2037 tp->snd_cwnd = 2;
2038
2039 /* See draft-stevens-tcpca-spec-01 for discussion of the
2040 * initialization of these values.
2041 */
2042 tp->snd_ssthresh = 0x7fffffff;
2043 tp->snd_cwnd_clamp = ~0;
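	/* 536 is the classical conservative default MSS; it is replaced by a
	 * route-derived value via tcp_sync_mss() once a destination is known. */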
David S. Millerc1b4a7e2005-07-05 15:24:38 -07002044 tp->mss_cache = 536;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002045
2046 tp->reordering = sysctl_tcp_reordering;
2047
2048 sk->sk_state = TCP_CLOSE;
2049
2050 tp->af_specific = &ipv6_specific;
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03002051 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002052 sk->sk_write_space = sk_stream_write_space;
2053 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2054
2055 sk->sk_sndbuf = sysctl_tcp_wmem[1];
2056 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
2057
2058 atomic_inc(&tcp_sockets_allocated);
2059
2060 return 0;
2061}
2062
2063static int tcp_v6_destroy_sock(struct sock *sk)
2064{
2065 extern int tcp_v4_destroy_sock(struct sock *sk);
2066
2067 tcp_v4_destroy_sock(sk);
2068 return inet6_destroy_sock(sk);
2069}
2070
2071/* Proc filesystem TCPv6 sock list dumping. */
2072static void get_openreq6(struct seq_file *seq,
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07002073 struct sock *sk, struct request_sock *req, int i, int uid)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002074{
2075 struct in6_addr *dest, *src;
2076 int ttd = req->expires - jiffies;
2077
2078 if (ttd < 0)
2079 ttd = 0;
2080
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002081 src = &tcp6_rsk(req)->loc_addr;
2082 dest = &tcp6_rsk(req)->rmt_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002083 seq_printf(seq,
2084 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2085 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2086 i,
2087 src->s6_addr32[0], src->s6_addr32[1],
2088 src->s6_addr32[2], src->s6_addr32[3],
2089 ntohs(inet_sk(sk)->sport),
2090 dest->s6_addr32[0], dest->s6_addr32[1],
2091 dest->s6_addr32[2], dest->s6_addr32[3],
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002092 ntohs(inet_rsk(req)->rmt_port),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002093 TCP_SYN_RECV,
2094 0,0, /* could print option size, but that is af dependent. */
2095 1, /* timers active (only the expire timer) */
2096 jiffies_to_clock_t(ttd),
2097 req->retrans,
2098 uid,
2099 0, /* non standard timer */
2100 0, /* open_requests have no inode */
2101 0, req);
2102}
2103
2104static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2105{
2106 struct in6_addr *dest, *src;
2107 __u16 destp, srcp;
2108 int timer_active;
2109 unsigned long timer_expires;
2110 struct inet_sock *inet = inet_sk(sp);
2111 struct tcp_sock *tp = tcp_sk(sp);
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002112 const struct inet_connection_sock *icsk = inet_csk(sp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002113 struct ipv6_pinfo *np = inet6_sk(sp);
2114
2115 dest = &np->daddr;
2116 src = &np->rcv_saddr;
2117 destp = ntohs(inet->dport);
2118 srcp = ntohs(inet->sport);
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002119
2120 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002121 timer_active = 1;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002122 timer_expires = icsk->icsk_timeout;
2123 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002124 timer_active = 4;
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002125 timer_expires = icsk->icsk_timeout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002126 } else if (timer_pending(&sp->sk_timer)) {
2127 timer_active = 2;
2128 timer_expires = sp->sk_timer.expires;
2129 } else {
2130 timer_active = 0;
2131 timer_expires = jiffies;
2132 }
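	/* timer_active encodes the pending timer for the proc dump:
	 * 1 retransmit, 4 zero-window probe, 2 the generic sk_timer
	 * (keepalive), 0 none. */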
2133
2134 seq_printf(seq,
2135 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2136 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2137 i,
2138 src->s6_addr32[0], src->s6_addr32[1],
2139 src->s6_addr32[2], src->s6_addr32[3], srcp,
2140 dest->s6_addr32[0], dest->s6_addr32[1],
2141 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2142 sp->sk_state,
2143 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2144 timer_active,
2145 jiffies_to_clock_t(timer_expires - jiffies),
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002146 icsk->icsk_retransmits,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002147 sock_i_uid(sp),
Arnaldo Carvalho de Melo6687e982005-08-10 04:03:31 -03002148 icsk->icsk_probes_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002149 sock_i_ino(sp),
2150 atomic_read(&sp->sk_refcnt), sp,
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002151 icsk->icsk_rto,
2152 icsk->icsk_ack.ato,
2153 (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002154 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
2155 );
2156}
2157
2158static void get_timewait6_sock(struct seq_file *seq,
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -07002159 struct inet_timewait_sock *tw, int i)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002160{
2161 struct in6_addr *dest, *src;
2162 __u16 destp, srcp;
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -07002163 struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002164 int ttd = tw->tw_ttd - jiffies;
2165
2166 if (ttd < 0)
2167 ttd = 0;
2168
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -07002169 dest = &tcp6tw->tw_v6_daddr;
2170 src = &tcp6tw->tw_v6_rcv_saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002171 destp = ntohs(tw->tw_dport);
2172 srcp = ntohs(tw->tw_sport);
2173
2174 seq_printf(seq,
2175 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2176 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2177 i,
2178 src->s6_addr32[0], src->s6_addr32[1],
2179 src->s6_addr32[2], src->s6_addr32[3], srcp,
2180 dest->s6_addr32[0], dest->s6_addr32[1],
2181 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2182 tw->tw_substate, 0, 0,
2183 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2184 atomic_read(&tw->tw_refcnt), tw);
2185}
2186
2187#ifdef CONFIG_PROC_FS
2188static int tcp6_seq_show(struct seq_file *seq, void *v)
2189{
2190 struct tcp_iter_state *st;
2191
2192 if (v == SEQ_START_TOKEN) {
2193 seq_puts(seq,
2194 " sl "
2195 "local_address "
2196 "remote_address "
2197 "st tx_queue rx_queue tr tm->when retrnsmt"
2198 " uid timeout inode\n");
2199 goto out;
2200 }
2201 st = seq->private;
2202
2203 switch (st->state) {
2204 case TCP_SEQ_STATE_LISTENING:
2205 case TCP_SEQ_STATE_ESTABLISHED:
2206 get_tcp6_sock(seq, v, st->num);
2207 break;
2208 case TCP_SEQ_STATE_OPENREQ:
2209 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2210 break;
2211 case TCP_SEQ_STATE_TIME_WAIT:
2212 get_timewait6_sock(seq, v, st->num);
2213 break;
2214 }
2215out:
2216 return 0;
2217}
2218
2219static struct file_operations tcp6_seq_fops;
2220static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2221 .owner = THIS_MODULE,
2222 .name = "tcp6",
2223 .family = AF_INET6,
2224 .seq_show = tcp6_seq_show,
2225 .seq_fops = &tcp6_seq_fops,
2226};
2227
2228int __init tcp6_proc_init(void)
2229{
2230 return tcp_proc_register(&tcp6_seq_afinfo);
2231}
2232
2233void tcp6_proc_exit(void)
2234{
2235 tcp_proc_unregister(&tcp6_seq_afinfo);
2236}
2237#endif
2238
2239struct proto tcpv6_prot = {
2240 .name = "TCPv6",
2241 .owner = THIS_MODULE,
2242 .close = tcp_close,
2243 .connect = tcp_v6_connect,
2244 .disconnect = tcp_disconnect,
Arnaldo Carvalho de Melo463c84b2005-08-09 20:10:42 -07002245 .accept = inet_csk_accept,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002246 .ioctl = tcp_ioctl,
2247 .init = tcp_v6_init_sock,
2248 .destroy = tcp_v6_destroy_sock,
2249 .shutdown = tcp_shutdown,
2250 .setsockopt = tcp_setsockopt,
2251 .getsockopt = tcp_getsockopt,
2252 .sendmsg = tcp_sendmsg,
2253 .recvmsg = tcp_recvmsg,
2254 .backlog_rcv = tcp_v6_do_rcv,
2255 .hash = tcp_v6_hash,
2256 .unhash = tcp_unhash,
2257 .get_port = tcp_v6_get_port,
2258 .enter_memory_pressure = tcp_enter_memory_pressure,
2259 .sockets_allocated = &tcp_sockets_allocated,
2260 .memory_allocated = &tcp_memory_allocated,
2261 .memory_pressure = &tcp_memory_pressure,
Arnaldo Carvalho de Melo0a5578c2005-08-09 20:11:41 -07002262 .orphan_count = &tcp_orphan_count,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002263 .sysctl_mem = sysctl_tcp_mem,
2264 .sysctl_wmem = sysctl_tcp_wmem,
2265 .sysctl_rmem = sysctl_tcp_rmem,
2266 .max_header = MAX_TCP_HEADER,
2267 .obj_size = sizeof(struct tcp6_sock),
Arnaldo Carvalho de Melo8feaf0c2005-08-09 20:09:30 -07002268 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
Arnaldo Carvalho de Melo60236fd2005-06-18 22:47:21 -07002269 .rsk_prot = &tcp6_request_sock_ops,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002270};
2271
2272static struct inet6_protocol tcpv6_protocol = {
2273 .handler = tcp_v6_rcv,
2274 .err_handler = tcp_v6_err,
2275 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2276};
2277
2278extern struct proto_ops inet6_stream_ops;
2279
2280static struct inet_protosw tcpv6_protosw = {
2281 .type = SOCK_STREAM,
2282 .protocol = IPPROTO_TCP,
2283 .prot = &tcpv6_prot,
2284 .ops = &inet6_stream_ops,
2285 .capability = -1,
2286 .no_check = 0,
2287 .flags = INET_PROTOSW_PERMANENT,
2288};
2289
2290void __init tcpv6_init(void)
2291{
2292 /* register inet6 protocol */
2293 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2294 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2295 inet6_register_protosw(&tcpv6_protosw);
2296}