blob: 18a0e69c9dc75f709e15be01e7ef6def17de3eb5 [file] [log] [blame]
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -07001/*
2 * net/dccp/proto.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/config.h>
13#include <linux/dccp.h>
14#include <linux/module.h>
15#include <linux/types.h>
16#include <linux/sched.h>
17#include <linux/kernel.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/if_arp.h>
22#include <linux/init.h>
23#include <linux/random.h>
24#include <net/checksum.h>
25
26#include <net/inet_common.h>
27#include <net/ip.h>
28#include <net/protocol.h>
29#include <net/sock.h>
30#include <net/xfrm.h>
31
32#include <asm/semaphore.h>
33#include <linux/spinlock.h>
34#include <linux/timer.h>
35#include <linux/delay.h>
36#include <linux/poll.h>
37#include <linux/dccp.h>
38
39#include "ccid.h"
40#include "dccp.h"
41
Eric Dumazetba899662005-08-26 12:05:31 -070042DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -070043
44atomic_t dccp_orphan_count = ATOMIC_INIT(0);
45
46static struct net_protocol dccp_protocol = {
47 .handler = dccp_v4_rcv,
48 .err_handler = dccp_v4_err,
49};
50
51const char *dccp_packet_name(const int type)
52{
53 static const char *dccp_packet_names[] = {
54 [DCCP_PKT_REQUEST] = "REQUEST",
55 [DCCP_PKT_RESPONSE] = "RESPONSE",
56 [DCCP_PKT_DATA] = "DATA",
57 [DCCP_PKT_ACK] = "ACK",
58 [DCCP_PKT_DATAACK] = "DATAACK",
59 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
60 [DCCP_PKT_CLOSE] = "CLOSE",
61 [DCCP_PKT_RESET] = "RESET",
62 [DCCP_PKT_SYNC] = "SYNC",
63 [DCCP_PKT_SYNCACK] = "SYNCACK",
64 };
65
66 if (type >= DCCP_NR_PKT_TYPES)
67 return "INVALID";
68 else
69 return dccp_packet_names[type];
70}
71
72EXPORT_SYMBOL_GPL(dccp_packet_name);
73
74const char *dccp_state_name(const int state)
75{
76 static char *dccp_state_names[] = {
77 [DCCP_OPEN] = "OPEN",
78 [DCCP_REQUESTING] = "REQUESTING",
79 [DCCP_PARTOPEN] = "PARTOPEN",
80 [DCCP_LISTEN] = "LISTEN",
81 [DCCP_RESPOND] = "RESPOND",
82 [DCCP_CLOSING] = "CLOSING",
83 [DCCP_TIME_WAIT] = "TIME_WAIT",
84 [DCCP_CLOSED] = "CLOSED",
85 };
86
87 if (state >= DCCP_MAX_STATES)
88 return "INVALID STATE!";
89 else
90 return dccp_state_names[state];
91}
92
93EXPORT_SYMBOL_GPL(dccp_state_name);
94
95static inline int dccp_listen_start(struct sock *sk)
96{
97 dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN;
98 return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
99}
100
101int dccp_disconnect(struct sock *sk, int flags)
102{
103 struct inet_connection_sock *icsk = inet_csk(sk);
104 struct inet_sock *inet = inet_sk(sk);
105 int err = 0;
106 const int old_state = sk->sk_state;
107
108 if (old_state != DCCP_CLOSED)
109 dccp_set_state(sk, DCCP_CLOSED);
110
111 /* ABORT function of RFC793 */
112 if (old_state == DCCP_LISTEN) {
113 inet_csk_listen_stop(sk);
114 /* FIXME: do the active reset thing */
115 } else if (old_state == DCCP_REQUESTING)
116 sk->sk_err = ECONNRESET;
117
118 dccp_clear_xmit_timers(sk);
119 __skb_queue_purge(&sk->sk_receive_queue);
120 if (sk->sk_send_head != NULL) {
121 __kfree_skb(sk->sk_send_head);
122 sk->sk_send_head = NULL;
123 }
124
125 inet->dport = 0;
126
127 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
128 inet_reset_saddr(sk);
129
130 sk->sk_shutdown = 0;
131 sock_reset_flag(sk, SOCK_DONE);
132
133 icsk->icsk_backoff = 0;
134 inet_csk_delack_init(sk);
135 __sk_dst_reset(sk);
136
137 BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
138
139 sk->sk_error_report(sk);
140 return err;
141}
142
Arnaldo Carvalho de Melo331968b2005-08-23 21:54:23 -0700143/*
144 * Wait for a DCCP event.
145 *
146 * Note that we don't need to lock the socket, as the upper poll layers
147 * take care of normal races (between the test and the event) and we don't
148 * go look at any of the socket buffers directly.
149 */
150static unsigned int dccp_poll(struct file *file, struct socket *sock,
151 poll_table *wait)
152{
153 unsigned int mask;
154 struct sock *sk = sock->sk;
155
156 poll_wait(file, sk->sk_sleep, wait);
157 if (sk->sk_state == DCCP_LISTEN)
158 return inet_csk_listen_poll(sk);
159
160 /* Socket is not locked. We are protected from async events
161 by poll logic and correct handling of state changes
162 made by another threads is impossible in any case.
163 */
164
165 mask = 0;
166 if (sk->sk_err)
167 mask = POLLERR;
168
169 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
170 mask |= POLLHUP;
171 if (sk->sk_shutdown & RCV_SHUTDOWN)
172 mask |= POLLIN | POLLRDNORM;
173
174 /* Connected? */
175 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
176 if (atomic_read(&sk->sk_rmem_alloc) > 0)
177 mask |= POLLIN | POLLRDNORM;
178
179 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
180 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
181 mask |= POLLOUT | POLLWRNORM;
182 } else { /* send SIGIO later */
183 set_bit(SOCK_ASYNC_NOSPACE,
184 &sk->sk_socket->flags);
185 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
186
187 /* Race breaker. If space is freed after
188 * wspace test but before the flags are set,
189 * IO signal will be lost.
190 */
191 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
192 mask |= POLLOUT | POLLWRNORM;
193 }
194 }
195 }
196 return mask;
197}
198
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700199int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
200{
201 dccp_pr_debug("entry\n");
202 return -ENOIOCTLCMD;
203}
204
205int dccp_setsockopt(struct sock *sk, int level, int optname,
Arnaldo Carvalho de Meloa1d3a352005-08-13 22:42:25 -0300206 char __user *optval, int optlen)
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700207{
Arnaldo Carvalho de Meloa84ffe42005-08-28 04:51:32 -0300208 struct dccp_sock *dp;
209 int err;
210 int val;
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700211
212 if (level != SOL_DCCP)
213 return ip_setsockopt(sk, level, optname, optval, optlen);
214
Arnaldo Carvalho de Meloa84ffe42005-08-28 04:51:32 -0300215 if (optlen < sizeof(int))
216 return -EINVAL;
217
218 if (get_user(val, (int __user *)optval))
219 return -EFAULT;
220
221 lock_sock(sk);
222
223 dp = dccp_sk(sk);
224 err = 0;
225
226 switch (optname) {
227 case DCCP_SOCKOPT_PACKET_SIZE:
228 dp->dccps_packet_size = val;
229 break;
230 default:
231 err = -ENOPROTOOPT;
232 break;
233 }
234
235 release_sock(sk);
236 return err;
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700237}
238
239int dccp_getsockopt(struct sock *sk, int level, int optname,
Arnaldo Carvalho de Meloa1d3a352005-08-13 22:42:25 -0300240 char __user *optval, int __user *optlen)
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700241{
Arnaldo Carvalho de Meloa84ffe42005-08-28 04:51:32 -0300242 struct dccp_sock *dp;
243 int val, len;
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700244
245 if (level != SOL_DCCP)
246 return ip_getsockopt(sk, level, optname, optval, optlen);
247
Arnaldo Carvalho de Meloa84ffe42005-08-28 04:51:32 -0300248 if (get_user(len, optlen))
249 return -EFAULT;
250
251 len = min_t(unsigned int, len, sizeof(int));
252 if (len < 0)
253 return -EINVAL;
254
255 dp = dccp_sk(sk);
256
257 switch (optname) {
258 case DCCP_SOCKOPT_PACKET_SIZE:
259 val = dp->dccps_packet_size;
260 break;
261 default:
262 return -ENOPROTOOPT;
263 }
264
265 if (put_user(len, optlen) || copy_to_user(optval, &val, len))
266 return -EFAULT;
267
268 return 0;
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700269}
270
271int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
272 size_t len)
273{
274 const struct dccp_sock *dp = dccp_sk(sk);
275 const int flags = msg->msg_flags;
276 const int noblock = flags & MSG_DONTWAIT;
277 struct sk_buff *skb;
278 int rc, size;
279 long timeo;
280
281 if (len > dp->dccps_mss_cache)
282 return -EMSGSIZE;
283
284 lock_sock(sk);
Arnaldo Carvalho de Melo27258ee2005-08-09 20:30:56 -0700285 timeo = sock_sndtimeo(sk, noblock);
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700286
287 /*
288 * We have to use sk_stream_wait_connect here to set sk_write_pending,
289 * so that the trick in dccp_rcv_request_sent_state_process.
290 */
291 /* Wait for a connection to finish. */
292 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
293 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
Arnaldo Carvalho de Melo27258ee2005-08-09 20:30:56 -0700294 goto out_release;
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700295
296 size = sk->sk_prot->max_header + len;
297 release_sock(sk);
298 skb = sock_alloc_send_skb(sk, size, noblock, &rc);
299 lock_sock(sk);
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700300 if (skb == NULL)
301 goto out_release;
302
303 skb_reserve(skb, sk->sk_prot->max_header);
304 rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
Arnaldo Carvalho de Melo27258ee2005-08-09 20:30:56 -0700305 if (rc != 0)
306 goto out_discard;
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700307
Arnaldo Carvalho de Melod6809c12005-08-27 03:06:35 -0300308 rc = dccp_write_xmit(sk, skb, &timeo);
Arnaldo Carvalho de Melo20472af2005-08-23 21:50:21 -0700309 /*
310 * XXX we don't use sk_write_queue, so just discard the packet.
311 * Current plan however is to _use_ sk_write_queue with
312 * an algorith similar to tcp_sendmsg, where the main difference
313 * is that in DCCP we have to respect packet boundaries, so
314 * no coalescing of skbs.
315 *
316 * This bug was _quickly_ found & fixed by just looking at an OSTRA
317 * generated callgraph 8) -acme
318 */
319 if (rc != 0)
320 goto out_discard;
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700321out_release:
322 release_sock(sk);
323 return rc ? : len;
Arnaldo Carvalho de Melo27258ee2005-08-09 20:30:56 -0700324out_discard:
325 kfree_skb(skb);
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700326 goto out_release;
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700327}
328
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700329int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
330 size_t len, int nonblock, int flags, int *addr_len)
331{
332 const struct dccp_hdr *dh;
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700333 long timeo;
334
335 lock_sock(sk);
336
Arnaldo Carvalho de Melo531669a2005-08-13 20:35:17 -0300337 if (sk->sk_state == DCCP_LISTEN) {
338 len = -ENOTCONN;
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700339 goto out;
Arnaldo Carvalho de Melo531669a2005-08-13 20:35:17 -0300340 }
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700341
342 timeo = sock_rcvtimeo(sk, nonblock);
343
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700344 do {
Arnaldo Carvalho de Melo531669a2005-08-13 20:35:17 -0300345 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700346
Arnaldo Carvalho de Melo531669a2005-08-13 20:35:17 -0300347 if (skb == NULL)
348 goto verify_sock_status;
349
350 dh = dccp_hdr(skb);
351
352 if (dh->dccph_type == DCCP_PKT_DATA ||
353 dh->dccph_type == DCCP_PKT_DATAACK)
354 goto found_ok_skb;
355
356 if (dh->dccph_type == DCCP_PKT_RESET ||
357 dh->dccph_type == DCCP_PKT_CLOSE) {
358 dccp_pr_debug("found fin ok!\n");
359 len = 0;
360 goto found_fin_ok;
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700361 }
Arnaldo Carvalho de Melo531669a2005-08-13 20:35:17 -0300362 dccp_pr_debug("packet_type=%s\n",
363 dccp_packet_name(dh->dccph_type));
364 sk_eat_skb(sk, skb);
365verify_sock_status:
366 if (sock_flag(sk, SOCK_DONE)) {
367 len = 0;
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700368 break;
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700369 }
370
Arnaldo Carvalho de Melo531669a2005-08-13 20:35:17 -0300371 if (sk->sk_err) {
372 len = sock_error(sk);
373 break;
374 }
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700375
Arnaldo Carvalho de Melo531669a2005-08-13 20:35:17 -0300376 if (sk->sk_shutdown & RCV_SHUTDOWN) {
377 len = 0;
378 break;
379 }
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700380
Arnaldo Carvalho de Melo531669a2005-08-13 20:35:17 -0300381 if (sk->sk_state == DCCP_CLOSED) {
382 if (!sock_flag(sk, SOCK_DONE)) {
383 /* This occurs when user tries to read
384 * from never connected socket.
385 */
386 len = -ENOTCONN;
387 break;
388 }
389 len = 0;
390 break;
391 }
392
393 if (!timeo) {
394 len = -EAGAIN;
395 break;
396 }
397
398 if (signal_pending(current)) {
399 len = sock_intr_errno(timeo);
400 break;
401 }
402
403 sk_wait_data(sk, &timeo);
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700404 continue;
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700405 found_ok_skb:
Arnaldo Carvalho de Melo531669a2005-08-13 20:35:17 -0300406 if (len > skb->len)
407 len = skb->len;
408 else if (len < skb->len)
409 msg->msg_flags |= MSG_TRUNC;
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700410
Arnaldo Carvalho de Melo531669a2005-08-13 20:35:17 -0300411 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
412 /* Exception. Bailout! */
413 len = -EFAULT;
414 break;
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700415 }
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700416 found_fin_ok:
417 if (!(flags & MSG_PEEK))
418 sk_eat_skb(sk, skb);
419 break;
Arnaldo Carvalho de Melo531669a2005-08-13 20:35:17 -0300420 } while (1);
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700421out:
422 release_sock(sk);
Arnaldo Carvalho de Melo531669a2005-08-13 20:35:17 -0300423 return len;
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700424}
425
426static int inet_dccp_listen(struct socket *sock, int backlog)
427{
428 struct sock *sk = sock->sk;
429 unsigned char old_state;
430 int err;
431
432 lock_sock(sk);
433
434 err = -EINVAL;
435 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
436 goto out;
437
438 old_state = sk->sk_state;
439 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
440 goto out;
441
442 /* Really, if the socket is already in listen state
443 * we can only allow the backlog to be adjusted.
444 */
445 if (old_state != DCCP_LISTEN) {
446 /*
447 * FIXME: here it probably should be sk->sk_prot->listen_start
448 * see tcp_listen_start
449 */
450 err = dccp_listen_start(sk);
451 if (err)
452 goto out;
453 }
454 sk->sk_max_ack_backlog = backlog;
455 err = 0;
456
457out:
458 release_sock(sk);
459 return err;
460}
461
462static const unsigned char dccp_new_state[] = {
Arnaldo Carvalho de Melo7690af32005-08-13 20:34:54 -0300463 /* current state: new state: action: */
464 [0] = DCCP_CLOSED,
465 [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
466 [DCCP_REQUESTING] = DCCP_CLOSED,
467 [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
468 [DCCP_LISTEN] = DCCP_CLOSED,
469 [DCCP_RESPOND] = DCCP_CLOSED,
470 [DCCP_CLOSING] = DCCP_CLOSED,
471 [DCCP_TIME_WAIT] = DCCP_CLOSED,
472 [DCCP_CLOSED] = DCCP_CLOSED,
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700473};
474
475static int dccp_close_state(struct sock *sk)
476{
477 const int next = dccp_new_state[sk->sk_state];
478 const int ns = next & DCCP_STATE_MASK;
479
480 if (ns != sk->sk_state)
481 dccp_set_state(sk, ns);
482
483 return next & DCCP_ACTION_FIN;
484}
485
486void dccp_close(struct sock *sk, long timeout)
487{
488 struct sk_buff *skb;
489
490 lock_sock(sk);
491
492 sk->sk_shutdown = SHUTDOWN_MASK;
493
494 if (sk->sk_state == DCCP_LISTEN) {
495 dccp_set_state(sk, DCCP_CLOSED);
496
497 /* Special case. */
498 inet_csk_listen_stop(sk);
499
500 goto adjudge_to_death;
501 }
502
503 /*
504 * We need to flush the recv. buffs. We do this only on the
505 * descriptor close, not protocol-sourced closes, because the
506 *reader process may not have drained the data yet!
507 */
508 /* FIXME: check for unread data */
509 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
510 __kfree_skb(skb);
511 }
512
513 if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
514 /* Check zero linger _after_ checking for unread data. */
515 sk->sk_prot->disconnect(sk, 0);
516 } else if (dccp_close_state(sk)) {
Arnaldo Carvalho de Melo7ad07e72005-08-23 21:50:06 -0700517 dccp_send_close(sk, 1);
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700518 }
519
520 sk_stream_wait_close(sk, timeout);
521
522adjudge_to_death:
Arnaldo Carvalho de Melo7ad07e72005-08-23 21:50:06 -0700523 /*
524 * It is the last release_sock in its life. It will remove backlog.
525 */
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700526 release_sock(sk);
527 /*
528 * Now socket is owned by kernel and we acquire BH lock
529 * to finish close. No need to check for user refs.
530 */
531 local_bh_disable();
532 bh_lock_sock(sk);
533 BUG_TRAP(!sock_owned_by_user(sk));
534
535 sock_hold(sk);
536 sock_orphan(sk);
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700537
Arnaldo Carvalho de Melo7ad07e72005-08-23 21:50:06 -0700538 /*
539 * The last release_sock may have processed the CLOSE or RESET
540 * packet moving sock to CLOSED state, if not we have to fire
541 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
542 * in draft-ietf-dccp-spec-11. -acme
543 */
544 if (sk->sk_state == DCCP_CLOSING) {
545 /* FIXME: should start at 2 * RTT */
546 /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
547 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
548 inet_csk(sk)->icsk_rto,
549 DCCP_RTO_MAX);
550#if 0
551 /* Yeah, we should use sk->sk_prot->orphan_count, etc */
552 dccp_set_state(sk, DCCP_CLOSED);
553#endif
554 }
555
556 atomic_inc(sk->sk_prot->orphan_count);
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700557 if (sk->sk_state == DCCP_CLOSED)
558 inet_csk_destroy_sock(sk);
559
560 /* Otherwise, socket is reprieved until protocol close. */
561
562 bh_unlock_sock(sk);
563 local_bh_enable();
564 sock_put(sk);
565}
566
567void dccp_shutdown(struct sock *sk, int how)
568{
569 dccp_pr_debug("entry\n");
570}
571
Arnaldo Carvalho de Meloa1d3a352005-08-13 22:42:25 -0300572static struct proto_ops inet_dccp_ops = {
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700573 .family = PF_INET,
574 .owner = THIS_MODULE,
575 .release = inet_release,
576 .bind = inet_bind,
577 .connect = inet_stream_connect,
578 .socketpair = sock_no_socketpair,
579 .accept = inet_accept,
580 .getname = inet_getname,
Arnaldo Carvalho de Melo331968b2005-08-23 21:54:23 -0700581 /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
582 .poll = dccp_poll,
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700583 .ioctl = inet_ioctl,
Arnaldo Carvalho de Melo7690af32005-08-13 20:34:54 -0300584 /* FIXME: work on inet_listen to rename it to sock_common_listen */
585 .listen = inet_dccp_listen,
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700586 .shutdown = inet_shutdown,
587 .setsockopt = sock_common_setsockopt,
588 .getsockopt = sock_common_getsockopt,
589 .sendmsg = inet_sendmsg,
590 .recvmsg = sock_common_recvmsg,
591 .mmap = sock_no_mmap,
592 .sendpage = sock_no_sendpage,
593};
594
595extern struct net_proto_family inet_family_ops;
596
597static struct inet_protosw dccp_v4_protosw = {
598 .type = SOCK_DCCP,
599 .protocol = IPPROTO_DCCP,
600 .prot = &dccp_v4_prot,
601 .ops = &inet_dccp_ops,
602 .capability = -1,
603 .no_check = 0,
604 .flags = 0,
605};
606
607/*
608 * This is the global socket data structure used for responding to
609 * the Out-of-the-blue (OOTB) packets. A control sock will be created
610 * for this socket at the initialization time.
611 */
612struct socket *dccp_ctl_socket;
613
614static char dccp_ctl_socket_err_msg[] __initdata =
615 KERN_ERR "DCCP: Failed to create the control socket.\n";
616
617static int __init dccp_ctl_sock_init(void)
618{
619 int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
620 &dccp_ctl_socket);
621 if (rc < 0)
622 printk(dccp_ctl_socket_err_msg);
623 else {
624 dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
625 inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;
626
627 /* Unhash it so that IP input processing does not even
628 * see it, we do not wish this socket to see incoming
629 * packets.
630 */
631 dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
632 }
633
634 return rc;
635}
636
Arnaldo Carvalho de Melo725ba8e2005-08-13 20:35:39 -0300637#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
638void dccp_ctl_sock_exit(void)
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700639{
Arnaldo Carvalho de Melo54808552005-08-18 20:47:02 -0300640 if (dccp_ctl_socket != NULL) {
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700641 sock_release(dccp_ctl_socket);
Arnaldo Carvalho de Melo54808552005-08-18 20:47:02 -0300642 dccp_ctl_socket = NULL;
643 }
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700644}
645
Arnaldo Carvalho de Melo725ba8e2005-08-13 20:35:39 -0300646EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
647#endif
648
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700649static int __init init_dccp_v4_mibs(void)
650{
651 int rc = -ENOMEM;
652
653 dccp_statistics[0] = alloc_percpu(struct dccp_mib);
654 if (dccp_statistics[0] == NULL)
655 goto out;
656
657 dccp_statistics[1] = alloc_percpu(struct dccp_mib);
658 if (dccp_statistics[1] == NULL)
659 goto out_free_one;
660
661 rc = 0;
662out:
663 return rc;
664out_free_one:
665 free_percpu(dccp_statistics[0]);
666 dccp_statistics[0] = NULL;
667 goto out;
668
669}
670
671static int thash_entries;
672module_param(thash_entries, int, 0444);
673MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
674
Arnaldo Carvalho de Meloa1d3a352005-08-13 22:42:25 -0300675#ifdef CONFIG_IP_DCCP_DEBUG
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700676int dccp_debug;
677module_param(dccp_debug, int, 0444);
678MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
Arnaldo Carvalho de Meloa1d3a352005-08-13 22:42:25 -0300679#endif
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700680
681static int __init dccp_init(void)
682{
683 unsigned long goal;
684 int ehash_order, bhash_order, i;
685 int rc = proto_register(&dccp_v4_prot, 1);
686
687 if (rc)
688 goto out;
689
Arnaldo Carvalho de Melo7690af32005-08-13 20:34:54 -0300690 dccp_hashinfo.bind_bucket_cachep =
691 kmem_cache_create("dccp_bind_bucket",
692 sizeof(struct inet_bind_bucket), 0,
693 SLAB_HWCACHE_ALIGN, NULL, NULL);
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700694 if (!dccp_hashinfo.bind_bucket_cachep)
695 goto out_proto_unregister;
696
697 /*
698 * Size and allocate the main established and bind bucket
699 * hash tables.
700 *
701 * The methodology is similar to that of the buffer cache.
702 */
703 if (num_physpages >= (128 * 1024))
704 goal = num_physpages >> (21 - PAGE_SHIFT);
705 else
706 goal = num_physpages >> (23 - PAGE_SHIFT);
707
708 if (thash_entries)
Arnaldo Carvalho de Melo7690af32005-08-13 20:34:54 -0300709 goal = (thash_entries *
710 sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700711 for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
712 ;
713 do {
714 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
715 sizeof(struct inet_ehash_bucket);
716 dccp_hashinfo.ehash_size >>= 1;
Arnaldo Carvalho de Melo7690af32005-08-13 20:34:54 -0300717 while (dccp_hashinfo.ehash_size &
718 (dccp_hashinfo.ehash_size - 1))
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700719 dccp_hashinfo.ehash_size--;
720 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
721 __get_free_pages(GFP_ATOMIC, ehash_order);
722 } while (!dccp_hashinfo.ehash && --ehash_order > 0);
723
724 if (!dccp_hashinfo.ehash) {
725 printk(KERN_CRIT "Failed to allocate DCCP "
726 "established hash table\n");
727 goto out_free_bind_bucket_cachep;
728 }
729
730 for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
731 rwlock_init(&dccp_hashinfo.ehash[i].lock);
732 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
733 }
734
735 bhash_order = ehash_order;
736
737 do {
738 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
739 sizeof(struct inet_bind_hashbucket);
Arnaldo Carvalho de Melo7690af32005-08-13 20:34:54 -0300740 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
741 bhash_order > 0)
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700742 continue;
743 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
744 __get_free_pages(GFP_ATOMIC, bhash_order);
745 } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
746
747 if (!dccp_hashinfo.bhash) {
748 printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
749 goto out_free_dccp_ehash;
750 }
751
752 for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
753 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
754 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
755 }
756
757 if (init_dccp_v4_mibs())
758 goto out_free_dccp_bhash;
759
760 rc = -EAGAIN;
761 if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
762 goto out_free_dccp_v4_mibs;
763
764 inet_register_protosw(&dccp_v4_protosw);
765
766 rc = dccp_ctl_sock_init();
767 if (rc)
768 goto out_unregister_protosw;
769out:
770 return rc;
771out_unregister_protosw:
772 inet_unregister_protosw(&dccp_v4_protosw);
773 inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
774out_free_dccp_v4_mibs:
775 free_percpu(dccp_statistics[0]);
776 free_percpu(dccp_statistics[1]);
777 dccp_statistics[0] = dccp_statistics[1] = NULL;
778out_free_dccp_bhash:
779 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
780 dccp_hashinfo.bhash = NULL;
781out_free_dccp_ehash:
782 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
783 dccp_hashinfo.ehash = NULL;
784out_free_bind_bucket_cachep:
785 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
786 dccp_hashinfo.bind_bucket_cachep = NULL;
787out_proto_unregister:
788 proto_unregister(&dccp_v4_prot);
789 goto out;
790}
791
792static const char dccp_del_proto_err_msg[] __exitdata =
793 KERN_ERR "can't remove dccp net_protocol\n";
794
795static void __exit dccp_fini(void)
796{
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700797 inet_unregister_protosw(&dccp_v4_protosw);
798
799 if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
800 printk(dccp_del_proto_err_msg);
801
Arnaldo Carvalho de Melo725ba8e2005-08-13 20:35:39 -0300802 free_percpu(dccp_statistics[0]);
803 free_percpu(dccp_statistics[1]);
804 free_pages((unsigned long)dccp_hashinfo.bhash,
805 get_order(dccp_hashinfo.bhash_size *
806 sizeof(struct inet_bind_hashbucket)));
807 free_pages((unsigned long)dccp_hashinfo.ehash,
808 get_order(dccp_hashinfo.ehash_size *
809 sizeof(struct inet_ehash_bucket)));
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700810 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
Arnaldo Carvalho de Melo725ba8e2005-08-13 20:35:39 -0300811 proto_unregister(&dccp_v4_prot);
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700812}
813
814module_init(dccp_init);
815module_exit(dccp_fini);
816
Arnaldo Carvalho de Melobb97d312005-08-09 20:19:14 -0700817/*
818 * __stringify doesn't likes enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33)
819 * values directly, Also cover the case where the protocol is not specified,
820 * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
821 */
822MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
823MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
Arnaldo Carvalho de Melo7c657872005-08-09 20:14:34 -0700824MODULE_LICENSE("GPL");
825MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
826MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");