blob: c53d7673b57d5b68f7b7784daabba08c0b9c4c8b [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * The User Datagram Protocol (UDP).
7 *
8 * Version: $Id: udp.c,v 1.102 2002/02/01 22:01:04 davem Exp $
9 *
Jesper Juhl02c30a82005-05-05 16:16:16 -070010 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -070011 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
13 * Alan Cox, <Alan.Cox@linux.org>
14 * Hirokazu Takahashi, <taka@valinux.co.jp>
15 *
16 * Fixes:
17 * Alan Cox : verify_area() calls
18 * Alan Cox : stopped close while in use off icmp
19 * messages. Not a fix but a botch that
20 * for udp at least is 'valid'.
21 * Alan Cox : Fixed icmp handling properly
22 * Alan Cox : Correct error for oversized datagrams
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090023 * Alan Cox : Tidied select() semantics.
24 * Alan Cox : udp_err() fixed properly, also now
Linus Torvalds1da177e2005-04-16 15:20:36 -070025 * select and read wake correctly on errors
26 * Alan Cox : udp_send verify_area moved to avoid mem leak
27 * Alan Cox : UDP can count its memory
28 * Alan Cox : send to an unknown connection causes
29 * an ECONNREFUSED off the icmp, but
30 * does NOT close.
31 * Alan Cox : Switched to new sk_buff handlers. No more backlog!
32 * Alan Cox : Using generic datagram code. Even smaller and the PEEK
33 * bug no longer crashes it.
34 * Fred Van Kempen : Net2e support for sk->broadcast.
35 * Alan Cox : Uses skb_free_datagram
36 * Alan Cox : Added get/set sockopt support.
37 * Alan Cox : Broadcasting without option set returns EACCES.
38 * Alan Cox : No wakeup calls. Instead we now use the callbacks.
39 * Alan Cox : Use ip_tos and ip_ttl
40 * Alan Cox : SNMP Mibs
41 * Alan Cox : MSG_DONTROUTE, and 0.0.0.0 support.
42 * Matt Dillon : UDP length checks.
43 * Alan Cox : Smarter af_inet used properly.
44 * Alan Cox : Use new kernel side addressing.
45 * Alan Cox : Incorrect return on truncated datagram receive.
46 * Arnt Gulbrandsen : New udp_send and stuff
47 * Alan Cox : Cache last socket
48 * Alan Cox : Route cache
49 * Jon Peatfield : Minor efficiency fix to sendto().
50 * Mike Shaver : RFC1122 checks.
51 * Alan Cox : Nonblocking error fix.
52 * Willy Konynenberg : Transparent proxying support.
53 * Mike McLagan : Routing by source
54 * David S. Miller : New socket lookup architecture.
55 * Last socket cache retained as it
56 * does have a high hit rate.
57 * Olaf Kirch : Don't linearise iovec on sendmsg.
58 * Andi Kleen : Some cleanups, cache destination entry
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090059 * for connect.
Linus Torvalds1da177e2005-04-16 15:20:36 -070060 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
61 * Melvin Smith : Check msg_name not msg_namelen in sendto(),
62 * return ENOTCONN for unconnected sockets (POSIX)
63 * Janos Farkas : don't deliver multi/broadcasts to a different
64 * bound-to-device socket
65 * Hirokazu Takahashi : HW checksumming for outgoing UDP
66 * datagrams.
67 * Hirokazu Takahashi : sendfile() on UDP works now.
68 * Arnaldo C. Melo : convert /proc/net/udp to seq_file
69 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
70 * Alexey Kuznetsov: allow both IPv4 and IPv6 sockets to bind
71 * a single port at the same time.
72 * Derek Atkins <derek@ihtfp.com>: Add Encapsulation Support
James Chapman342f0232007-06-27 15:37:46 -070073 * James Chapman : Add L2TP encapsulation type.
Linus Torvalds1da177e2005-04-16 15:20:36 -070074 *
75 *
76 * This program is free software; you can redistribute it and/or
77 * modify it under the terms of the GNU General Public License
78 * as published by the Free Software Foundation; either version
79 * 2 of the License, or (at your option) any later version.
80 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090081
Linus Torvalds1da177e2005-04-16 15:20:36 -070082#include <asm/system.h>
83#include <asm/uaccess.h>
84#include <asm/ioctls.h>
Hideo Aoki95766ff2007-12-31 00:29:24 -080085#include <linux/bootmem.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070086#include <linux/types.h>
87#include <linux/fcntl.h>
88#include <linux/module.h>
89#include <linux/socket.h>
90#include <linux/sockios.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020091#include <linux/igmp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070092#include <linux/in.h>
93#include <linux/errno.h>
94#include <linux/timer.h>
95#include <linux/mm.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070096#include <linux/inet.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070097#include <linux/netdevice.h>
Arnaldo Carvalho de Meloc752f072005-08-09 20:08:28 -070098#include <net/tcp_states.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070099#include <linux/skbuff.h>
100#include <linux/proc_fs.h>
101#include <linux/seq_file.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +0200102#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700103#include <net/icmp.h>
104#include <net/route.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700105#include <net/checksum.h>
106#include <net/xfrm.h>
Gerrit Renkerba4e58e2006-11-27 11:10:57 -0800107#include "udp_impl.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -0700108
109/*
110 * Snmp MIB for the UDP layer
111 */
112
Eric Dumazetba899662005-08-26 12:05:31 -0700113DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly;
Herbert Xu1781f7f2007-12-11 11:30:32 -0800114EXPORT_SYMBOL(udp_statistics);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115
Herbert Xu9055e052007-12-14 11:25:26 -0800116DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
117EXPORT_SYMBOL(udp_stats_in6);
118
Linus Torvalds1da177e2005-04-16 15:20:36 -0700119struct hlist_head udp_hash[UDP_HTABLE_SIZE];
120DEFINE_RWLOCK(udp_hash_lock);
121
Hideo Aoki95766ff2007-12-31 00:29:24 -0800122int sysctl_udp_mem[3] __read_mostly;
123int sysctl_udp_rmem_min __read_mostly;
124int sysctl_udp_wmem_min __read_mostly;
125
126EXPORT_SYMBOL(sysctl_udp_mem);
127EXPORT_SYMBOL(sysctl_udp_rmem_min);
128EXPORT_SYMBOL(sysctl_udp_wmem_min);
129
130atomic_t udp_memory_allocated;
131EXPORT_SYMBOL(udp_memory_allocated);
132
Pavel Emelyanovfa4d3c62008-01-31 05:07:57 -0800133static inline int __udp_lib_lport_inuse(struct net *net, __u16 num,
Stephen Hemminger32c1da72007-08-24 23:09:41 -0700134 const struct hlist_head udptable[])
Gerrit Renker25030a72006-08-26 20:06:05 -0700135{
136 struct sock *sk;
137 struct hlist_node *node;
138
David S. Millerdf2bc452007-06-05 15:18:43 -0700139 sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
Pavel Emelyanovfa4d3c62008-01-31 05:07:57 -0800140 if (sk->sk_net == net && sk->sk_hash == num)
Gerrit Renker25030a72006-08-26 20:06:05 -0700141 return 1;
142 return 0;
143}
144
145/**
Gerrit Renkerba4e58e2006-11-27 11:10:57 -0800146 * __udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6
Gerrit Renker25030a72006-08-26 20:06:05 -0700147 *
148 * @sk: socket struct in question
149 * @snum: port number to look up
Gerrit Renkerba4e58e2006-11-27 11:10:57 -0800150 * @udptable: hash list table, must be of UDP_HTABLE_SIZE
David S. Millerdf2bc452007-06-05 15:18:43 -0700151 * @saddr_comp: AF-dependent comparison of bound local IP addresses
Gerrit Renker25030a72006-08-26 20:06:05 -0700152 */
Gerrit Renkerba4e58e2006-11-27 11:10:57 -0800153int __udp_lib_get_port(struct sock *sk, unsigned short snum,
Stephen Hemminger32c1da72007-08-24 23:09:41 -0700154 struct hlist_head udptable[],
David S. Millerdf2bc452007-06-05 15:18:43 -0700155 int (*saddr_comp)(const struct sock *sk1,
156 const struct sock *sk2 ) )
Linus Torvalds1da177e2005-04-16 15:20:36 -0700157{
158 struct hlist_node *node;
Gerrit Renker25030a72006-08-26 20:06:05 -0700159 struct hlist_head *head;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700160 struct sock *sk2;
Gerrit Renker25030a72006-08-26 20:06:05 -0700161 int error = 1;
Pavel Emelyanovfa4d3c62008-01-31 05:07:57 -0800162 struct net *net = sk->sk_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163
164 write_lock_bh(&udp_hash_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700165
Stephen Hemminger32c1da72007-08-24 23:09:41 -0700166 if (!snum) {
Anton Arapova25de532007-10-18 22:00:17 -0700167 int i, low, high, remaining;
Stephen Hemminger32c1da72007-08-24 23:09:41 -0700168 unsigned rover, best, best_size_so_far;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169
Stephen Hemminger227b60f2007-10-10 17:30:46 -0700170 inet_get_local_port_range(&low, &high);
Anton Arapova25de532007-10-18 22:00:17 -0700171 remaining = (high - low) + 1;
Stephen Hemminger227b60f2007-10-10 17:30:46 -0700172
Stephen Hemminger32c1da72007-08-24 23:09:41 -0700173 best_size_so_far = UINT_MAX;
Anton Arapova25de532007-10-18 22:00:17 -0700174 best = rover = net_random() % remaining + low;
Stephen Hemminger32c1da72007-08-24 23:09:41 -0700175
176 /* 1st pass: look for empty (or shortest) hash chain */
177 for (i = 0; i < UDP_HTABLE_SIZE; i++) {
178 int size = 0;
179
180 head = &udptable[rover & (UDP_HTABLE_SIZE - 1)];
181 if (hlist_empty(head))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182 goto gotit;
Stephen Hemminger32c1da72007-08-24 23:09:41 -0700183
David S. Miller5c668702006-12-22 11:42:26 -0800184 sk_for_each(sk2, node, head) {
185 if (++size >= best_size_so_far)
186 goto next;
187 }
188 best_size_so_far = size;
Stephen Hemminger32c1da72007-08-24 23:09:41 -0700189 best = rover;
David S. Miller5c668702006-12-22 11:42:26 -0800190 next:
Stephen Hemminger32c1da72007-08-24 23:09:41 -0700191 /* fold back if end of range */
192 if (++rover > high)
193 rover = low + ((rover - low)
194 & (UDP_HTABLE_SIZE - 1));
195
196
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197 }
Stephen Hemminger32c1da72007-08-24 23:09:41 -0700198
199 /* 2nd pass: find hole in shortest hash chain */
200 rover = best;
201 for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) {
Pavel Emelyanovfa4d3c62008-01-31 05:07:57 -0800202 if (! __udp_lib_lport_inuse(net, rover, udptable))
Stephen Hemminger32c1da72007-08-24 23:09:41 -0700203 goto gotit;
204 rover += UDP_HTABLE_SIZE;
205 if (rover > high)
206 rover = low + ((rover - low)
207 & (UDP_HTABLE_SIZE - 1));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208 }
Stephen Hemminger32c1da72007-08-24 23:09:41 -0700209
210
211 /* All ports in use! */
212 goto fail;
213
Linus Torvalds1da177e2005-04-16 15:20:36 -0700214gotit:
Stephen Hemminger32c1da72007-08-24 23:09:41 -0700215 snum = rover;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216 } else {
David S. Millerdf2bc452007-06-05 15:18:43 -0700217 head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700218
Gerrit Renker25030a72006-08-26 20:06:05 -0700219 sk_for_each(sk2, node, head)
David S. Millerdf2bc452007-06-05 15:18:43 -0700220 if (sk2->sk_hash == snum &&
221 sk2 != sk &&
Pavel Emelyanovfa4d3c62008-01-31 05:07:57 -0800222 sk2->sk_net == net &&
David S. Millerdf2bc452007-06-05 15:18:43 -0700223 (!sk2->sk_reuse || !sk->sk_reuse) &&
224 (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
225 || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
226 (*saddr_comp)(sk, sk2) )
Linus Torvalds1da177e2005-04-16 15:20:36 -0700227 goto fail;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700228 }
Stephen Hemminger32c1da72007-08-24 23:09:41 -0700229
Gerrit Renker25030a72006-08-26 20:06:05 -0700230 inet_sk(sk)->num = snum;
David S. Millerdf2bc452007-06-05 15:18:43 -0700231 sk->sk_hash = snum;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700232 if (sk_unhashed(sk)) {
David S. Millerdf2bc452007-06-05 15:18:43 -0700233 head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
Gerrit Renker25030a72006-08-26 20:06:05 -0700234 sk_add_node(sk, head);
Eric Dumazet65f76512008-01-03 20:46:48 -0800235 sock_prot_inuse_add(sk->sk_prot, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700236 }
Gerrit Renker25030a72006-08-26 20:06:05 -0700237 error = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700238fail:
239 write_unlock_bh(&udp_hash_lock);
Gerrit Renker25030a72006-08-26 20:06:05 -0700240 return error;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241}
242
Stephen Hemminger3fbe0702007-03-08 20:46:41 -0800243int udp_get_port(struct sock *sk, unsigned short snum,
David S. Millerdf2bc452007-06-05 15:18:43 -0700244 int (*scmp)(const struct sock *, const struct sock *))
Gerrit Renkerba4e58e2006-11-27 11:10:57 -0800245{
Stephen Hemminger32c1da72007-08-24 23:09:41 -0700246 return __udp_lib_get_port(sk, snum, udp_hash, scmp);
Gerrit Renkerba4e58e2006-11-27 11:10:57 -0800247}
248
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249/*
250 * IOCTL requests applicable to the UDP protocol
251 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900252
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
254{
Stephen Hemminger6516c652007-03-08 20:41:55 -0800255 switch (cmd) {
256 case SIOCOUTQ:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257 {
Stephen Hemminger6516c652007-03-08 20:41:55 -0800258 int amount = atomic_read(&sk->sk_wmem_alloc);
259 return put_user(amount, (int __user *)arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260 }
Stephen Hemminger6516c652007-03-08 20:41:55 -0800261
262 case SIOCINQ:
263 {
264 struct sk_buff *skb;
265 unsigned long amount;
266
267 amount = 0;
268 spin_lock_bh(&sk->sk_receive_queue.lock);
269 skb = skb_peek(&sk->sk_receive_queue);
270 if (skb != NULL) {
271 /*
272 * We will only return the amount
273 * of this packet since that is all
274 * that will be read.
275 */
276 amount = skb->len - sizeof(struct udphdr);
277 }
278 spin_unlock_bh(&sk->sk_receive_queue.lock);
279 return put_user(amount, (int __user *)arg);
280 }
281
282 default:
283 return -ENOIOCTLCMD;
284 }
285
286 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287}
288
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289int udp_disconnect(struct sock *sk, int flags)
290{
291 struct inet_sock *inet = inet_sk(sk);
292 /*
293 * 1003.1g - break association.
294 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900295
Linus Torvalds1da177e2005-04-16 15:20:36 -0700296 sk->sk_state = TCP_CLOSE;
297 inet->daddr = 0;
298 inet->dport = 0;
299 sk->sk_bound_dev_if = 0;
300 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
301 inet_reset_saddr(sk);
302
303 if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) {
304 sk->sk_prot->unhash(sk);
305 inet->sport = 0;
306 }
307 sk_dst_reset(sk);
308 return 0;
309}
310
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311/*
312 * Socket option code for UDP
313 */
Gerrit Renker4c0a6cb2006-11-27 09:29:59 -0800314int udp_lib_setsockopt(struct sock *sk, int level, int optname,
315 char __user *optval, int optlen,
316 int (*push_pending_frames)(struct sock *))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317{
318 struct udp_sock *up = udp_sk(sk);
319 int val;
320 int err = 0;
YOSHIFUJI Hideakie898d4d2008-03-01 01:06:47 +0900321#ifdef CONFIG_IP_UDPLITE
Wang Chenb2bf1e22007-12-03 22:34:16 +1100322 int is_udplite = IS_UDPLITE(sk);
YOSHIFUJI Hideakie898d4d2008-03-01 01:06:47 +0900323#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700324
Stephen Hemminger6516c652007-03-08 20:41:55 -0800325 if (optlen<sizeof(int))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326 return -EINVAL;
327
328 if (get_user(val, (int __user *)optval))
329 return -EFAULT;
330
Stephen Hemminger6516c652007-03-08 20:41:55 -0800331 switch (optname) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700332 case UDP_CORK:
333 if (val != 0) {
334 up->corkflag = 1;
335 } else {
336 up->corkflag = 0;
337 lock_sock(sk);
Gerrit Renker4c0a6cb2006-11-27 09:29:59 -0800338 (*push_pending_frames)(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700339 release_sock(sk);
340 }
341 break;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900342
Linus Torvalds1da177e2005-04-16 15:20:36 -0700343 case UDP_ENCAP:
344 switch (val) {
345 case 0:
346 case UDP_ENCAP_ESPINUDP:
347 case UDP_ENCAP_ESPINUDP_NON_IKE:
James Chapman067b2072007-07-05 17:08:05 -0700348 up->encap_rcv = xfrm4_udp_encap_rcv;
349 /* FALLTHROUGH */
James Chapman342f0232007-06-27 15:37:46 -0700350 case UDP_ENCAP_L2TPINUDP:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700351 up->encap_type = val;
352 break;
353 default:
354 err = -ENOPROTOOPT;
355 break;
356 }
357 break;
358
YOSHIFUJI Hideakie898d4d2008-03-01 01:06:47 +0900359#ifdef CONFIG_IP_UDPLITE
Gerrit Renkerba4e58e2006-11-27 11:10:57 -0800360 /*
361 * UDP-Lite's partial checksum coverage (RFC 3828).
362 */
363 /* The sender sets actual checksum coverage length via this option.
364 * The case coverage > packet length is handled by send module. */
365 case UDPLITE_SEND_CSCOV:
Wang Chenb2bf1e22007-12-03 22:34:16 +1100366 if (!is_udplite) /* Disable the option on UDP sockets */
Gerrit Renkerba4e58e2006-11-27 11:10:57 -0800367 return -ENOPROTOOPT;
368 if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
369 val = 8;
370 up->pcslen = val;
371 up->pcflag |= UDPLITE_SEND_CC;
372 break;
373
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900374 /* The receiver specifies a minimum checksum coverage value. To make
375 * sense, this should be set to at least 8 (as done below). If zero is
Gerrit Renkerba4e58e2006-11-27 11:10:57 -0800376 * used, this again means full checksum coverage. */
377 case UDPLITE_RECV_CSCOV:
Wang Chenb2bf1e22007-12-03 22:34:16 +1100378 if (!is_udplite) /* Disable the option on UDP sockets */
Gerrit Renkerba4e58e2006-11-27 11:10:57 -0800379 return -ENOPROTOOPT;
380 if (val != 0 && val < 8) /* Avoid silly minimal values. */
381 val = 8;
382 up->pcrlen = val;
383 up->pcflag |= UDPLITE_RECV_CC;
384 break;
YOSHIFUJI Hideakie898d4d2008-03-01 01:06:47 +0900385#endif
Gerrit Renkerba4e58e2006-11-27 11:10:57 -0800386
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387 default:
388 err = -ENOPROTOOPT;
389 break;
Stephen Hemminger6516c652007-03-08 20:41:55 -0800390 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700391
392 return err;
393}
394
Gerrit Renker4c0a6cb2006-11-27 09:29:59 -0800395int udp_lib_getsockopt(struct sock *sk, int level, int optname,
396 char __user *optval, int __user *optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700397{
398 struct udp_sock *up = udp_sk(sk);
399 int val, len;
400
Stephen Hemminger6516c652007-03-08 20:41:55 -0800401 if (get_user(len,optlen))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700402 return -EFAULT;
403
404 len = min_t(unsigned int, len, sizeof(int));
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900405
Stephen Hemminger6516c652007-03-08 20:41:55 -0800406 if (len < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700407 return -EINVAL;
408
Stephen Hemminger6516c652007-03-08 20:41:55 -0800409 switch (optname) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700410 case UDP_CORK:
411 val = up->corkflag;
412 break;
413
414 case UDP_ENCAP:
415 val = up->encap_type;
416 break;
417
Gerrit Renkerba4e58e2006-11-27 11:10:57 -0800418 /* The following two cannot be changed on UDP sockets, the return is
419 * always 0 (which corresponds to the full checksum coverage of UDP). */
420 case UDPLITE_SEND_CSCOV:
421 val = up->pcslen;
422 break;
423
424 case UDPLITE_RECV_CSCOV:
425 val = up->pcrlen;
426 break;
427
Linus Torvalds1da177e2005-04-16 15:20:36 -0700428 default:
429 return -ENOPROTOOPT;
Stephen Hemminger6516c652007-03-08 20:41:55 -0800430 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431
Stephen Hemminger6516c652007-03-08 20:41:55 -0800432 if (put_user(len, optlen))
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900433 return -EFAULT;
Stephen Hemminger6516c652007-03-08 20:41:55 -0800434 if (copy_to_user(optval, &val,len))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435 return -EFAULT;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900436 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437}
438
439/**
440 * udp_poll - wait for a UDP event.
441 * @file - file struct
442 * @sock - socket
443 * @wait - poll table
444 *
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900445 * This is same as datagram poll, except for the special case of
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446 * blocking sockets. If application is using a blocking fd
447 * and a packet with checksum error is in the queue;
448 * then it could get return from select indicating data available
449 * but then block when reading it. Add special case code
450 * to work around these arguably broken applications.
451 */
452unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
453{
454 unsigned int mask = datagram_poll(file, sock, wait);
455 struct sock *sk = sock->sk;
Gerrit Renkerba4e58e2006-11-27 11:10:57 -0800456 int is_lite = IS_UDPLITE(sk);
457
Linus Torvalds1da177e2005-04-16 15:20:36 -0700458 /* Check for false positives due to checksum errors */
459 if ( (mask & POLLRDNORM) &&
460 !(file->f_flags & O_NONBLOCK) &&
461 !(sk->sk_shutdown & RCV_SHUTDOWN)){
462 struct sk_buff_head *rcvq = &sk->sk_receive_queue;
463 struct sk_buff *skb;
464
Herbert Xu208d8982005-05-30 15:50:15 -0700465 spin_lock_bh(&rcvq->lock);
Herbert Xu759e5d02007-03-25 20:10:56 -0700466 while ((skb = skb_peek(rcvq)) != NULL &&
467 udp_lib_checksum_complete(skb)) {
468 UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite);
469 __skb_unlink(skb, rcvq);
470 kfree_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700471 }
Herbert Xu208d8982005-05-30 15:50:15 -0700472 spin_unlock_bh(&rcvq->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700473
474 /* nothing to see, move along */
475 if (skb == NULL)
476 mask &= ~(POLLIN | POLLRDNORM);
477 }
478
479 return mask;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900480
Linus Torvalds1da177e2005-04-16 15:20:36 -0700481}
482
Linus Torvalds1da177e2005-04-16 15:20:36 -0700483
484/* ------------------------------------------------------------------------ */
485#ifdef CONFIG_PROC_FS
486
487static struct sock *udp_get_first(struct seq_file *seq)
488{
489 struct sock *sk;
490 struct udp_iter_state *state = seq->private;
491
492 for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) {
493 struct hlist_node *node;
Gerrit Renkerba4e58e2006-11-27 11:10:57 -0800494 sk_for_each(sk, node, state->hashtable + state->bucket) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700495 if (sk->sk_family == state->family)
496 goto found;
497 }
498 }
499 sk = NULL;
500found:
501 return sk;
502}
503
504static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
505{
506 struct udp_iter_state *state = seq->private;
507
508 do {
509 sk = sk_next(sk);
510try_again:
511 ;
512 } while (sk && sk->sk_family != state->family);
513
514 if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
Gerrit Renkerba4e58e2006-11-27 11:10:57 -0800515 sk = sk_head(state->hashtable + state->bucket);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516 goto try_again;
517 }
518 return sk;
519}
520
521static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
522{
523 struct sock *sk = udp_get_first(seq);
524
525 if (sk)
Stephen Hemminger6516c652007-03-08 20:41:55 -0800526 while (pos && (sk = udp_get_next(seq, sk)) != NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527 --pos;
528 return pos ? NULL : sk;
529}
530
531static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
Eric Dumazet9a429c42008-01-01 21:58:02 -0800532 __acquires(udp_hash_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700533{
534 read_lock(&udp_hash_lock);
535 return *pos ? udp_get_idx(seq, *pos-1) : (void *)1;
536}
537
538static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
539{
540 struct sock *sk;
541
542 if (v == (void *)1)
543 sk = udp_get_idx(seq, 0);
544 else
545 sk = udp_get_next(seq, v);
546
547 ++*pos;
548 return sk;
549}
550
551static void udp_seq_stop(struct seq_file *seq, void *v)
Eric Dumazet9a429c42008-01-01 21:58:02 -0800552 __releases(udp_hash_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700553{
554 read_unlock(&udp_hash_lock);
555}
556
557static int udp_seq_open(struct inode *inode, struct file *file)
558{
559 struct udp_seq_afinfo *afinfo = PDE(inode)->data;
560 struct seq_file *seq;
561 int rc = -ENOMEM;
Panagiotis Issaris0da974f2006-07-21 14:51:30 -0700562 struct udp_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700563
564 if (!s)
565 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566 s->family = afinfo->family;
Gerrit Renkerba4e58e2006-11-27 11:10:57 -0800567 s->hashtable = afinfo->hashtable;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700568 s->seq_ops.start = udp_seq_start;
569 s->seq_ops.next = udp_seq_next;
570 s->seq_ops.show = afinfo->seq_show;
571 s->seq_ops.stop = udp_seq_stop;
572
573 rc = seq_open(file, &s->seq_ops);
574 if (rc)
575 goto out_kfree;
576
577 seq = file->private_data;
578 seq->private = s;
579out:
580 return rc;
581out_kfree:
582 kfree(s);
583 goto out;
584}
585
586/* ------------------------------------------------------------------------ */
587int udp_proc_register(struct udp_seq_afinfo *afinfo)
588{
589 struct proc_dir_entry *p;
590 int rc = 0;
591
592 if (!afinfo)
593 return -EINVAL;
594 afinfo->seq_fops->owner = afinfo->owner;
595 afinfo->seq_fops->open = udp_seq_open;
596 afinfo->seq_fops->read = seq_read;
597 afinfo->seq_fops->llseek = seq_lseek;
598 afinfo->seq_fops->release = seq_release_private;
599
Eric W. Biederman457c4cb2007-09-12 12:01:34 +0200600 p = proc_net_fops_create(&init_net, afinfo->name, S_IRUGO, afinfo->seq_fops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601 if (p)
602 p->data = afinfo;
603 else
604 rc = -ENOMEM;
605 return rc;
606}
607
608void udp_proc_unregister(struct udp_seq_afinfo *afinfo)
609{
610 if (!afinfo)
611 return;
Eric W. Biederman457c4cb2007-09-12 12:01:34 +0200612 proc_net_remove(&init_net, afinfo->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700613 memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
614}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615#endif /* CONFIG_PROC_FS */
616
Hideo Aoki95766ff2007-12-31 00:29:24 -0800617void __init udp_init(void)
618{
619 unsigned long limit;
620
621 /* Set the pressure threshold up by the same strategy of TCP. It is a
622 * fraction of global memory that is up to 1/2 at 256 MB, decreasing
623 * toward zero with the amount of memory, with a floor of 128 pages.
624 */
625 limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
626 limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
627 limit = max(limit, 128UL);
628 sysctl_udp_mem[0] = limit / 4 * 3;
629 sysctl_udp_mem[1] = limit;
630 sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2;
631
632 sysctl_udp_rmem_min = SK_MEM_QUANTUM;
633 sysctl_udp_wmem_min = SK_MEM_QUANTUM;
634}
635
/* Socket hash table and its lock. */
EXPORT_SYMBOL(udp_hash);
EXPORT_SYMBOL(udp_hash_lock);

/* Socket-level operations. */
EXPORT_SYMBOL(udp_get_port);
EXPORT_SYMBOL(udp_disconnect);
EXPORT_SYMBOL(udp_ioctl);
EXPORT_SYMBOL(udp_poll);
EXPORT_SYMBOL(udp_lib_setsockopt);
EXPORT_SYMBOL(udp_lib_getsockopt);

#ifdef CONFIG_PROC_FS
/* proc registration helpers only exist when procfs is configured. */
EXPORT_SYMBOL(udp_proc_register);
EXPORT_SYMBOL(udp_proc_unregister);
#endif