blob: 190de61cd648db8400ec852e40a652a6897a9e7d [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Generic socket support routines. Memory allocators, socket lock/release
7 * handler for protocols to use and generic option handler.
8 *
9 *
10 * Version: $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
11 *
Jesper Juhl02c30a82005-05-05 16:16:16 -070012 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -070013 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14 * Florian La Roche, <flla@stud.uni-sb.de>
15 * Alan Cox, <A.Cox@swansea.ac.uk>
16 *
17 * Fixes:
18 * Alan Cox : Numerous verify_area() problems
19 * Alan Cox : Connecting on a connecting socket
20 * now returns an error for tcp.
21 * Alan Cox : sock->protocol is set correctly.
22 * and is not sometimes left as 0.
23 * Alan Cox : connect handles icmp errors on a
24 * connect properly. Unfortunately there
25 * is a restart syscall nasty there. I
26 * can't match BSD without hacking the C
27 * library. Ideas urgently sought!
28 * Alan Cox : Disallow bind() to addresses that are
29 * not ours - especially broadcast ones!!
30 * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost)
31 * Alan Cox : sock_wfree/sock_rfree don't destroy sockets,
32 * instead they leave that for the DESTROY timer.
33 * Alan Cox : Clean up error flag in accept
34 * Alan Cox : TCP ack handling is buggy, the DESTROY timer
35 * was buggy. Put a remove_sock() in the handler
36 * for memory when we hit 0. Also altered the timer
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +090037 * code. The ACK stuff can wait and needs major
Linus Torvalds1da177e2005-04-16 15:20:36 -070038 * TCP layer surgery.
39 * Alan Cox : Fixed TCP ack bug, removed remove sock
40 * and fixed timer/inet_bh race.
41 * Alan Cox : Added zapped flag for TCP
42 * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code
43 * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
44 * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources
45 * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing.
46 * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
47 * Rick Sladkey : Relaxed UDP rules for matching packets.
48 * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support
49 * Pauline Middelink : identd support
50 * Alan Cox : Fixed connect() taking signals I think.
51 * Alan Cox : SO_LINGER supported
52 * Alan Cox : Error reporting fixes
53 * Anonymous : inet_create tidied up (sk->reuse setting)
54 * Alan Cox : inet sockets don't set sk->type!
55 * Alan Cox : Split socket option code
56 * Alan Cox : Callbacks
57 * Alan Cox : Nagle flag for Charles & Johannes stuff
58 * Alex : Removed restriction on inet fioctl
59 * Alan Cox : Splitting INET from NET core
60 * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt()
61 * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code
62 * Alan Cox : Split IP from generic code
63 * Alan Cox : New kfree_skbmem()
64 * Alan Cox : Make SO_DEBUG superuser only.
65 * Alan Cox : Allow anyone to clear SO_DEBUG
66 * (compatibility fix)
67 * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput.
68 * Alan Cox : Allocator for a socket is settable.
69 * Alan Cox : SO_ERROR includes soft errors.
70 * Alan Cox : Allow NULL arguments on some SO_ opts
71 * Alan Cox : Generic socket allocation to make hooks
72 * easier (suggested by Craig Metz).
73 * Michael Pall : SO_ERROR returns positive errno again
74 * Steve Whitehouse: Added default destructor to free
75 * protocol private data.
76 * Steve Whitehouse: Added various other default routines
77 * common to several socket families.
78 * Chris Evans : Call suser() check last on F_SETOWN
79 * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
80 * Andi Kleen : Add sock_kmalloc()/sock_kfree_s()
81 * Andi Kleen : Fix write_space callback
82 * Chris Evans : Security fixes - signedness again
83 * Arnaldo C. Melo : cleanups, use skb_queue_purge
84 *
85 * To Fix:
86 *
87 *
88 * This program is free software; you can redistribute it and/or
89 * modify it under the terms of the GNU General Public License
90 * as published by the Free Software Foundation; either version
91 * 2 of the License, or (at your option) any later version.
92 */
93
Randy Dunlap4fc268d2006-01-11 12:17:47 -080094#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070095#include <linux/errno.h>
96#include <linux/types.h>
97#include <linux/socket.h>
98#include <linux/in.h>
99#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100#include <linux/module.h>
101#include <linux/proc_fs.h>
102#include <linux/seq_file.h>
103#include <linux/sched.h>
104#include <linux/timer.h>
105#include <linux/string.h>
106#include <linux/sockios.h>
107#include <linux/net.h>
108#include <linux/mm.h>
109#include <linux/slab.h>
110#include <linux/interrupt.h>
111#include <linux/poll.h>
112#include <linux/tcp.h>
113#include <linux/init.h>
Al Viroa1f8e7f72006-10-19 16:08:53 -0400114#include <linux/highmem.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115
116#include <asm/uaccess.h>
117#include <asm/system.h>
118
119#include <linux/netdevice.h>
120#include <net/protocol.h>
121#include <linux/skbuff.h>
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700122#include <net/request_sock.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700123#include <net/sock.h>
124#include <net/xfrm.h>
125#include <linux/ipsec.h>
126
127#include <linux/filter.h>
128
129#ifdef CONFIG_INET
130#include <net/tcp.h>
131#endif
132
/*
 * Each address family might have different locking rules, so we have
 * one slock key per address family:
 */
/* lockdep classes: af_family_keys for sk_lock itself, af_family_slock_keys
 * for the underlying sk_lock.slock spinlock, both indexed by sk_family.
 */
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];
139
140#ifdef CONFIG_DEBUG_LOCK_ALLOC
141/*
142 * Make lock validator output more readable. (we pre-construct these
143 * strings build-time, so that runtime initialization of socket
144 * locks is fast):
145 */
/* lockdep class names for sk_lock, indexed by address family.
 * Entries must stay in sync with the AF_* numbering; the IUCV entry is
 * spelled "AF_IUCV" to match the slock/clock tables for the same family.
 */
static const char *af_family_key_strings[AF_MAX+1] = {
	"sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
	"sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
	"sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
	"sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
	"sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
	"sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
	"sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
	"sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
	"sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
	"sk_lock-27"       , "sk_lock-28"          , "sk_lock-29"          ,
	"sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_IUCV"     ,
	"sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
};
/* lockdep class names for sk_lock.slock, indexed by address family. */
static const char *af_family_slock_key_strings[AF_MAX+1] = {
	"slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
	"slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
	"slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
	"slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
	"slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
	"slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
	"slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
	"slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
	"slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
	"slock-27"       , "slock-28"          , "slock-29"          ,
	"slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
	"slock-AF_RXRPC" , "slock-AF_MAX"
};
/* Per-family lockdep class names for a third socket lock class
 * (NOTE(review): presumably sk_callback_lock — confirm against the
 * lockdep_set_class_and_name() call sites).
 */
static const char *af_family_clock_key_strings[AF_MAX+1] = {
	"clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
	"clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
	"clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
	"clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
	"clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
	"clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
	"clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
	"clock-21"       , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
	"clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
	"clock-27"       , "clock-28"          , "clock-29"          ,
	"clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
	"clock-AF_RXRPC" , "clock-AF_MAX"
};
Ingo Molnara5b5bb92006-07-03 00:25:35 -0700188#endif
Ingo Molnarda21f242006-07-03 00:25:12 -0700189
/*
 * sk_callback_lock locking rules are per-address-family,
 * so split the lock classes by using a per-AF key:
 */
static struct lock_class_key af_callback_keys[AF_MAX];
195
/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS		256
#define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)

/* Run time adjustable parameters (net.core.{w,r}mem_{max,default}). */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700214
215static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
216{
217 struct timeval tv;
218
219 if (optlen < sizeof(tv))
220 return -EINVAL;
221 if (copy_from_user(&tv, optval, sizeof(tv)))
222 return -EFAULT;
Vasily Averinba780732007-05-24 16:58:54 -0700223 if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
224 return -EDOM;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700225
Vasily Averinba780732007-05-24 16:58:54 -0700226 if (tv.tv_sec < 0) {
Andrew Morton6f11df82007-07-09 13:16:00 -0700227 static int warned __read_mostly;
228
Vasily Averinba780732007-05-24 16:58:54 -0700229 *timeo_p = 0;
230 if (warned < 10 && net_ratelimit())
231 warned++;
232 printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
233 "tries to set negative timeout\n",
YOSHIFUJI Hideaki40b77c92007-07-19 10:43:23 +0900234 current->comm, current->pid);
Vasily Averinba780732007-05-24 16:58:54 -0700235 return 0;
236 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700237 *timeo_p = MAX_SCHEDULE_TIMEOUT;
238 if (tv.tv_sec == 0 && tv.tv_usec == 0)
239 return 0;
240 if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
241 *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
242 return 0;
243}
244
/* Warn that the calling process uses the obsolete SO_BSDCOMPAT option.
 * At most 5 warnings are printed overall, and consecutive calls from
 * the same command name are collapsed into one.
 */
static void sock_warn_obsolete_bsdism(const char *name)
{
	static int warned;			/* total warnings emitted */
	static char warncomm[TASK_COMM_LEN];	/* last command warned about */
	if (strcmp(warncomm, current->comm) && warned < 5) {
		strcpy(warncomm, current->comm);
		printk(KERN_WARNING "process `%s' is using obsolete "
		       "%s SO_BSDCOMPAT\n", warncomm, name);
		warned++;
	}
}
256
/* Clear this socket's SOCK_TIMESTAMP flag and tell the core one fewer
 * socket wants packet timestamps (net_disable_timestamp() — presumably
 * a global enable count; confirm in its definition).  Idempotent: does
 * nothing if the flag is already clear.
 */
static void sock_disable_timestamp(struct sock *sk)
{
	if (sock_flag(sk, SOCK_TIMESTAMP)) {
		sock_reset_flag(sk, SOCK_TIMESTAMP);
		net_disable_timestamp();
	}
}
264
265
/*
 * sock_queue_rcv_skb - charge and queue an skb on a socket's receive queue
 * @sk: destination socket
 * @skb: buffer to queue
 *
 * Rejects the skb with -ENOMEM when it would overflow sk_rcvbuf, runs
 * the socket filter, then appends it to sk_receive_queue and notifies
 * the socket via sk_data_ready() unless the socket is dead.  On any
 * error the skb is NOT freed — the caller still owns it.
 */
int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int err = 0;
	int skb_len;

	/* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
	   number of warnings when compiling with -W --ANK
	 */
	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf) {
		err = -ENOMEM;
		goto out;
	}

	err = sk_filter(sk, skb);
	if (err)
		goto out;

	skb->dev = NULL;
	skb_set_owner_r(skb, sk);

	/* Cache the SKB length before we tack it onto the receive
	 * queue.  Once it is added it no longer belongs to us and
	 * may be freed by other threads of control pulling packets
	 * from the queue.
	 */
	skb_len = skb->len;

	skb_queue_tail(&sk->sk_receive_queue, skb);

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk, skb_len);
out:
	return err;
}
EXPORT_SYMBOL(sock_queue_rcv_skb);
302
/*
 * sk_receive_skb - deliver an skb to a socket, honouring socket ownership
 * @sk: destination socket; the caller's reference is consumed (sock_put)
 * @skb: buffer to deliver (freed here if the filter rejects it)
 * @nested: non-zero when the caller may already hold another socket's
 *	    bh lock, so the _nested lockdep annotation must be used
 *
 * If no user context owns the socket, the skb is processed immediately
 * through sk_backlog_rcv(); otherwise it is parked on the backlog for
 * the lock owner to drain at release_sock() time.  Returns the backlog
 * receive result, or NET_RX_SUCCESS when merely queued or discarded.
 */
int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
{
	int rc = NET_RX_SUCCESS;

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	if (nested)
		bh_lock_sock_nested(sk);
	else
		bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		/*
		 * trylock + unlock semantics: tell lockdep we logically
		 * hold sk_lock around the backlog receive callback.
		 */
		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

		rc = sk->sk_backlog_rcv(sk, skb);

		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);
out:
	sock_put(sk);	/* drop the caller's socket reference */
	return rc;
discard_and_relse:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(sk_receive_skb);
336
337struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
338{
339 struct dst_entry *dst = sk->sk_dst_cache;
340
341 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
342 sk->sk_dst_cache = NULL;
343 dst_release(dst);
344 return NULL;
345 }
346
347 return dst;
348}
349EXPORT_SYMBOL(__sk_dst_check);
350
351struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
352{
353 struct dst_entry *dst = sk_dst_get(sk);
354
355 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
356 sk_dst_reset(sk);
357 dst_release(dst);
358 return NULL;
359 }
360
361 return dst;
362}
363EXPORT_SYMBOL(sk_dst_check);
364
/*
 * sock_bindtodevice - implement SO_BINDTODEVICE
 * @sk: socket being configured
 * @optval: user pointer to an interface name such as "eth0"; an empty
 *	    name (or zero length) unbinds the socket
 * @optlen: length of the user buffer
 *
 * Requires CAP_NET_RAW.  Resolves the name to an ifindex, stores it in
 * sk->sk_bound_dev_if under the socket lock and flushes the cached
 * route.  Returns 0 or a negative errno; -ENOPROTOOPT when the kernel
 * is built without CONFIG_NETDEVICES.
 */
static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	char devname[IFNAMSIZ];
	int index;

	/* Sorry... */
	ret = -EPERM;
	if (!capable(CAP_NET_RAW))
		goto out;

	ret = -EINVAL;
	if (optlen < 0)
		goto out;

	/* Bind this socket to a particular device like "eth0",
	 * as specified in the passed interface name. If the
	 * name is "" or the option length is zero the socket
	 * is not bound.
	 */
	if (optlen > IFNAMSIZ - 1)
		optlen = IFNAMSIZ - 1;
	memset(devname, 0, sizeof(devname));

	ret = -EFAULT;
	if (copy_from_user(devname, optval, optlen))
		goto out;

	if (devname[0] == '\0') {
		index = 0;	/* unbind */
	} else {
		struct net_device *dev = dev_get_by_name(devname);

		ret = -ENODEV;
		if (!dev)
			goto out;

		index = dev->ifindex;
		dev_put(dev);	/* only the index is kept, not the device */
	}

	lock_sock(sk);
	sk->sk_bound_dev_if = index;
	sk_dst_reset(sk);	/* old cached route may use the wrong device */
	release_sock(sk);

	ret = 0;

out:
#endif

	return ret;
}
419
/*
 *	This is meant for all protocols to use and covers goings on
 *	at the socket level. Everything here is generic.
 */

/*
 * sock_setsockopt - set a SOL_SOCKET-level option
 * @sock: socket to modify
 * @level: option level (SOL_SOCKET by the time we are called)
 * @optname: SO_* option being set
 * @optval: user pointer to the new value
 * @optlen: size of the user buffer
 *
 * Most options take an int; SO_LINGER takes a struct linger and the
 * timeout options a struct timeval (parsed by sock_set_timeout()).
 * Everything after the early SO_DONTLINGER/SO_BINDTODEVICE shortcuts
 * runs under lock_sock().  Returns 0 or a negative errno.
 */
int sock_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int optlen)
{
	struct sock *sk=sock->sk;
	struct sk_filter *filter;
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	Options without arguments
	 */

#ifdef SO_DONTLINGER		/* Compatibility item... */
	if (optname == SO_DONTLINGER) {
		lock_sock(sk);
		sock_reset_flag(sk, SOCK_LINGER);
		release_sock(sk);
		return 0;
	}
#endif

	if (optname == SO_BINDTODEVICE)
		return sock_bindtodevice(sk, optval, optlen);

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	valbool = val?1:0;

	lock_sock(sk);

	switch(optname) {
	case SO_DEBUG:
		/* Enabling debug requires CAP_NET_ADMIN; clearing it
		 * is allowed for anyone (compatibility).
		 */
		if (val && !capable(CAP_NET_ADMIN)) {
			ret = -EACCES;
		}
		else if (valbool)
			sock_set_flag(sk, SOCK_DBG);
		else
			sock_reset_flag(sk, SOCK_DBG);
		break;
	case SO_REUSEADDR:
		sk->sk_reuse = valbool;
		break;
	case SO_TYPE:
	case SO_ERROR:
		ret = -ENOPROTOOPT;	/* read-only options */
		break;
	case SO_DONTROUTE:
		if (valbool)
			sock_set_flag(sk, SOCK_LOCALROUTE);
		else
			sock_reset_flag(sk, SOCK_LOCALROUTE);
		break;
	case SO_BROADCAST:
		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
		break;
	case SO_SNDBUF:
		/* Don't error on this BSD doesn't and if you think
		   about it this is right. Otherwise apps have to
		   play 'guess the biggest size' games. RCVBUF/SNDBUF
		   are treated in BSD as hints */

		if (val > sysctl_wmem_max)
			val = sysctl_wmem_max;
set_sndbuf:
		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
		/* Doubled to account for sk_buff overhead; see the
		 * SO_RCVBUF comment below.
		 */
		if ((val * 2) < SOCK_MIN_SNDBUF)
			sk->sk_sndbuf = SOCK_MIN_SNDBUF;
		else
			sk->sk_sndbuf = val * 2;

		/*
		 *	Wake up sending tasks if we
		 *	upped the value.
		 */
		sk->sk_write_space(sk);
		break;

	case SO_SNDBUFFORCE:
		/* Like SO_SNDBUF but skips the sysctl cap; privileged. */
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_sndbuf;

	case SO_RCVBUF:
		/* Don't error on this BSD doesn't and if you think
		   about it this is right. Otherwise apps have to
		   play 'guess the biggest size' games. RCVBUF/SNDBUF
		   are treated in BSD as hints */

		if (val > sysctl_rmem_max)
			val = sysctl_rmem_max;
set_rcvbuf:
		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
		/*
		 * We double it on the way in to account for
		 * "struct sk_buff" etc. overhead.   Applications
		 * assume that the SO_RCVBUF setting they make will
		 * allow that much actual data to be received on that
		 * socket.
		 *
		 * Applications are unaware that "struct sk_buff" and
		 * other overheads allocate from the receive buffer
		 * during socket buffer allocation.
		 *
		 * And after considering the possible alternatives,
		 * returning the value we actually used in getsockopt
		 * is the most desirable behavior.
		 */
		if ((val * 2) < SOCK_MIN_RCVBUF)
			sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
		else
			sk->sk_rcvbuf = val * 2;
		break;

	case SO_RCVBUFFORCE:
		/* Like SO_RCVBUF but skips the sysctl cap; privileged. */
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_rcvbuf;

	case SO_KEEPALIVE:
#ifdef CONFIG_INET
		if (sk->sk_protocol == IPPROTO_TCP)
			tcp_set_keepalive(sk, valbool);
#endif
		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
		break;

	case SO_OOBINLINE:
		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
		break;

	case SO_NO_CHECK:
		sk->sk_no_check = valbool;
		break;

	case SO_PRIORITY:
		/* Priorities above 6 are reserved for privileged users. */
		if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
			sk->sk_priority = val;
		else
			ret = -EPERM;
		break;

	case SO_LINGER:
		if (optlen < sizeof(ling)) {
			ret = -EINVAL;	/* 1003.1g */
			break;
		}
		if (copy_from_user(&ling,optval,sizeof(ling))) {
			ret = -EFAULT;
			break;
		}
		if (!ling.l_onoff)
			sock_reset_flag(sk, SOCK_LINGER);
		else {
#if (BITS_PER_LONG == 32)
			/* On 32-bit, l_linger * HZ could overflow a long;
			 * saturate to "forever" instead.
			 */
			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
			else
#endif
				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
			sock_set_flag(sk, SOCK_LINGER);
		}
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("setsockopt");
		break;

	case SO_PASSCRED:
		if (valbool)
			set_bit(SOCK_PASSCRED, &sock->flags);
		else
			clear_bit(SOCK_PASSCRED, &sock->flags);
		break;

	case SO_TIMESTAMP:
	case SO_TIMESTAMPNS:
		/* The two options are mutually exclusive: RCVTSTAMPNS
		 * selects ns resolution, otherwise timeval resolution.
		 */
		if (valbool)  {
			if (optname == SO_TIMESTAMP)
				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
			else
				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
			sock_set_flag(sk, SOCK_RCVTSTAMP);
			sock_enable_timestamp(sk);
		} else {
			sock_reset_flag(sk, SOCK_RCVTSTAMP);
			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
		}
		break;

	case SO_RCVLOWAT:
		if (val < 0)
			val = INT_MAX;
		sk->sk_rcvlowat = val ? : 1;	/* never allow zero */
		break;

	case SO_RCVTIMEO:
		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
		break;

	case SO_SNDTIMEO:
		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
		break;

	case SO_ATTACH_FILTER:
		ret = -EINVAL;
		if (optlen == sizeof(struct sock_fprog)) {
			struct sock_fprog fprog;

			ret = -EFAULT;
			if (copy_from_user(&fprog, optval, sizeof(fprog)))
				break;

			ret = sk_attach_filter(&fprog, sk);
		}
		break;

	case SO_DETACH_FILTER:
		rcu_read_lock_bh();
		filter = rcu_dereference(sk->sk_filter);
		if (filter) {
			rcu_assign_pointer(sk->sk_filter, NULL);
			sk_filter_release(sk, filter);
			rcu_read_unlock_bh();
			break;
		}
		rcu_read_unlock_bh();
		ret = -ENONET;	/* no filter was attached */
		break;

	case SO_PASSSEC:
		if (valbool)
			set_bit(SOCK_PASSSEC, &sock->flags);
		else
			clear_bit(SOCK_PASSSEC, &sock->flags);
		break;

	/* We implement the SO_SNDLOWAT etc to
	   not be settable (1003.1g 5.3) */
	default:
		ret = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);
	return ret;
}
681
682
/*
 * sock_getsockopt - read back a SOL_SOCKET-level option
 * @sock: socket to query
 * @level: option level (SOL_SOCKET by the time we are called)
 * @optname: SO_* option being read
 * @optval: user buffer for the value
 * @optlen: in: user buffer size; out: number of bytes written
 *
 * Fixed-size answers are assembled in a local union (int, struct
 * linger, or struct timeval), then copied out truncated to the user's
 * buffer; the length used is written back through @optlen.  Options
 * with variable-size answers (SO_PEERCRED, SO_PEERNAME, SO_PEERSEC)
 * bypass the union.  Returns 0 or a negative errno.
 */
int sock_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	union {
		int val;
		struct linger ling;
		struct timeval tm;
	} v;

	unsigned int lv = sizeof(int);	/* size of the value held in v */
	int len;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	switch(optname) {
	case SO_DEBUG:
		v.val = sock_flag(sk, SOCK_DBG);
		break;

	case SO_DONTROUTE:
		v.val = sock_flag(sk, SOCK_LOCALROUTE);
		break;

	case SO_BROADCAST:
		v.val = !!sock_flag(sk, SOCK_BROADCAST);
		break;

	case SO_SNDBUF:
		v.val = sk->sk_sndbuf;
		break;

	case SO_RCVBUF:
		v.val = sk->sk_rcvbuf;
		break;

	case SO_REUSEADDR:
		v.val = sk->sk_reuse;
		break;

	case SO_KEEPALIVE:
		v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
		break;

	case SO_TYPE:
		v.val = sk->sk_type;
		break;

	case SO_ERROR:
		/* Hard error first; else report and clear any pending
		 * soft error.
		 */
		v.val = -sock_error(sk);
		if (v.val==0)
			v.val = xchg(&sk->sk_err_soft, 0);
		break;

	case SO_OOBINLINE:
		v.val = !!sock_flag(sk, SOCK_URGINLINE);
		break;

	case SO_NO_CHECK:
		v.val = sk->sk_no_check;
		break;

	case SO_PRIORITY:
		v.val = sk->sk_priority;
		break;

	case SO_LINGER:
		lv = sizeof(v.ling);
		v.ling.l_onoff = !!sock_flag(sk, SOCK_LINGER);
		v.ling.l_linger = sk->sk_lingertime / HZ;
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("getsockopt");
		break;

	case SO_TIMESTAMP:
		/* True only for timeval-resolution timestamps. */
		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
				!sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_TIMESTAMPNS:
		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_RCVTIMEO:
		lv=sizeof(struct timeval);
		/* MAX_SCHEDULE_TIMEOUT means "no timeout" -> {0,0}. */
		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_SNDTIMEO:
		lv=sizeof(struct timeval);
		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_RCVLOWAT:
		v.val = sk->sk_rcvlowat;
		break;

	case SO_SNDLOWAT:
		/* Not settable (1003.1g 5.3); always reports 1. */
		v.val=1;
		break;

	case SO_PASSCRED:
		v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
		break;

	case SO_PEERCRED:
		if (len > sizeof(sk->sk_peercred))
			len = sizeof(sk->sk_peercred);
		if (copy_to_user(optval, &sk->sk_peercred, len))
			return -EFAULT;
		goto lenout;

	case SO_PEERNAME:
	{
		char address[128];

		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
			return -ENOTCONN;
		if (lv < len)
			return -EINVAL;
		if (copy_to_user(optval, address, len))
			return -EFAULT;
		goto lenout;
	}

	/* Dubious BSD thing... Probably nobody even uses it, but
	 * the UNIX standard wants it for whatever reason... -DaveM
	 */
	case SO_ACCEPTCONN:
		v.val = sk->sk_state == TCP_LISTEN;
		break;

	case SO_PASSSEC:
		v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
		break;

	case SO_PEERSEC:
		return security_socket_getpeersec_stream(sock, optval, optlen, len);

	default:
		return -ENOPROTOOPT;
	}

	if (len > lv)
		len = lv;
	if (copy_to_user(optval, &v, len))
		return -EFAULT;
lenout:
	if (put_user(len, optlen))
		return -EFAULT;
	return 0;
}
853
/*
 * Initialize an sk_lock.
 *
 * (We also register the sk_lock with the lock validator.)
 */
static inline void sock_lock_init(struct sock *sk)
{
	/* Give both the lock and its spinlock per-address-family lockdep
	 * classes and human-readable names so validator reports are useful.
	 */
	sock_lock_init_class_and_name(sk,
			af_family_slock_key_strings[sk->sk_family],
			af_family_slock_keys + sk->sk_family,
			af_family_key_strings[sk->sk_family],
			af_family_keys + sk->sk_family);
}
867
/**
 *	sk_alloc - All socket objects are allocated here
 *	@family: protocol family
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *	@prot: struct proto associated with this new sock instance
 *	@zero_it: if we should zero the newly allocated sock
 *
 *	Allocates from the protocol's slab cache when it has one,
 *	otherwise from kmalloc.  Returns the new sock with a reference
 *	on @prot's module held, or NULL on failure.
 */
struct sock *sk_alloc(int family, gfp_t priority,
		      struct proto *prot, int zero_it)
{
	struct sock *sk = NULL;
	struct kmem_cache *slab = prot->slab;

	if (slab != NULL)
		sk = kmem_cache_alloc(slab, priority);
	else
		sk = kmalloc(prot->obj_size, priority);

	if (sk) {
		if (zero_it) {
			memset(sk, 0, prot->obj_size);
			sk->sk_family = family;
			/*
			 * See comment in struct sock definition to understand
			 * why we need sk_prot_creator -acme
			 */
			sk->sk_prot = sk->sk_prot_creator = prot;
			sock_lock_init(sk);
		}

		if (security_sk_alloc(sk, family, priority))
			goto out_free;

		/* NOTE(review): if try_module_get() fails here, the state
		 * set up by security_sk_alloc() is freed with the sock but
		 * security_sk_free() is never called — verify whether a
		 * matching release is needed on this path.
		 */
		if (!try_module_get(prot->owner))
			goto out_free;
	}
	return sk;

out_free:
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	return NULL;
}
913
/*
 * sk_free - final teardown of a sock once its refcount has dropped.
 *
 * Runs the protocol destructor, detaches any attached socket filter,
 * releases security state and the memory itself, and finally drops the
 * module reference taken in sk_alloc().  The owner pointer is captured
 * up front because the sock memory is gone by the time module_put()
 * runs.
 */
void sk_free(struct sock *sk)
{
	struct sk_filter *filter;
	/* Grab the owning module before the sock is freed below. */
	struct module *owner = sk->sk_prot_creator->owner;

	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	filter = rcu_dereference(sk->sk_filter);
	if (filter) {
		sk_filter_release(sk, filter);
		rcu_assign_pointer(sk->sk_filter, NULL);
	}

	sock_disable_timestamp(sk);

	/* Option memory should be fully returned by now; warn on leaks. */
	if (atomic_read(&sk->sk_omem_alloc))
		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));

	security_sk_free(sk);
	/* Free from the creator's slab, not sk_prot's (they can differ). */
	if (sk->sk_prot_creator->slab != NULL)
		kmem_cache_free(sk->sk_prot_creator->slab, sk);
	else
		kfree(sk);
	module_put(owner);
}
941
/*
 * sk_clone - duplicate an existing sock (used e.g. when a listening
 * socket spawns a child).
 *
 * Starts from a raw memcpy of the parent (sock_copy) and then
 * re-initializes every field that must not be shared: hash linkage,
 * locks, queues, accounting counters and destination cache.  Returns
 * the new sock with a refcount of 2, or NULL on failure.
 */
struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
{
	/* zero_it == 0: sock_copy() below overwrites the whole object. */
	struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);

	if (newsk != NULL) {
		struct sk_filter *filter;

		sock_copy(newsk, sk);

		/* SANITY: undo everything the memcpy must not share. */
		sk_node_init(&newsk->sk_node);
		sock_lock_init(newsk);
		bh_lock_sock(newsk);
		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;

		atomic_set(&newsk->sk_rmem_alloc, 0);
		atomic_set(&newsk->sk_wmem_alloc, 0);
		atomic_set(&newsk->sk_omem_alloc, 0);
		skb_queue_head_init(&newsk->sk_receive_queue);
		skb_queue_head_init(&newsk->sk_write_queue);
#ifdef CONFIG_NET_DMA
		skb_queue_head_init(&newsk->sk_async_wait_queue);
#endif

		rwlock_init(&newsk->sk_dst_lock);
		rwlock_init(&newsk->sk_callback_lock);
		lockdep_set_class_and_name(&newsk->sk_callback_lock,
				af_callback_keys + newsk->sk_family,
				af_family_clock_key_strings[newsk->sk_family]);

		newsk->sk_dst_cache	= NULL;
		newsk->sk_wmem_queued	= 0;
		newsk->sk_forward_alloc = 0;
		newsk->sk_send_head	= NULL;
		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;

		sock_reset_flag(newsk, SOCK_DONE);
		skb_queue_head_init(&newsk->sk_error_queue);

		/* The filter pointer was copied; take our own reference. */
		filter = newsk->sk_filter;
		if (filter != NULL)
			sk_filter_charge(newsk, filter);

		if (unlikely(xfrm_sk_clone_policy(newsk))) {
			/* It is still raw copy of parent, so invalidate
			 * destructor and make plain sk_free() */
			newsk->sk_destruct = NULL;
			sk_free(newsk);
			newsk = NULL;
			goto out;
		}

		newsk->sk_err	   = 0;
		newsk->sk_priority = 0;
		atomic_set(&newsk->sk_refcnt, 2);

		/*
		 * Increment the counter in the same struct proto as the master
		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
		 * is the same as sk->sk_prot->socks, as this field was copied
		 * with memcpy).
		 *
		 * This _changes_ the previous behaviour, where
		 * tcp_create_openreq_child always was incrementing the
		 * equivalent to tcp_prot->socks (inet_sock_nr), so this have
		 * to be taken into account in all callers. -acme
		 */
		sk_refcnt_debug_inc(newsk);
		newsk->sk_socket = NULL;
		newsk->sk_sleep	 = NULL;

		if (newsk->sk_prot->sockets_allocated)
			atomic_inc(newsk->sk_prot->sockets_allocated);
	}
out:
	return newsk;
}

EXPORT_SYMBOL_GPL(sk_clone);
1021
Andi Kleen99580892007-04-20 17:12:43 -07001022void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1023{
1024 __sk_dst_set(sk, dst);
1025 sk->sk_route_caps = dst->dev->features;
1026 if (sk->sk_route_caps & NETIF_F_GSO)
Herbert Xu4fcd6b92007-05-31 22:15:50 -07001027 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
Andi Kleen99580892007-04-20 17:12:43 -07001028 if (sk_can_gso(sk)) {
1029 if (dst->header_len)
1030 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1031 else
1032 sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
1033 }
1034}
1035EXPORT_SYMBOL_GPL(sk_setup_caps);
1036
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037void __init sk_init(void)
1038{
1039 if (num_physpages <= 4096) {
1040 sysctl_wmem_max = 32767;
1041 sysctl_rmem_max = 32767;
1042 sysctl_wmem_default = 32767;
1043 sysctl_rmem_default = 32767;
1044 } else if (num_physpages >= 131072) {
1045 sysctl_wmem_max = 131071;
1046 sysctl_rmem_max = 131071;
1047 }
1048}
1049
1050/*
1051 * Simple resource managers for sockets.
1052 */
1053
1054
/*
 * Write buffer destructor automatically called from kfree_skb.
 * Returns the skb's truesize to the socket's write allowance, wakes
 * any writer waiting for space, and drops the skb's hold on the sock.
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	/* In case it might be waiting for more memory. */
	atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
		sk->sk_write_space(sk);
	/* Drop the reference skb_set_owner_w() took; may free the sock. */
	sock_put(sk);
}
1068
/*
 * Read buffer destructor automatically called from kfree_skb.
 * Only returns the accounted truesize; no sock_put() here because
 * skb_set_owner_r() does not take a sock reference.
 */
void sock_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}
1078
1079
1080int sock_i_uid(struct sock *sk)
1081{
1082 int uid;
1083
1084 read_lock(&sk->sk_callback_lock);
1085 uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
1086 read_unlock(&sk->sk_callback_lock);
1087 return uid;
1088}
1089
1090unsigned long sock_i_ino(struct sock *sk)
1091{
1092 unsigned long ino;
1093
1094 read_lock(&sk->sk_callback_lock);
1095 ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
1096 read_unlock(&sk->sk_callback_lock);
1097 return ino;
1098}
1099
1100/*
1101 * Allocate a skb from the socket's send buffer.
1102 */
Victor Fusco86a76ca2005-07-08 14:57:47 -07001103struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
Al Virodd0fc662005-10-07 07:46:04 +01001104 gfp_t priority)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001105{
1106 if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1107 struct sk_buff * skb = alloc_skb(size, priority);
1108 if (skb) {
1109 skb_set_owner_w(skb, sk);
1110 return skb;
1111 }
1112 }
1113 return NULL;
1114}
1115
1116/*
1117 * Allocate a skb from the socket's receive buffer.
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001118 */
Victor Fusco86a76ca2005-07-08 14:57:47 -07001119struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
Al Virodd0fc662005-10-07 07:46:04 +01001120 gfp_t priority)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001121{
1122 if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
1123 struct sk_buff *skb = alloc_skb(size, priority);
1124 if (skb) {
1125 skb_set_owner_r(skb, sk);
1126 return skb;
1127 }
1128 }
1129 return NULL;
1130}
1131
/*
 * Allocate a memory block from the socket's option memory buffer,
 * bounded by sysctl_optmem_max.  Returns NULL when the quota is
 * exhausted or kmalloc fails.
 */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
	/* (unsigned) cast also rejects negative sizes. */
	if ((unsigned)size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
		void *mem;
		/* First do the add, to avoid the race if kmalloc
		 * might sleep.
		 */
		atomic_add(size, &sk->sk_omem_alloc);
		mem = kmalloc(size, priority);
		if (mem)
			return mem;
		/* Allocation failed: give the quota back. */
		atomic_sub(size, &sk->sk_omem_alloc);
	}
	return NULL;
}
1151
/*
 * Free an option memory block allocated with sock_kmalloc() and
 * return @size bytes to the socket's option memory quota.  The caller
 * must pass the same size it allocated with.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	kfree(mem);
	atomic_sub(size, &sk->sk_omem_alloc);
}
1160
/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think, these locks should be removed for datagram sockets.

   Sleeps until write memory becomes available, the timeout expires, a
   signal arrives, the socket is shut down for sending, or an error is
   pending.  Returns the remaining timeout. */
static long sock_wait_for_wmem(struct sock * sk, long timeo)
{
	DEFINE_WAIT(wait);

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		/* Set NOSPACE before the check so a concurrent wakeup
		 * (write space becoming available) is not lost. */
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
			break;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			break;
		if (sk->sk_err)
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk->sk_sleep, &wait);
	return timeo;
}
1187
/*
 *	Generic send/receive buffer handlers
 */

/*
 * sock_alloc_send_pskb - allocate a send skb with @header_len of
 * linear space plus @data_len bytes spread over page fragments,
 * blocking (subject to the socket's send timeout) until the socket's
 * write allowance permits it.
 *
 * Returns the skb charged to @sk, or NULL with *errcode set
 * (-EPIPE, -EAGAIN, -ENOBUFS, a pending socket error, or the
 * signal-interruption errno).
 */
static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
					    unsigned long header_len,
					    unsigned long data_len,
					    int noblock, int *errcode)
{
	struct sk_buff *skb;
	gfp_t gfp_mask;
	long timeo;
	int err;

	gfp_mask = sk->sk_allocation;
	if (gfp_mask & __GFP_WAIT)
		gfp_mask |= __GFP_REPEAT;

	timeo = sock_sndtimeo(sk, noblock);
	while (1) {
		err = sock_error(sk);
		if (err != 0)
			goto failure;

		err = -EPIPE;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			goto failure;

		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
			skb = alloc_skb(header_len, gfp_mask);
			if (skb) {
				int npages;
				int i;

				/* No pages, we're done... */
				if (!data_len)
					break;

				/* Attach data_len bytes as page frags,
				 * one page at a time. */
				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
				skb->truesize += data_len;
				skb_shinfo(skb)->nr_frags = npages;
				for (i = 0; i < npages; i++) {
					struct page *page;
					skb_frag_t *frag;

					page = alloc_pages(sk->sk_allocation, 0);
					if (!page) {
						err = -ENOBUFS;
						/* Trim nr_frags so kfree_skb
						 * only releases the pages we
						 * actually got. */
						skb_shinfo(skb)->nr_frags = i;
						kfree_skb(skb);
						goto failure;
					}

					frag = &skb_shinfo(skb)->frags[i];
					frag->page = page;
					frag->page_offset = 0;
					frag->size = (data_len >= PAGE_SIZE ?
						      PAGE_SIZE :
						      data_len);
					data_len -= PAGE_SIZE;
				}

				/* Full success... */
				break;
			}
			err = -ENOBUFS;
			goto failure;
		}
		/* Over the send buffer limit: wait for space. */
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}

	skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
}
1276
/* Convenience wrapper: allocate a purely linear send skb (no page
 * fragments).  See sock_alloc_send_pskb() for blocking/error rules. */
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}
1282
/*
 * Slow path of lock_sock(): sleep until the owner releases the sock.
 * Called with sk_lock.slock held; the spinlock is dropped around
 * schedule() and reacquired before returning, so the caller still
 * holds it on exit.
 */
static void __lock_sock(struct sock *sk)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					TASK_UNINTERRUPTIBLE);
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		if (!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}
1298
/*
 * Drain the backlog of packets that arrived while the sock was owned
 * by a user context.  Called with the sock's spinlock held; the queue
 * is detached first so the lock can be dropped while each skb is fed
 * to sk_backlog_rcv().  Loops until the backlog stays empty.
 */
static void __release_sock(struct sock *sk)
{
	struct sk_buff *skb = sk->sk_backlog.head;

	do {
		/* Take the whole queue private, then drop the lock. */
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
		bh_unlock_sock(sk);

		do {
			struct sk_buff *next = skb->next;

			skb->next = NULL;
			sk->sk_backlog_rcv(sk, skb);

			/*
			 * We are in process context here with softirqs
			 * disabled, use cond_resched_softirq() to preempt.
			 * This is safe to do because we've taken the backlog
			 * queue private:
			 */
			cond_resched_softirq();

			skb = next;
		} while (skb != NULL);

		bh_lock_sock(sk);
	} while ((skb = sk->sk_backlog.head) != NULL);
}
1327
/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk: sock to wait on
 * @timeo: for how long
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 *
 * Returns non-zero if the receive queue became non-empty (the value
 * of the sk_wait_event() condition), zero on timeout.
 */
int sk_wait_data(struct sock *sk, long *timeo)
{
	int rc;
	DEFINE_WAIT(wait);

	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
	/* WAITDATA lets poll/async code know a reader is parked here. */
	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	finish_wait(sk->sk_sleep, &wait);
	return rc;
}

EXPORT_SYMBOL(sk_wait_data);
1352
/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 */

/* Default stub: protocol does not support bind(). */
int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}
1364
/* Default stub: protocol does not support connect(). */
int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}
1370
/* Default stub: protocol does not support socketpair(). */
int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}
1375
/* Default stub: protocol does not support accept(). */
int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
	return -EOPNOTSUPP;
}
1380
/* Default stub: protocol does not support getname()/getpeername(). */
int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}
1386
/* Default stub: poll() reports no events for this protocol. */
unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt)
{
	return 0;
}
1391
/* Default stub: protocol does not support ioctl(). */
int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}
1396
/* Default stub: protocol does not support listen(). */
int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}
1401
/* Default stub: protocol does not support shutdown(). */
int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}
1406
/* Default stub: protocol has no socket options to set. */
int sock_no_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int optlen)
{
	return -EOPNOTSUPP;
}
1412
/* Default stub: protocol has no socket options to get. */
int sock_no_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	return -EOPNOTSUPP;
}
1418
/* Default stub: protocol does not support sendmsg(). */
int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len)
{
	return -EOPNOTSUPP;
}
1424
/* Default stub: protocol does not support recvmsg(). */
int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len, int flags)
{
	return -EOPNOTSUPP;
}
1430
/* Default stub: protocol does not support mmap(). */
int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}
1436
1437ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
1438{
1439 ssize_t res;
1440 struct msghdr msg = {.msg_flags = flags};
1441 struct kvec iov;
1442 char *kaddr = kmap(page);
1443 iov.iov_base = kaddr + offset;
1444 iov.iov_len = size;
1445 res = kernel_sendmsg(sock, &msg, &iov, 1, size);
1446 kunmap(page);
1447 return res;
1448}
1449
/*
 *	Default Socket Callbacks
 */

/* Default sk_state_change callback: wake every sleeper on the socket's
 * wait queue.  sk_callback_lock guards sk_sleep against detach. */
static void sock_def_wakeup(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible_all(sk->sk_sleep);
	read_unlock(&sk->sk_callback_lock);
}
1461
/* Default sk_error_report callback: wake sleepers and send SIGIO/POLL_ERR
 * to async subscribers. */
static void sock_def_error_report(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	sk_wake_async(sk,0,POLL_ERR);
	read_unlock(&sk->sk_callback_lock);
}
1470
/* Default sk_data_ready callback: wake readers and signal POLL_IN to
 * async subscribers.  @len is the amount of new data (unused here). */
static void sock_def_readable(struct sock *sk, int len)
{
	read_lock(&sk->sk_callback_lock);
	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	sk_wake_async(sk,1,POLL_IN);
	read_unlock(&sk->sk_callback_lock);
}
1479
/* Default sk_write_space callback: wake writers once at least half of
 * the send buffer is free. */
static void sock_def_write_space(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);

	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
			wake_up_interruptible(sk->sk_sleep);

		/* Should agree with poll, otherwise some programs break */
		if (sock_writeable(sk))
			sk_wake_async(sk, 2, POLL_OUT);
	}

	read_unlock(&sk->sk_callback_lock);
}
1498
/* Default sk_destruct callback: release protocol-private data, if any
 * (kfree(NULL) is a no-op). */
static void sock_def_destruct(struct sock *sk)
{
	kfree(sk->sk_protinfo);
}
1503
/* Deliver SIGURG to the socket's owner and signal POLL_PRI to async
 * subscribers; no-op for an orphaned or file-less socket. */
void sk_send_sigurg(struct sock *sk)
{
	if (sk->sk_socket && sk->sk_socket->file)
		if (send_sigurg(&sk->sk_socket->file->f_owner))
			sk_wake_async(sk, 3, POLL_PRI);
}
1510
/* (Re)arm a sock timer.  If the timer was not already pending, take a
 * sock reference that the timer handler is expected to drop. */
void sk_reset_timer(struct sock *sk, struct timer_list* timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}

EXPORT_SYMBOL(sk_reset_timer);
1519
/* Cancel a pending sock timer and drop the reference sk_reset_timer()
 * took.  A timer that already fired (or was never armed) is left alone. */
void sk_stop_timer(struct sock *sk, struct timer_list* timer)
{
	if (timer_pending(timer) && del_timer(timer))
		__sock_put(sk);
}

EXPORT_SYMBOL(sk_stop_timer);
1527
/*
 * sock_init_data - initialize a freshly allocated sock to generic
 * defaults: empty queues, default buffer sizes from sysctls, default
 * callbacks, infinite timeouts, and (when @sock is non-NULL) mutual
 * linkage between the struct socket and the struct sock.
 */
void sock_init_data(struct socket *sock, struct sock *sk)
{
	skb_queue_head_init(&sk->sk_receive_queue);
	skb_queue_head_init(&sk->sk_write_queue);
	skb_queue_head_init(&sk->sk_error_queue);
#ifdef CONFIG_NET_DMA
	skb_queue_head_init(&sk->sk_async_wait_queue);
#endif

	sk->sk_send_head	=	NULL;

	init_timer(&sk->sk_timer);

	sk->sk_allocation	=	GFP_KERNEL;
	sk->sk_rcvbuf		=	sysctl_rmem_default;
	sk->sk_sndbuf		=	sysctl_wmem_default;
	sk->sk_state		=	TCP_CLOSE;
	sk->sk_socket		=	sock;

	/* ZAPPED until a protocol attaches/activates the sock. */
	sock_set_flag(sk, SOCK_ZAPPED);

	if (sock) {
		sk->sk_type	=	sock->type;
		sk->sk_sleep	=	&sock->wait;
		sock->sk	=	sk;
	} else
		sk->sk_sleep	=	NULL;

	rwlock_init(&sk->sk_dst_lock);
	rwlock_init(&sk->sk_callback_lock);
	/* Per-family lockdep class, as in sock_lock_init(). */
	lockdep_set_class_and_name(&sk->sk_callback_lock,
			af_callback_keys + sk->sk_family,
			af_family_clock_key_strings[sk->sk_family]);

	sk->sk_state_change	=	sock_def_wakeup;
	sk->sk_data_ready	=	sock_def_readable;
	sk->sk_write_space	=	sock_def_write_space;
	sk->sk_error_report	=	sock_def_error_report;
	sk->sk_destruct		=	sock_def_destruct;

	sk->sk_sndmsg_page	=	NULL;
	sk->sk_sndmsg_off	=	0;

	/* No peer credentials yet. */
	sk->sk_peercred.pid 	=	0;
	sk->sk_peercred.uid	=	-1;
	sk->sk_peercred.gid	=	-1;
	sk->sk_write_pending	=	0;
	sk->sk_rcvlowat		=	1;
	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;

	/* (-1, -1) marks the timestamp as "never set". */
	sk->sk_stamp = ktime_set(-1L, -1L);

	atomic_set(&sk->sk_refcnt, 1);
}
1583
/*
 * Acquire the socket "user" lock with a lockdep nesting subclass.
 * May sleep in __lock_sock() if another user context owns the sock.
 * Softirqs stay disabled only while the spinlock is held; the owner
 * flag (not the spinlock) is what excludes other users afterwards.
 */
void fastcall lock_sock_nested(struct sock *sk, int subclass)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_lock.owner)
		__lock_sock(sk);
	sk->sk_lock.owner = (void *)1;
	spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
	local_bh_enable();
}

EXPORT_SYMBOL(lock_sock_nested);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001600
/*
 * Release the socket "user" lock: process any backlog that piled up
 * while we owned the sock, clear the owner flag, and wake the next
 * waiter queued in __lock_sock().
 */
void fastcall release_sock(struct sock *sk)
{
	/*
	 * The sk_lock has mutex_unlock() semantics:
	 */
	mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);

	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_backlog.tail)
		__release_sock(sk);
	sk->sk_lock.owner = NULL;
	if (waitqueue_active(&sk->sk_lock.wq))
		wake_up(&sk->sk_lock.wq);
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);
1617
/*
 * Copy the last packet timestamp to userspace as a struct timeval
 * (SIOCGSTAMP).  Enables timestamping as a side effect if it was off.
 * Returns -ENOENT when no timestamp has ever been taken, -EFAULT on a
 * bad user pointer, 0 on success.
 */
int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
	struct timeval tv;
	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk);
	tv = ktime_to_timeval(sk->sk_stamp);
	if (tv.tv_sec == -1)
		return -ENOENT;
	/* tv_sec == 0: timestamping was just enabled; synthesize "now". */
	if (tv.tv_sec == 0) {
		sk->sk_stamp = ktime_get_real();
		tv = ktime_to_timeval(sk->sk_stamp);
	}
	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);
1633
/*
 * Nanosecond-resolution variant of sock_get_timestamp()
 * (SIOCGSTAMPNS): same semantics, struct timespec instead of timeval.
 */
int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
{
	struct timespec ts;
	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk);
	ts = ktime_to_timespec(sk->sk_stamp);
	if (ts.tv_sec == -1)
		return -ENOENT;
	/* ts_sec == 0: timestamping was just enabled; synthesize "now". */
	if (ts.tv_sec == 0) {
		sk->sk_stamp = ktime_get_real();
		ts = ktime_to_timespec(sk->sk_stamp);
	}
	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestampns);
1649
/* Turn on packet timestamping for @sk, bumping the global timestamp
 * user count exactly once per socket. */
void sock_enable_timestamp(struct sock *sk)
{
	if (!sock_flag(sk, SOCK_TIMESTAMP)) {
		sock_set_flag(sk, SOCK_TIMESTAMP);
		net_enable_timestamp();
	}
}
EXPORT_SYMBOL(sock_enable_timestamp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001658
/*
 *	Get a socket option on an socket.
 *
 *	FIX: POSIX 1003.1g is very ambiguous here. It states that
 *	asynchronous errors should be reported by getsockopt. We assume
 *	this means if you specify SO_ERROR (otherwise whats the point of it).
 */

/* Generic proto_ops getsockopt: forward straight to the protocol. */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(sock_common_getsockopt);
1675
#ifdef CONFIG_COMPAT
/* 32-bit-compat getsockopt: use the protocol's compat handler when it
 * provides one, otherwise fall back to the native path. */
int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_getsockopt != NULL)
		return sk->sk_prot->compat_getsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_getsockopt);
#endif
1689
/* Generic proto_ops recvmsg: forward to the protocol and propagate the
 * sender-address length back into msg_namelen on success. */
int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
			struct msghdr *msg, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	int addr_len = 0;
	int err;

	/* MSG_DONTWAIT is passed separately as the noblock argument. */
	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
				   flags & ~MSG_DONTWAIT, &addr_len);
	if (err >= 0)
		msg->msg_namelen = addr_len;
	return err;
}

EXPORT_SYMBOL(sock_common_recvmsg);
1705
/*
 *	Set socket options on an inet socket.
 */

/* Generic proto_ops setsockopt: forward straight to the protocol. */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(sock_common_setsockopt);
1718
#ifdef CONFIG_COMPAT
/* 32-bit-compat setsockopt: use the protocol's compat handler when it
 * provides one, otherwise fall back to the native path. */
int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_setsockopt != NULL)
		return sk->sk_prot->compat_setsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_setsockopt);
#endif
1732
/*
 * Common close path for a sock: run the protocol destroy hook, unhash,
 * orphan, free xfrm policies, and drop the final reference.  In-flight
 * packets keep their own references, so destruction may be deferred.
 */
void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sock_common_release is called, processes have
	 * no access to socket. But net still has.
	 * Step one, detach it from networking:
	 *
	 * A. Remove from hash tables.
	 */

	sk->sk_prot->unhash(sk);

	/*
	 * In this point socket cannot receive new packets, but it is possible
	 * that some packets are in flight because some CPU runs receiver and
	 * did hash table lookup before we unhashed socket. They will achieve
	 * receive queue and will be purged by socket destructor.
	 *
	 * Also we still have packets pending on receive queue and probably,
	 * our own packets waiting in device queues. sock_destroy will drain
	 * receive queue, but transmitted packets will delay socket destruction
	 * until the last reference will be released.
	 */

	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

	sk_refcnt_debug_release(sk);
	sock_put(sk);
}

EXPORT_SYMBOL(sk_common_release);

/* Registry of all registered protocols; proto_list_lock serializes
 * proto_register()/unregister against /proc readers. */
static DEFINE_RWLOCK(proto_list_lock);
static LIST_HEAD(proto_list);
1772
1773int proto_register(struct proto *prot, int alloc_slab)
1774{
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001775 char *request_sock_slab_name = NULL;
1776 char *timewait_sock_slab_name;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001777 int rc = -ENOBUFS;
1778
Linus Torvalds1da177e2005-04-16 15:20:36 -07001779 if (alloc_slab) {
1780 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
Paul Mundt20c2df82007-07-20 10:11:58 +09001781 SLAB_HWCACHE_ALIGN, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001782
1783 if (prot->slab == NULL) {
1784 printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
1785 prot->name);
Arnaldo Carvalho de Melo2a278052005-04-16 15:24:09 -07001786 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001787 }
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001788
1789 if (prot->rsk_prot != NULL) {
1790 static const char mask[] = "request_sock_%s";
1791
1792 request_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1793 if (request_sock_slab_name == NULL)
1794 goto out_free_sock_slab;
1795
1796 sprintf(request_sock_slab_name, mask, prot->name);
1797 prot->rsk_prot->slab = kmem_cache_create(request_sock_slab_name,
1798 prot->rsk_prot->obj_size, 0,
Paul Mundt20c2df82007-07-20 10:11:58 +09001799 SLAB_HWCACHE_ALIGN, NULL);
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001800
1801 if (prot->rsk_prot->slab == NULL) {
1802 printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
1803 prot->name);
1804 goto out_free_request_sock_slab_name;
1805 }
1806 }
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001807
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -08001808 if (prot->twsk_prot != NULL) {
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001809 static const char mask[] = "tw_sock_%s";
1810
1811 timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
1812
1813 if (timewait_sock_slab_name == NULL)
1814 goto out_free_request_sock_slab;
1815
1816 sprintf(timewait_sock_slab_name, mask, prot->name);
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -08001817 prot->twsk_prot->twsk_slab =
1818 kmem_cache_create(timewait_sock_slab_name,
1819 prot->twsk_prot->twsk_obj_size,
1820 0, SLAB_HWCACHE_ALIGN,
Paul Mundt20c2df82007-07-20 10:11:58 +09001821 NULL);
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -08001822 if (prot->twsk_prot->twsk_slab == NULL)
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001823 goto out_free_timewait_sock_slab_name;
1824 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001825 }
1826
Arnaldo Carvalho de Melo2a278052005-04-16 15:24:09 -07001827 write_lock(&proto_list_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001828 list_add(&prot->node, &proto_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001829 write_unlock(&proto_list_lock);
Arnaldo Carvalho de Melo2a278052005-04-16 15:24:09 -07001830 rc = 0;
1831out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001832 return rc;
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001833out_free_timewait_sock_slab_name:
1834 kfree(timewait_sock_slab_name);
1835out_free_request_sock_slab:
1836 if (prot->rsk_prot && prot->rsk_prot->slab) {
1837 kmem_cache_destroy(prot->rsk_prot->slab);
1838 prot->rsk_prot->slab = NULL;
1839 }
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001840out_free_request_sock_slab_name:
1841 kfree(request_sock_slab_name);
1842out_free_sock_slab:
1843 kmem_cache_destroy(prot->slab);
1844 prot->slab = NULL;
1845 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001846}
1847
1848EXPORT_SYMBOL(proto_register);
1849
1850void proto_unregister(struct proto *prot)
1851{
1852 write_lock(&proto_list_lock);
Patrick McHardy0a3f4352005-09-06 19:47:50 -07001853 list_del(&prot->node);
1854 write_unlock(&proto_list_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001855
1856 if (prot->slab != NULL) {
1857 kmem_cache_destroy(prot->slab);
1858 prot->slab = NULL;
1859 }
1860
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07001861 if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
1862 const char *name = kmem_cache_name(prot->rsk_prot->slab);
1863
1864 kmem_cache_destroy(prot->rsk_prot->slab);
1865 kfree(name);
1866 prot->rsk_prot->slab = NULL;
1867 }
1868
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -08001869 if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
1870 const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001871
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -08001872 kmem_cache_destroy(prot->twsk_prot->twsk_slab);
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001873 kfree(name);
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -08001874 prot->twsk_prot->twsk_slab = NULL;
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07001875 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001876}
1877
1878EXPORT_SYMBOL(proto_unregister);
1879
1880#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -07001881static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
1882{
1883 read_lock(&proto_list_lock);
Pavel Emelianov60f04382007-07-09 13:15:14 -07001884 return seq_list_start_head(&proto_list, *pos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001885}
1886
1887static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1888{
Pavel Emelianov60f04382007-07-09 13:15:14 -07001889 return seq_list_next(v, &proto_list, pos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001890}
1891
/* seq_file .stop: release the lock taken in proto_seq_start(). */
static void proto_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&proto_list_lock);
}
1896
/* Map an optional protocol operation pointer to a one-character flag for
 * /proc/net/protocols: 'y' if the hook is implemented, 'n' if NULL. */
static char proto_method_implemented(const void *method)
{
	if (method)
		return 'y';
	return 'n';
}
1901
/*
 * Emit one /proc/net/protocols row for @proto: name, object size, socket
 * and memory counters (-1 / "NI" when the protocol does not track them),
 * max header, slab status, owning module, then one y/n flag per optional
 * protocol operation.  Column order must match the header printed in
 * proto_seq_show().
 */
static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
	seq_printf(seq, "%-9s %4u %6d %6d %-3s %6u %-3s %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   proto->sockets_allocated != NULL ? atomic_read(proto->sockets_allocated) : -1,
		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->sendpage),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}
1934
1935static int proto_seq_show(struct seq_file *seq, void *v)
1936{
Pavel Emelianov60f04382007-07-09 13:15:14 -07001937 if (v == &proto_list)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001938 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
1939 "protocol",
1940 "size",
1941 "sockets",
1942 "memory",
1943 "press",
1944 "maxhdr",
1945 "slab",
1946 "module",
1947 "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
1948 else
Pavel Emelianov60f04382007-07-09 13:15:14 -07001949 proto_seq_printf(seq, list_entry(v, struct proto, node));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001950 return 0;
1951}
1952
Stephen Hemmingerf6908082007-03-12 14:34:29 -07001953static const struct seq_operations proto_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001954 .start = proto_seq_start,
1955 .next = proto_seq_next,
1956 .stop = proto_seq_stop,
1957 .show = proto_seq_show,
1958};
1959
1960static int proto_seq_open(struct inode *inode, struct file *file)
1961{
1962 return seq_open(file, &proto_seq_ops);
1963}
1964
Arjan van de Ven9a321442007-02-12 00:55:35 -08001965static const struct file_operations proto_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001966 .owner = THIS_MODULE,
1967 .open = proto_seq_open,
1968 .read = seq_read,
1969 .llseek = seq_lseek,
1970 .release = seq_release,
1971};
1972
1973static int __init proto_init(void)
1974{
1975 /* register /proc/net/protocols */
1976 return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
1977}
1978
1979subsys_initcall(proto_init);
1980
1981#endif /* PROC_FS */
1982
1983EXPORT_SYMBOL(sk_alloc);
1984EXPORT_SYMBOL(sk_free);
1985EXPORT_SYMBOL(sk_send_sigurg);
1986EXPORT_SYMBOL(sock_alloc_send_skb);
1987EXPORT_SYMBOL(sock_init_data);
1988EXPORT_SYMBOL(sock_kfree_s);
1989EXPORT_SYMBOL(sock_kmalloc);
1990EXPORT_SYMBOL(sock_no_accept);
1991EXPORT_SYMBOL(sock_no_bind);
1992EXPORT_SYMBOL(sock_no_connect);
1993EXPORT_SYMBOL(sock_no_getname);
1994EXPORT_SYMBOL(sock_no_getsockopt);
1995EXPORT_SYMBOL(sock_no_ioctl);
1996EXPORT_SYMBOL(sock_no_listen);
1997EXPORT_SYMBOL(sock_no_mmap);
1998EXPORT_SYMBOL(sock_no_poll);
1999EXPORT_SYMBOL(sock_no_recvmsg);
2000EXPORT_SYMBOL(sock_no_sendmsg);
2001EXPORT_SYMBOL(sock_no_sendpage);
2002EXPORT_SYMBOL(sock_no_setsockopt);
2003EXPORT_SYMBOL(sock_no_shutdown);
2004EXPORT_SYMBOL(sock_no_socketpair);
2005EXPORT_SYMBOL(sock_rfree);
2006EXPORT_SYMBOL(sock_setsockopt);
2007EXPORT_SYMBOL(sock_wfree);
2008EXPORT_SYMBOL(sock_wmalloc);
2009EXPORT_SYMBOL(sock_i_uid);
2010EXPORT_SYMBOL(sock_i_ino);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002011EXPORT_SYMBOL(sysctl_optmem_max);
David S. Miller6baf1f42005-09-05 18:14:11 -07002012#ifdef CONFIG_SYSCTL
Linus Torvalds1da177e2005-04-16 15:20:36 -07002013EXPORT_SYMBOL(sysctl_rmem_max);
2014EXPORT_SYMBOL(sysctl_wmem_max);
2015#endif