blob: 58dec9dff99acd34f42f1a318da1c14a28b623e1 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Generic socket support routines. Memory allocators, socket lock/release
7 * handler for protocols to use and generic option handler.
8 *
9 *
Jesper Juhl02c30a82005-05-05 16:16:16 -070010 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -070011 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Florian La Roche, <flla@stud.uni-sb.de>
13 * Alan Cox, <A.Cox@swansea.ac.uk>
14 *
15 * Fixes:
16 * Alan Cox : Numerous verify_area() problems
17 * Alan Cox : Connecting on a connecting socket
18 * now returns an error for tcp.
19 * Alan Cox : sock->protocol is set correctly.
20 * and is not sometimes left as 0.
21 * Alan Cox : connect handles icmp errors on a
22 * connect properly. Unfortunately there
23 * is a restart syscall nasty there. I
24 * can't match BSD without hacking the C
25 * library. Ideas urgently sought!
26 * Alan Cox : Disallow bind() to addresses that are
27 * not ours - especially broadcast ones!!
28 * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost)
29 * Alan Cox : sock_wfree/sock_rfree don't destroy sockets,
30 * instead they leave that for the DESTROY timer.
31 * Alan Cox : Clean up error flag in accept
32 * Alan Cox : TCP ack handling is buggy, the DESTROY timer
33 * was buggy. Put a remove_sock() in the handler
34 * for memory when we hit 0. Also altered the timer
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +090035 * code. The ACK stuff can wait and needs major
Linus Torvalds1da177e2005-04-16 15:20:36 -070036 * TCP layer surgery.
37 * Alan Cox : Fixed TCP ack bug, removed remove sock
38 * and fixed timer/inet_bh race.
39 * Alan Cox : Added zapped flag for TCP
40 * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code
41 * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
42 * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources
43 * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing.
44 * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
45 * Rick Sladkey : Relaxed UDP rules for matching packets.
46 * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support
47 * Pauline Middelink : identd support
48 * Alan Cox : Fixed connect() taking signals I think.
49 * Alan Cox : SO_LINGER supported
50 * Alan Cox : Error reporting fixes
51 * Anonymous : inet_create tidied up (sk->reuse setting)
52 * Alan Cox : inet sockets don't set sk->type!
53 * Alan Cox : Split socket option code
54 * Alan Cox : Callbacks
55 * Alan Cox : Nagle flag for Charles & Johannes stuff
56 * Alex : Removed restriction on inet fioctl
57 * Alan Cox : Splitting INET from NET core
58 * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt()
59 * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code
60 * Alan Cox : Split IP from generic code
61 * Alan Cox : New kfree_skbmem()
62 * Alan Cox : Make SO_DEBUG superuser only.
63 * Alan Cox : Allow anyone to clear SO_DEBUG
64 * (compatibility fix)
65 * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput.
66 * Alan Cox : Allocator for a socket is settable.
67 * Alan Cox : SO_ERROR includes soft errors.
68 * Alan Cox : Allow NULL arguments on some SO_ opts
69 * Alan Cox : Generic socket allocation to make hooks
70 * easier (suggested by Craig Metz).
71 * Michael Pall : SO_ERROR returns positive errno again
72 * Steve Whitehouse: Added default destructor to free
73 * protocol private data.
74 * Steve Whitehouse: Added various other default routines
75 * common to several socket families.
76 * Chris Evans : Call suser() check last on F_SETOWN
77 * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
78 * Andi Kleen : Add sock_kmalloc()/sock_kfree_s()
79 * Andi Kleen : Fix write_space callback
80 * Chris Evans : Security fixes - signedness again
81 * Arnaldo C. Melo : cleanups, use skb_queue_purge
82 *
83 * To Fix:
84 *
85 *
86 * This program is free software; you can redistribute it and/or
87 * modify it under the terms of the GNU General Public License
88 * as published by the Free Software Foundation; either version
89 * 2 of the License, or (at your option) any later version.
90 */
91
Randy Dunlap4fc268d2006-01-11 12:17:47 -080092#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070093#include <linux/errno.h>
94#include <linux/types.h>
95#include <linux/socket.h>
96#include <linux/in.h>
97#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070098#include <linux/module.h>
99#include <linux/proc_fs.h>
100#include <linux/seq_file.h>
101#include <linux/sched.h>
102#include <linux/timer.h>
103#include <linux/string.h>
104#include <linux/sockios.h>
105#include <linux/net.h>
106#include <linux/mm.h>
107#include <linux/slab.h>
108#include <linux/interrupt.h>
109#include <linux/poll.h>
110#include <linux/tcp.h>
111#include <linux/init.h>
Al Viroa1f8e7f72006-10-19 16:08:53 -0400112#include <linux/highmem.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700113
114#include <asm/uaccess.h>
115#include <asm/system.h>
116
117#include <linux/netdevice.h>
118#include <net/protocol.h>
119#include <linux/skbuff.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +0200120#include <net/net_namespace.h>
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -0700121#include <net/request_sock.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122#include <net/sock.h>
Patrick Ohly20d49472009-02-12 05:03:38 +0000123#include <linux/net_tstamp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124#include <net/xfrm.h>
125#include <linux/ipsec.h>
126
127#include <linux/filter.h>
128
129#ifdef CONFIG_INET
130#include <net/tcp.h>
131#endif
132
Ingo Molnarda21f242006-07-03 00:25:12 -0700133/*
134 * Each address family might have different locking rules, so we have
135 * one slock key per address family:
136 */
Ingo Molnara5b5bb92006-07-03 00:25:35 -0700137static struct lock_class_key af_family_keys[AF_MAX];
138static struct lock_class_key af_family_slock_keys[AF_MAX];
139
/*
 * Make lock validator output more readable. (we pre-construct these
 * strings build-time, so that runtime initialization of socket
 * locks is fast):
 *
 * One "sk_lock-" name per address-family slot, listed in AF_* numeric
 * order; slots 27 and 28 only carry their number.  The final entry
 * names the AF_MAX slot.  Every named entry uses the same
 * "sk_lock-AF_<family>" pattern as the slock-/clock- tables.
 */
static const char *af_family_key_strings[AF_MAX+1] = {
  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
  "sk_lock-AF_RDS"   , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_IUCV"     ,
  "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
  "sk_lock-AF_MAX"
};
/*
 * "slock-" names, one per address-family slot in AF_* numeric order.
 * Slots 27 and 28 only carry their number; the last entry names the
 * AF_MAX slot.
 */
static const char *af_family_slock_key_strings[AF_MAX+1] = {
	"slock-AF_UNSPEC",
	"slock-AF_UNIX",
	"slock-AF_INET",
	"slock-AF_AX25",
	"slock-AF_IPX",
	"slock-AF_APPLETALK",
	"slock-AF_NETROM",
	"slock-AF_BRIDGE",
	"slock-AF_ATMPVC",
	"slock-AF_X25",
	"slock-AF_INET6",
	"slock-AF_ROSE",
	"slock-AF_DECnet",
	"slock-AF_NETBEUI",
	"slock-AF_SECURITY",
	"slock-AF_KEY",
	"slock-AF_NETLINK",
	"slock-AF_PACKET",
	"slock-AF_ASH",
	"slock-AF_ECONET",
	"slock-AF_ATMSVC",
	"slock-AF_RDS",
	"slock-AF_SNA",
	"slock-AF_IRDA",
	"slock-AF_PPPOX",
	"slock-AF_WANPIPE",
	"slock-AF_LLC",
	"slock-27",		/* slot 27: no family name */
	"slock-28",		/* slot 28: no family name */
	"slock-AF_CAN",
	"slock-AF_TIPC",
	"slock-AF_BLUETOOTH",
	"slock-AF_IUCV",
	"slock-AF_RXRPC",
	"slock-AF_ISDN",
	"slock-AF_PHONET",
	"slock-AF_MAX"
};
/*
 * "clock-" names, one per address-family slot in AF_* numeric order.
 * Slots 27 and 28 only carry their number; the last entry names the
 * AF_MAX slot.
 */
static const char *af_family_clock_key_strings[AF_MAX+1] = {
	"clock-AF_UNSPEC",
	"clock-AF_UNIX",
	"clock-AF_INET",
	"clock-AF_AX25",
	"clock-AF_IPX",
	"clock-AF_APPLETALK",
	"clock-AF_NETROM",
	"clock-AF_BRIDGE",
	"clock-AF_ATMPVC",
	"clock-AF_X25",
	"clock-AF_INET6",
	"clock-AF_ROSE",
	"clock-AF_DECnet",
	"clock-AF_NETBEUI",
	"clock-AF_SECURITY",
	"clock-AF_KEY",
	"clock-AF_NETLINK",
	"clock-AF_PACKET",
	"clock-AF_ASH",
	"clock-AF_ECONET",
	"clock-AF_ATMSVC",
	"clock-AF_RDS",
	"clock-AF_SNA",
	"clock-AF_IRDA",
	"clock-AF_PPPOX",
	"clock-AF_WANPIPE",
	"clock-AF_LLC",
	"clock-27",		/* slot 27: no family name */
	"clock-28",		/* slot 28: no family name */
	"clock-AF_CAN",
	"clock-AF_TIPC",
	"clock-AF_BLUETOOTH",
	"clock-AF_IUCV",
	"clock-AF_RXRPC",
	"clock-AF_ISDN",
	"clock-AF_PHONET",
	"clock-AF_MAX"
};
Ingo Molnarda21f242006-07-03 00:25:12 -0700190
191/*
192 * sk_callback_lock locking rules are per-address-family,
193 * so split the lock classes by using a per-AF key:
194 */
195static struct lock_class_key af_callback_keys[AF_MAX];
196
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197/* Take into consideration the size of the struct sk_buff overhead in the
198 * determination of these values, since that is non-constant across
199 * platforms. This makes socket queueing behavior and performance
200 * not depend upon such differences.
201 */
202#define _SK_MEM_PACKETS 256
203#define _SK_MEM_OVERHEAD (sizeof(struct sk_buff) + 256)
204#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
205#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
206
207/* Run time adjustable parameters. */
Brian Haleyab32ea52006-09-22 14:15:41 -0700208__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
209__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
210__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
211__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700212
213/* Maximal space eaten by iovec or ancilliary data plus some space */
Brian Haleyab32ea52006-09-22 14:15:41 -0700214int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
Eric Dumazet2a915252009-05-27 11:30:05 +0000215EXPORT_SYMBOL(sysctl_optmem_max);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216
217static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
218{
219 struct timeval tv;
220
221 if (optlen < sizeof(tv))
222 return -EINVAL;
223 if (copy_from_user(&tv, optval, sizeof(tv)))
224 return -EFAULT;
Vasily Averinba780732007-05-24 16:58:54 -0700225 if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
226 return -EDOM;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700227
Vasily Averinba780732007-05-24 16:58:54 -0700228 if (tv.tv_sec < 0) {
Andrew Morton6f11df82007-07-09 13:16:00 -0700229 static int warned __read_mostly;
230
Vasily Averinba780732007-05-24 16:58:54 -0700231 *timeo_p = 0;
Ilpo Järvinen50aab542008-05-02 16:20:10 -0700232 if (warned < 10 && net_ratelimit()) {
Vasily Averinba780732007-05-24 16:58:54 -0700233 warned++;
234 printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
235 "tries to set negative timeout\n",
Pavel Emelyanovba25f9d2007-10-18 23:40:40 -0700236 current->comm, task_pid_nr(current));
Ilpo Järvinen50aab542008-05-02 16:20:10 -0700237 }
Vasily Averinba780732007-05-24 16:58:54 -0700238 return 0;
239 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700240 *timeo_p = MAX_SCHEDULE_TIMEOUT;
241 if (tv.tv_sec == 0 && tv.tv_usec == 0)
242 return 0;
243 if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
244 *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
245 return 0;
246}
247
248static void sock_warn_obsolete_bsdism(const char *name)
249{
250 static int warned;
251 static char warncomm[TASK_COMM_LEN];
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900252 if (strcmp(warncomm, current->comm) && warned < 5) {
253 strcpy(warncomm, current->comm);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700254 printk(KERN_WARNING "process `%s' is using obsolete "
255 "%s SO_BSDCOMPAT\n", warncomm, name);
256 warned++;
257 }
258}
259
Patrick Ohly20d49472009-02-12 05:03:38 +0000260static void sock_disable_timestamp(struct sock *sk, int flag)
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900261{
Patrick Ohly20d49472009-02-12 05:03:38 +0000262 if (sock_flag(sk, flag)) {
263 sock_reset_flag(sk, flag);
264 if (!sock_flag(sk, SOCK_TIMESTAMP) &&
265 !sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE)) {
266 net_disable_timestamp();
267 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268 }
269}
270
271
Denis Vlasenkof0088a52006-03-28 01:08:21 -0800272int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
273{
274 int err = 0;
275 int skb_len;
276
Rami Rosen9ee6b7f2008-05-14 03:50:03 -0700277 /* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
Denis Vlasenkof0088a52006-03-28 01:08:21 -0800278 number of warnings when compiling with -W --ANK
279 */
280 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
281 (unsigned)sk->sk_rcvbuf) {
282 err = -ENOMEM;
283 goto out;
284 }
285
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700286 err = sk_filter(sk, skb);
Denis Vlasenkof0088a52006-03-28 01:08:21 -0800287 if (err)
288 goto out;
289
Hideo Aoki3ab224b2007-12-31 00:11:19 -0800290 if (!sk_rmem_schedule(sk, skb->truesize)) {
291 err = -ENOBUFS;
292 goto out;
293 }
294
Denis Vlasenkof0088a52006-03-28 01:08:21 -0800295 skb->dev = NULL;
296 skb_set_owner_r(skb, sk);
David S. Miller49ad9592008-12-17 22:11:38 -0800297
Denis Vlasenkof0088a52006-03-28 01:08:21 -0800298 /* Cache the SKB length before we tack it onto the receive
299 * queue. Once it is added it no longer belongs to us and
300 * may be freed by other threads of control pulling packets
301 * from the queue.
302 */
303 skb_len = skb->len;
304
305 skb_queue_tail(&sk->sk_receive_queue, skb);
306
307 if (!sock_flag(sk, SOCK_DEAD))
308 sk->sk_data_ready(sk, skb_len);
309out:
310 return err;
311}
312EXPORT_SYMBOL(sock_queue_rcv_skb);
313
Arnaldo Carvalho de Melo58a5a7b2006-11-16 14:06:06 -0200314int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
Denis Vlasenkof0088a52006-03-28 01:08:21 -0800315{
316 int rc = NET_RX_SUCCESS;
317
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700318 if (sk_filter(sk, skb))
Denis Vlasenkof0088a52006-03-28 01:08:21 -0800319 goto discard_and_relse;
320
321 skb->dev = NULL;
322
Arnaldo Carvalho de Melo58a5a7b2006-11-16 14:06:06 -0200323 if (nested)
324 bh_lock_sock_nested(sk);
325 else
326 bh_lock_sock(sk);
Ingo Molnara5b5bb92006-07-03 00:25:35 -0700327 if (!sock_owned_by_user(sk)) {
328 /*
329 * trylock + unlock semantics:
330 */
331 mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
332
Peter Zijlstrac57943a2008-10-07 14:18:42 -0700333 rc = sk_backlog_rcv(sk, skb);
Ingo Molnara5b5bb92006-07-03 00:25:35 -0700334
335 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
336 } else
Denis Vlasenkof0088a52006-03-28 01:08:21 -0800337 sk_add_backlog(sk, skb);
338 bh_unlock_sock(sk);
339out:
340 sock_put(sk);
341 return rc;
342discard_and_relse:
343 kfree_skb(skb);
344 goto out;
345}
346EXPORT_SYMBOL(sk_receive_skb);
347
348struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
349{
350 struct dst_entry *dst = sk->sk_dst_cache;
351
352 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
353 sk->sk_dst_cache = NULL;
354 dst_release(dst);
355 return NULL;
356 }
357
358 return dst;
359}
360EXPORT_SYMBOL(__sk_dst_check);
361
362struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
363{
364 struct dst_entry *dst = sk_dst_get(sk);
365
366 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
367 sk_dst_reset(sk);
368 dst_release(dst);
369 return NULL;
370 }
371
372 return dst;
373}
374EXPORT_SYMBOL(sk_dst_check);
375
David S. Miller48788092007-09-14 16:41:03 -0700376static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
377{
378 int ret = -ENOPROTOOPT;
379#ifdef CONFIG_NETDEVICES
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900380 struct net *net = sock_net(sk);
David S. Miller48788092007-09-14 16:41:03 -0700381 char devname[IFNAMSIZ];
382 int index;
383
384 /* Sorry... */
385 ret = -EPERM;
386 if (!capable(CAP_NET_RAW))
387 goto out;
388
389 ret = -EINVAL;
390 if (optlen < 0)
391 goto out;
392
393 /* Bind this socket to a particular device like "eth0",
394 * as specified in the passed interface name. If the
395 * name is "" or the option length is zero the socket
396 * is not bound.
397 */
398 if (optlen > IFNAMSIZ - 1)
399 optlen = IFNAMSIZ - 1;
400 memset(devname, 0, sizeof(devname));
401
402 ret = -EFAULT;
403 if (copy_from_user(devname, optval, optlen))
404 goto out;
405
406 if (devname[0] == '\0') {
407 index = 0;
408 } else {
Eric W. Biederman881d9662007-09-17 11:56:21 -0700409 struct net_device *dev = dev_get_by_name(net, devname);
David S. Miller48788092007-09-14 16:41:03 -0700410
411 ret = -ENODEV;
412 if (!dev)
413 goto out;
414
415 index = dev->ifindex;
416 dev_put(dev);
417 }
418
419 lock_sock(sk);
420 sk->sk_bound_dev_if = index;
421 sk_dst_reset(sk);
422 release_sock(sk);
423
424 ret = 0;
425
426out:
427#endif
428
429 return ret;
430}
431
/* Set socket flag @bit on @sk when @valbool is non-zero, clear it otherwise. */
static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
{
	if (!valbool) {
		sock_reset_flag(sk, bit);
		return;
	}
	sock_set_flag(sk, bit);
}
439
Linus Torvalds1da177e2005-04-16 15:20:36 -0700440/*
441 * This is meant for all protocols to use and covers goings on
442 * at the socket level. Everything here is generic.
443 */
444
445int sock_setsockopt(struct socket *sock, int level, int optname,
446 char __user *optval, int optlen)
447{
Eric Dumazet2a915252009-05-27 11:30:05 +0000448 struct sock *sk = sock->sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700449 int val;
450 int valbool;
451 struct linger ling;
452 int ret = 0;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900453
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454 /*
455 * Options without arguments
456 */
457
David S. Miller48788092007-09-14 16:41:03 -0700458 if (optname == SO_BINDTODEVICE)
459 return sock_bindtodevice(sk, optval, optlen);
460
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700461 if (optlen < sizeof(int))
462 return -EINVAL;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900463
Linus Torvalds1da177e2005-04-16 15:20:36 -0700464 if (get_user(val, (int __user *)optval))
465 return -EFAULT;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900466
Eric Dumazet2a915252009-05-27 11:30:05 +0000467 valbool = val ? 1 : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468
469 lock_sock(sk);
470
Eric Dumazet2a915252009-05-27 11:30:05 +0000471 switch (optname) {
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700472 case SO_DEBUG:
Eric Dumazet2a915252009-05-27 11:30:05 +0000473 if (val && !capable(CAP_NET_ADMIN))
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700474 ret = -EACCES;
Eric Dumazet2a915252009-05-27 11:30:05 +0000475 else
Pavel Emelyanovc0ef8772007-11-15 03:03:19 -0800476 sock_valbool_flag(sk, SOCK_DBG, valbool);
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700477 break;
478 case SO_REUSEADDR:
479 sk->sk_reuse = valbool;
480 break;
481 case SO_TYPE:
482 case SO_ERROR:
483 ret = -ENOPROTOOPT;
484 break;
485 case SO_DONTROUTE:
Pavel Emelyanovc0ef8772007-11-15 03:03:19 -0800486 sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700487 break;
488 case SO_BROADCAST:
489 sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
490 break;
491 case SO_SNDBUF:
492 /* Don't error on this BSD doesn't and if you think
493 about it this is right. Otherwise apps have to
494 play 'guess the biggest size' games. RCVBUF/SNDBUF
495 are treated in BSD as hints */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900496
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700497 if (val > sysctl_wmem_max)
498 val = sysctl_wmem_max;
Patrick McHardyb0573de2005-08-09 19:30:51 -0700499set_sndbuf:
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700500 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
501 if ((val * 2) < SOCK_MIN_SNDBUF)
502 sk->sk_sndbuf = SOCK_MIN_SNDBUF;
503 else
504 sk->sk_sndbuf = val * 2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700505
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700506 /*
507 * Wake up sending tasks if we
508 * upped the value.
509 */
510 sk->sk_write_space(sk);
511 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700513 case SO_SNDBUFFORCE:
514 if (!capable(CAP_NET_ADMIN)) {
515 ret = -EPERM;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516 break;
517 }
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700518 goto set_sndbuf;
519
520 case SO_RCVBUF:
521 /* Don't error on this BSD doesn't and if you think
522 about it this is right. Otherwise apps have to
523 play 'guess the biggest size' games. RCVBUF/SNDBUF
524 are treated in BSD as hints */
525
526 if (val > sysctl_rmem_max)
527 val = sysctl_rmem_max;
528set_rcvbuf:
529 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
530 /*
531 * We double it on the way in to account for
532 * "struct sk_buff" etc. overhead. Applications
533 * assume that the SO_RCVBUF setting they make will
534 * allow that much actual data to be received on that
535 * socket.
536 *
537 * Applications are unaware that "struct sk_buff" and
538 * other overheads allocate from the receive buffer
539 * during socket buffer allocation.
540 *
541 * And after considering the possible alternatives,
542 * returning the value we actually used in getsockopt
543 * is the most desirable behavior.
544 */
545 if ((val * 2) < SOCK_MIN_RCVBUF)
546 sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
547 else
548 sk->sk_rcvbuf = val * 2;
549 break;
550
551 case SO_RCVBUFFORCE:
552 if (!capable(CAP_NET_ADMIN)) {
553 ret = -EPERM;
554 break;
555 }
556 goto set_rcvbuf;
557
558 case SO_KEEPALIVE:
559#ifdef CONFIG_INET
560 if (sk->sk_protocol == IPPROTO_TCP)
561 tcp_set_keepalive(sk, valbool);
562#endif
563 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
564 break;
565
566 case SO_OOBINLINE:
567 sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
568 break;
569
570 case SO_NO_CHECK:
571 sk->sk_no_check = valbool;
572 break;
573
574 case SO_PRIORITY:
575 if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
576 sk->sk_priority = val;
577 else
578 ret = -EPERM;
579 break;
580
581 case SO_LINGER:
582 if (optlen < sizeof(ling)) {
583 ret = -EINVAL; /* 1003.1g */
584 break;
585 }
Eric Dumazet2a915252009-05-27 11:30:05 +0000586 if (copy_from_user(&ling, optval, sizeof(ling))) {
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700587 ret = -EFAULT;
588 break;
589 }
590 if (!ling.l_onoff)
591 sock_reset_flag(sk, SOCK_LINGER);
592 else {
593#if (BITS_PER_LONG == 32)
594 if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
595 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
596 else
597#endif
598 sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
599 sock_set_flag(sk, SOCK_LINGER);
600 }
601 break;
602
603 case SO_BSDCOMPAT:
604 sock_warn_obsolete_bsdism("setsockopt");
605 break;
606
607 case SO_PASSCRED:
608 if (valbool)
609 set_bit(SOCK_PASSCRED, &sock->flags);
610 else
611 clear_bit(SOCK_PASSCRED, &sock->flags);
612 break;
613
614 case SO_TIMESTAMP:
Eric Dumazet92f37fd2007-03-25 22:14:49 -0700615 case SO_TIMESTAMPNS:
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700616 if (valbool) {
Eric Dumazet92f37fd2007-03-25 22:14:49 -0700617 if (optname == SO_TIMESTAMP)
618 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
619 else
620 sock_set_flag(sk, SOCK_RCVTSTAMPNS);
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700621 sock_set_flag(sk, SOCK_RCVTSTAMP);
Patrick Ohly20d49472009-02-12 05:03:38 +0000622 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
Eric Dumazet92f37fd2007-03-25 22:14:49 -0700623 } else {
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700624 sock_reset_flag(sk, SOCK_RCVTSTAMP);
Eric Dumazet92f37fd2007-03-25 22:14:49 -0700625 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
626 }
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700627 break;
628
Patrick Ohly20d49472009-02-12 05:03:38 +0000629 case SO_TIMESTAMPING:
630 if (val & ~SOF_TIMESTAMPING_MASK) {
631 ret = EINVAL;
632 break;
633 }
634 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
635 val & SOF_TIMESTAMPING_TX_HARDWARE);
636 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE,
637 val & SOF_TIMESTAMPING_TX_SOFTWARE);
638 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE,
639 val & SOF_TIMESTAMPING_RX_HARDWARE);
640 if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
641 sock_enable_timestamp(sk,
642 SOCK_TIMESTAMPING_RX_SOFTWARE);
643 else
644 sock_disable_timestamp(sk,
645 SOCK_TIMESTAMPING_RX_SOFTWARE);
646 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
647 val & SOF_TIMESTAMPING_SOFTWARE);
648 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
649 val & SOF_TIMESTAMPING_SYS_HARDWARE);
650 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
651 val & SOF_TIMESTAMPING_RAW_HARDWARE);
652 break;
653
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700654 case SO_RCVLOWAT:
655 if (val < 0)
656 val = INT_MAX;
657 sk->sk_rcvlowat = val ? : 1;
658 break;
659
660 case SO_RCVTIMEO:
661 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
662 break;
663
664 case SO_SNDTIMEO:
665 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
666 break;
667
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700668 case SO_ATTACH_FILTER:
669 ret = -EINVAL;
670 if (optlen == sizeof(struct sock_fprog)) {
671 struct sock_fprog fprog;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700672
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700673 ret = -EFAULT;
674 if (copy_from_user(&fprog, optval, sizeof(fprog)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700675 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700676
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700677 ret = sk_attach_filter(&fprog, sk);
678 }
679 break;
680
681 case SO_DETACH_FILTER:
Pavel Emelyanov55b33322007-10-17 21:21:26 -0700682 ret = sk_detach_filter(sk);
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700683 break;
684
685 case SO_PASSSEC:
686 if (valbool)
687 set_bit(SOCK_PASSSEC, &sock->flags);
688 else
689 clear_bit(SOCK_PASSSEC, &sock->flags);
690 break;
Laszlo Attila Toth4a19ec52008-01-30 19:08:16 -0800691 case SO_MARK:
692 if (!capable(CAP_NET_ADMIN))
693 ret = -EPERM;
Eric Dumazet2a915252009-05-27 11:30:05 +0000694 else
Laszlo Attila Toth4a19ec52008-01-30 19:08:16 -0800695 sk->sk_mark = val;
Laszlo Attila Toth4a19ec52008-01-30 19:08:16 -0800696 break;
Catherine Zhang877ce7c2006-06-29 12:27:47 -0700697
Linus Torvalds1da177e2005-04-16 15:20:36 -0700698 /* We implement the SO_SNDLOWAT etc to
699 not be settable (1003.1g 5.3) */
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700700 default:
701 ret = -ENOPROTOOPT;
702 break;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900703 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700704 release_sock(sk);
705 return ret;
706}
Eric Dumazet2a915252009-05-27 11:30:05 +0000707EXPORT_SYMBOL(sock_setsockopt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700708
709
710int sock_getsockopt(struct socket *sock, int level, int optname,
711 char __user *optval, int __user *optlen)
712{
713 struct sock *sk = sock->sk;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900714
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700715 union {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900716 int val;
717 struct linger ling;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700718 struct timeval tm;
719 } v;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900720
Linus Torvalds1da177e2005-04-16 15:20:36 -0700721 unsigned int lv = sizeof(int);
722 int len;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900723
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700724 if (get_user(len, optlen))
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900725 return -EFAULT;
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700726 if (len < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700727 return -EINVAL;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900728
Eugene Teo50fee1d2009-02-23 15:38:41 -0800729 memset(&v, 0, sizeof(v));
Clément Lecignedf0bca02009-02-12 16:59:09 -0800730
Eric Dumazet2a915252009-05-27 11:30:05 +0000731 switch (optname) {
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700732 case SO_DEBUG:
733 v.val = sock_flag(sk, SOCK_DBG);
734 break;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900735
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700736 case SO_DONTROUTE:
737 v.val = sock_flag(sk, SOCK_LOCALROUTE);
738 break;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900739
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700740 case SO_BROADCAST:
741 v.val = !!sock_flag(sk, SOCK_BROADCAST);
742 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700743
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700744 case SO_SNDBUF:
745 v.val = sk->sk_sndbuf;
746 break;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900747
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700748 case SO_RCVBUF:
749 v.val = sk->sk_rcvbuf;
750 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700751
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700752 case SO_REUSEADDR:
753 v.val = sk->sk_reuse;
754 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700755
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700756 case SO_KEEPALIVE:
757 v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
758 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700760 case SO_TYPE:
761 v.val = sk->sk_type;
762 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700763
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700764 case SO_ERROR:
765 v.val = -sock_error(sk);
Eric Dumazet2a915252009-05-27 11:30:05 +0000766 if (v.val == 0)
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700767 v.val = xchg(&sk->sk_err_soft, 0);
768 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700770 case SO_OOBINLINE:
771 v.val = !!sock_flag(sk, SOCK_URGINLINE);
772 break;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900773
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700774 case SO_NO_CHECK:
775 v.val = sk->sk_no_check;
776 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700777
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700778 case SO_PRIORITY:
779 v.val = sk->sk_priority;
780 break;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900781
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700782 case SO_LINGER:
783 lv = sizeof(v.ling);
784 v.ling.l_onoff = !!sock_flag(sk, SOCK_LINGER);
785 v.ling.l_linger = sk->sk_lingertime / HZ;
786 break;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900787
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700788 case SO_BSDCOMPAT:
789 sock_warn_obsolete_bsdism("getsockopt");
790 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700791
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700792 case SO_TIMESTAMP:
Eric Dumazet92f37fd2007-03-25 22:14:49 -0700793 v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
794 !sock_flag(sk, SOCK_RCVTSTAMPNS);
795 break;
796
797 case SO_TIMESTAMPNS:
798 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700799 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700800
Patrick Ohly20d49472009-02-12 05:03:38 +0000801 case SO_TIMESTAMPING:
802 v.val = 0;
803 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
804 v.val |= SOF_TIMESTAMPING_TX_HARDWARE;
805 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
806 v.val |= SOF_TIMESTAMPING_TX_SOFTWARE;
807 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE))
808 v.val |= SOF_TIMESTAMPING_RX_HARDWARE;
809 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE))
810 v.val |= SOF_TIMESTAMPING_RX_SOFTWARE;
811 if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE))
812 v.val |= SOF_TIMESTAMPING_SOFTWARE;
813 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))
814 v.val |= SOF_TIMESTAMPING_SYS_HARDWARE;
815 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))
816 v.val |= SOF_TIMESTAMPING_RAW_HARDWARE;
817 break;
818
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700819 case SO_RCVTIMEO:
Eric Dumazet2a915252009-05-27 11:30:05 +0000820 lv = sizeof(struct timeval);
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700821 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
822 v.tm.tv_sec = 0;
823 v.tm.tv_usec = 0;
824 } else {
825 v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
826 v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700827 }
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700828 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700829
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700830 case SO_SNDTIMEO:
Eric Dumazet2a915252009-05-27 11:30:05 +0000831 lv = sizeof(struct timeval);
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700832 if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
833 v.tm.tv_sec = 0;
834 v.tm.tv_usec = 0;
835 } else {
836 v.tm.tv_sec = sk->sk_sndtimeo / HZ;
837 v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
838 }
839 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700841 case SO_RCVLOWAT:
842 v.val = sk->sk_rcvlowat;
843 break;
Catherine Zhang877ce7c2006-06-29 12:27:47 -0700844
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700845 case SO_SNDLOWAT:
Eric Dumazet2a915252009-05-27 11:30:05 +0000846 v.val = 1;
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700847 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700848
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700849 case SO_PASSCRED:
850 v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
851 break;
852
853 case SO_PEERCRED:
854 if (len > sizeof(sk->sk_peercred))
855 len = sizeof(sk->sk_peercred);
856 if (copy_to_user(optval, &sk->sk_peercred, len))
857 return -EFAULT;
858 goto lenout;
859
860 case SO_PEERNAME:
861 {
862 char address[128];
863
864 if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
865 return -ENOTCONN;
866 if (lv < len)
867 return -EINVAL;
868 if (copy_to_user(optval, address, len))
869 return -EFAULT;
870 goto lenout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700871 }
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700872
873 /* Dubious BSD thing... Probably nobody even uses it, but
874 * the UNIX standard wants it for whatever reason... -DaveM
875 */
876 case SO_ACCEPTCONN:
877 v.val = sk->sk_state == TCP_LISTEN;
878 break;
879
880 case SO_PASSSEC:
881 v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
882 break;
883
884 case SO_PEERSEC:
885 return security_socket_getpeersec_stream(sock, optval, optlen, len);
886
Laszlo Attila Toth4a19ec52008-01-30 19:08:16 -0800887 case SO_MARK:
888 v.val = sk->sk_mark;
889 break;
890
Stephen Hemmingere71a4782007-04-10 20:10:33 -0700891 default:
892 return -ENOPROTOOPT;
893 }
894
Linus Torvalds1da177e2005-04-16 15:20:36 -0700895 if (len > lv)
896 len = lv;
897 if (copy_to_user(optval, &v, len))
898 return -EFAULT;
899lenout:
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900900 if (put_user(len, optlen))
901 return -EFAULT;
902 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700903}
904
Ingo Molnara5b5bb92006-07-03 00:25:35 -0700905/*
906 * Initialize an sk_lock.
907 *
908 * (We also register the sk_lock with the lock validator.)
909 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700910static inline void sock_lock_init(struct sock *sk)
Ingo Molnara5b5bb92006-07-03 00:25:35 -0700911{
Peter Zijlstraed075362006-12-06 20:35:24 -0800912 sock_lock_init_class_and_name(sk,
913 af_family_slock_key_strings[sk->sk_family],
914 af_family_slock_keys + sk->sk_family,
915 af_family_key_strings[sk->sk_family],
916 af_family_keys + sk->sk_family);
Ingo Molnara5b5bb92006-07-03 00:25:35 -0700917}
918
Pavel Emelyanovf1a6c4d2007-11-01 00:29:45 -0700919static void sock_copy(struct sock *nsk, const struct sock *osk)
920{
921#ifdef CONFIG_SECURITY_NETWORK
922 void *sptr = nsk->sk_security;
923#endif
924
925 memcpy(nsk, osk, osk->sk_prot->obj_size);
Pavel Emelyanovf1a6c4d2007-11-01 00:29:45 -0700926#ifdef CONFIG_SECURITY_NETWORK
927 nsk->sk_security = sptr;
928 security_sk_clone(osk, nsk);
929#endif
930}
931
Pavel Emelyanov2e4afe72007-11-01 00:36:26 -0700932static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
933 int family)
Pavel Emelyanovc308c1b22007-11-01 00:33:50 -0700934{
935 struct sock *sk;
936 struct kmem_cache *slab;
937
938 slab = prot->slab;
939 if (slab != NULL)
940 sk = kmem_cache_alloc(slab, priority);
941 else
942 sk = kmalloc(prot->obj_size, priority);
943
Pavel Emelyanov2e4afe72007-11-01 00:36:26 -0700944 if (sk != NULL) {
945 if (security_sk_alloc(sk, family, priority))
946 goto out_free;
947
948 if (!try_module_get(prot->owner))
949 goto out_free_sec;
950 }
951
Pavel Emelyanovc308c1b22007-11-01 00:33:50 -0700952 return sk;
Pavel Emelyanov2e4afe72007-11-01 00:36:26 -0700953
954out_free_sec:
955 security_sk_free(sk);
956out_free:
957 if (slab != NULL)
958 kmem_cache_free(slab, sk);
959 else
960 kfree(sk);
961 return NULL;
Pavel Emelyanovc308c1b22007-11-01 00:33:50 -0700962}
963
964static void sk_prot_free(struct proto *prot, struct sock *sk)
965{
966 struct kmem_cache *slab;
Pavel Emelyanov2e4afe72007-11-01 00:36:26 -0700967 struct module *owner;
Pavel Emelyanovc308c1b22007-11-01 00:33:50 -0700968
Pavel Emelyanov2e4afe72007-11-01 00:36:26 -0700969 owner = prot->owner;
Pavel Emelyanovc308c1b22007-11-01 00:33:50 -0700970 slab = prot->slab;
Pavel Emelyanov2e4afe72007-11-01 00:36:26 -0700971
972 security_sk_free(sk);
Pavel Emelyanovc308c1b22007-11-01 00:33:50 -0700973 if (slab != NULL)
974 kmem_cache_free(slab, sk);
975 else
976 kfree(sk);
Pavel Emelyanov2e4afe72007-11-01 00:36:26 -0700977 module_put(owner);
Pavel Emelyanovc308c1b22007-11-01 00:33:50 -0700978}
979
Linus Torvalds1da177e2005-04-16 15:20:36 -0700980/**
981 * sk_alloc - All socket objects are allocated here
Randy Dunlapc4ea43c2007-10-12 21:17:49 -0700982 * @net: the applicable net namespace
Pavel Pisa4dc3b162005-05-01 08:59:25 -0700983 * @family: protocol family
984 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
985 * @prot: struct proto associated with this new sock instance
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986 */
Eric W. Biederman1b8d7ae2007-10-08 23:24:22 -0700987struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
Pavel Emelyanov6257ff22007-11-01 00:39:31 -0700988 struct proto *prot)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989{
Pavel Emelyanovc308c1b22007-11-01 00:33:50 -0700990 struct sock *sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700991
Pavel Emelyanov154adbc2007-11-01 00:38:43 -0700992 sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700993 if (sk) {
Pavel Emelyanov154adbc2007-11-01 00:38:43 -0700994 sk->sk_family = family;
995 /*
996 * See comment in struct sock definition to understand
997 * why we need sk_prot_creator -acme
998 */
999 sk->sk_prot = sk->sk_prot_creator = prot;
1000 sock_lock_init(sk);
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001001 sock_net_set(sk, get_net(net));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001002 }
Frank Filza79af592005-09-27 15:23:38 -07001003
Pavel Emelyanov2e4afe72007-11-01 00:36:26 -07001004 return sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001005}
Eric Dumazet2a915252009-05-27 11:30:05 +00001006EXPORT_SYMBOL(sk_alloc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001007
1008void sk_free(struct sock *sk)
1009{
1010 struct sk_filter *filter;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001011
1012 if (sk->sk_destruct)
1013 sk->sk_destruct(sk);
1014
Dmitry Mishinfda9ef52006-08-31 15:28:39 -07001015 filter = rcu_dereference(sk->sk_filter);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001016 if (filter) {
Pavel Emelyanov309dd5f2007-10-17 21:21:51 -07001017 sk_filter_uncharge(sk, filter);
Dmitry Mishinfda9ef52006-08-31 15:28:39 -07001018 rcu_assign_pointer(sk->sk_filter, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019 }
1020
Patrick Ohly20d49472009-02-12 05:03:38 +00001021 sock_disable_timestamp(sk, SOCK_TIMESTAMP);
1022 sock_disable_timestamp(sk, SOCK_TIMESTAMPING_RX_SOFTWARE);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023
1024 if (atomic_read(&sk->sk_omem_alloc))
1025 printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -08001026 __func__, atomic_read(&sk->sk_omem_alloc));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001027
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001028 put_net(sock_net(sk));
Pavel Emelyanovc308c1b22007-11-01 00:33:50 -07001029 sk_prot_free(sk->sk_prot_creator, sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030}
Eric Dumazet2a915252009-05-27 11:30:05 +00001031EXPORT_SYMBOL(sk_free);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032
Denis V. Lunevedf02082008-02-29 11:18:32 -08001033/*
1034 * Last sock_put should drop referrence to sk->sk_net. It has already
1035 * been dropped in sk_change_net. Taking referrence to stopping namespace
1036 * is not an option.
1037 * Take referrence to a socket to remove it from hash _alive_ and after that
1038 * destroy it in the context of init_net.
1039 */
1040void sk_release_kernel(struct sock *sk)
1041{
1042 if (sk == NULL || sk->sk_socket == NULL)
1043 return;
1044
1045 sock_hold(sk);
1046 sock_release(sk->sk_socket);
Denis V. Lunev65a18ec2008-04-16 01:59:46 -07001047 release_net(sock_net(sk));
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001048 sock_net_set(sk, get_net(&init_net));
Denis V. Lunevedf02082008-02-29 11:18:32 -08001049 sock_put(sk);
1050}
David S. Miller45af1752008-02-29 11:33:19 -08001051EXPORT_SYMBOL(sk_release_kernel);
Denis V. Lunevedf02082008-02-29 11:18:32 -08001052
Al Virodd0fc662005-10-07 07:46:04 +01001053struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001054{
Pavel Emelyanov8fd1d172007-11-01 00:37:32 -07001055 struct sock *newsk;
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001056
Pavel Emelyanov8fd1d172007-11-01 00:37:32 -07001057 newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001058 if (newsk != NULL) {
1059 struct sk_filter *filter;
1060
Venkat Yekkirala892c1412006-08-04 23:08:56 -07001061 sock_copy(newsk, sk);
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001062
1063 /* SANITY */
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001064 get_net(sock_net(newsk));
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001065 sk_node_init(&newsk->sk_node);
1066 sock_lock_init(newsk);
1067 bh_lock_sock(newsk);
Eric Dumazetfa438cc2007-03-04 16:05:44 -08001068 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001069
1070 atomic_set(&newsk->sk_rmem_alloc, 0);
1071 atomic_set(&newsk->sk_wmem_alloc, 0);
1072 atomic_set(&newsk->sk_omem_alloc, 0);
1073 skb_queue_head_init(&newsk->sk_receive_queue);
1074 skb_queue_head_init(&newsk->sk_write_queue);
Chris Leech97fc2f02006-05-23 17:55:33 -07001075#ifdef CONFIG_NET_DMA
1076 skb_queue_head_init(&newsk->sk_async_wait_queue);
1077#endif
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001078
1079 rwlock_init(&newsk->sk_dst_lock);
1080 rwlock_init(&newsk->sk_callback_lock);
Peter Zijlstra443aef02007-07-19 01:49:00 -07001081 lockdep_set_class_and_name(&newsk->sk_callback_lock,
1082 af_callback_keys + newsk->sk_family,
1083 af_family_clock_key_strings[newsk->sk_family]);
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001084
1085 newsk->sk_dst_cache = NULL;
1086 newsk->sk_wmem_queued = 0;
1087 newsk->sk_forward_alloc = 0;
1088 newsk->sk_send_head = NULL;
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001089 newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
1090
1091 sock_reset_flag(newsk, SOCK_DONE);
1092 skb_queue_head_init(&newsk->sk_error_queue);
1093
1094 filter = newsk->sk_filter;
1095 if (filter != NULL)
1096 sk_filter_charge(newsk, filter);
1097
1098 if (unlikely(xfrm_sk_clone_policy(newsk))) {
1099 /* It is still raw copy of parent, so invalidate
1100 * destructor and make plain sk_free() */
1101 newsk->sk_destruct = NULL;
1102 sk_free(newsk);
1103 newsk = NULL;
1104 goto out;
1105 }
1106
1107 newsk->sk_err = 0;
1108 newsk->sk_priority = 0;
1109 atomic_set(&newsk->sk_refcnt, 2);
1110
1111 /*
1112 * Increment the counter in the same struct proto as the master
1113 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
1114 * is the same as sk->sk_prot->socks, as this field was copied
1115 * with memcpy).
1116 *
1117 * This _changes_ the previous behaviour, where
1118 * tcp_create_openreq_child always was incrementing the
1119 * equivalent to tcp_prot->socks (inet_sock_nr), so this have
1120 * to be taken into account in all callers. -acme
1121 */
1122 sk_refcnt_debug_inc(newsk);
David S. Miller972692e2008-06-17 22:41:38 -07001123 sk_set_socket(newsk, NULL);
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001124 newsk->sk_sleep = NULL;
1125
1126 if (newsk->sk_prot->sockets_allocated)
Eric Dumazet17483762008-11-25 21:16:35 -08001127 percpu_counter_inc(newsk->sk_prot->sockets_allocated);
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001128 }
1129out:
1130 return newsk;
1131}
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001132EXPORT_SYMBOL_GPL(sk_clone);
1133
Andi Kleen99580892007-04-20 17:12:43 -07001134void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1135{
1136 __sk_dst_set(sk, dst);
1137 sk->sk_route_caps = dst->dev->features;
1138 if (sk->sk_route_caps & NETIF_F_GSO)
Herbert Xu4fcd6b92007-05-31 22:15:50 -07001139 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
Andi Kleen99580892007-04-20 17:12:43 -07001140 if (sk_can_gso(sk)) {
Peter P Waskiewicz Jr82cc1a72008-03-21 03:43:19 -07001141 if (dst->header_len) {
Andi Kleen99580892007-04-20 17:12:43 -07001142 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
Peter P Waskiewicz Jr82cc1a72008-03-21 03:43:19 -07001143 } else {
Andi Kleen99580892007-04-20 17:12:43 -07001144 sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
Peter P Waskiewicz Jr82cc1a72008-03-21 03:43:19 -07001145 sk->sk_gso_max_size = dst->dev->gso_max_size;
1146 }
Andi Kleen99580892007-04-20 17:12:43 -07001147 }
1148}
1149EXPORT_SYMBOL_GPL(sk_setup_caps);
1150
Linus Torvalds1da177e2005-04-16 15:20:36 -07001151void __init sk_init(void)
1152{
1153 if (num_physpages <= 4096) {
1154 sysctl_wmem_max = 32767;
1155 sysctl_rmem_max = 32767;
1156 sysctl_wmem_default = 32767;
1157 sysctl_rmem_default = 32767;
1158 } else if (num_physpages >= 131072) {
1159 sysctl_wmem_max = 131071;
1160 sysctl_rmem_max = 131071;
1161 }
1162}
1163
1164/*
1165 * Simple resource managers for sockets.
1166 */
1167
1168
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001169/*
1170 * Write buffer destructor automatically called from kfree_skb.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171 */
1172void sock_wfree(struct sk_buff *skb)
1173{
1174 struct sock *sk = skb->sk;
1175
1176 /* In case it might be waiting for more memory. */
1177 atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
1178 if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE))
1179 sk->sk_write_space(sk);
1180 sock_put(sk);
1181}
Eric Dumazet2a915252009-05-27 11:30:05 +00001182EXPORT_SYMBOL(sock_wfree);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001183
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001184/*
1185 * Read buffer destructor automatically called from kfree_skb.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001186 */
1187void sock_rfree(struct sk_buff *skb)
1188{
1189 struct sock *sk = skb->sk;
1190
1191 atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
Hideo Aoki3ab224b2007-12-31 00:11:19 -08001192 sk_mem_uncharge(skb->sk, skb->truesize);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001193}
Eric Dumazet2a915252009-05-27 11:30:05 +00001194EXPORT_SYMBOL(sock_rfree);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001195
1196
1197int sock_i_uid(struct sock *sk)
1198{
1199 int uid;
1200
1201 read_lock(&sk->sk_callback_lock);
1202 uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
1203 read_unlock(&sk->sk_callback_lock);
1204 return uid;
1205}
Eric Dumazet2a915252009-05-27 11:30:05 +00001206EXPORT_SYMBOL(sock_i_uid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001207
1208unsigned long sock_i_ino(struct sock *sk)
1209{
1210 unsigned long ino;
1211
1212 read_lock(&sk->sk_callback_lock);
1213 ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
1214 read_unlock(&sk->sk_callback_lock);
1215 return ino;
1216}
Eric Dumazet2a915252009-05-27 11:30:05 +00001217EXPORT_SYMBOL(sock_i_ino);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001218
1219/*
1220 * Allocate a skb from the socket's send buffer.
1221 */
Victor Fusco86a76ca2005-07-08 14:57:47 -07001222struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
Al Virodd0fc662005-10-07 07:46:04 +01001223 gfp_t priority)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001224{
1225 if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
Eric Dumazet2a915252009-05-27 11:30:05 +00001226 struct sk_buff *skb = alloc_skb(size, priority);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001227 if (skb) {
1228 skb_set_owner_w(skb, sk);
1229 return skb;
1230 }
1231 }
1232 return NULL;
1233}
Eric Dumazet2a915252009-05-27 11:30:05 +00001234EXPORT_SYMBOL(sock_wmalloc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001235
1236/*
1237 * Allocate a skb from the socket's receive buffer.
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001238 */
Victor Fusco86a76ca2005-07-08 14:57:47 -07001239struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
Al Virodd0fc662005-10-07 07:46:04 +01001240 gfp_t priority)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001241{
1242 if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
1243 struct sk_buff *skb = alloc_skb(size, priority);
1244 if (skb) {
1245 skb_set_owner_r(skb, sk);
1246 return skb;
1247 }
1248 }
1249 return NULL;
1250}
1251
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001252/*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001253 * Allocate a memory block from the socket's option memory buffer.
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001254 */
Al Virodd0fc662005-10-07 07:46:04 +01001255void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001256{
1257 if ((unsigned)size <= sysctl_optmem_max &&
1258 atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
1259 void *mem;
1260 /* First do the add, to avoid the race if kmalloc
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001261 * might sleep.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001262 */
1263 atomic_add(size, &sk->sk_omem_alloc);
1264 mem = kmalloc(size, priority);
1265 if (mem)
1266 return mem;
1267 atomic_sub(size, &sk->sk_omem_alloc);
1268 }
1269 return NULL;
1270}
Eric Dumazet2a915252009-05-27 11:30:05 +00001271EXPORT_SYMBOL(sock_kmalloc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001272
1273/*
1274 * Free an option memory block.
1275 */
1276void sock_kfree_s(struct sock *sk, void *mem, int size)
1277{
1278 kfree(mem);
1279 atomic_sub(size, &sk->sk_omem_alloc);
1280}
Eric Dumazet2a915252009-05-27 11:30:05 +00001281EXPORT_SYMBOL(sock_kfree_s);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001282
1283/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
1284 I think, these locks should be removed for datagram sockets.
1285 */
Eric Dumazet2a915252009-05-27 11:30:05 +00001286static long sock_wait_for_wmem(struct sock *sk, long timeo)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001287{
1288 DEFINE_WAIT(wait);
1289
1290 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1291 for (;;) {
1292 if (!timeo)
1293 break;
1294 if (signal_pending(current))
1295 break;
1296 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1297 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1298 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1299 break;
1300 if (sk->sk_shutdown & SEND_SHUTDOWN)
1301 break;
1302 if (sk->sk_err)
1303 break;
1304 timeo = schedule_timeout(timeo);
1305 }
1306 finish_wait(sk->sk_sleep, &wait);
1307 return timeo;
1308}
1309
1310
1311/*
1312 * Generic send/receive buffer handlers
1313 */
1314
Herbert Xu4cc7f682009-02-04 16:55:54 -08001315struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1316 unsigned long data_len, int noblock,
1317 int *errcode)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001318{
1319 struct sk_buff *skb;
Al Viro7d877f32005-10-21 03:20:43 -04001320 gfp_t gfp_mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001321 long timeo;
1322 int err;
1323
1324 gfp_mask = sk->sk_allocation;
1325 if (gfp_mask & __GFP_WAIT)
1326 gfp_mask |= __GFP_REPEAT;
1327
1328 timeo = sock_sndtimeo(sk, noblock);
1329 while (1) {
1330 err = sock_error(sk);
1331 if (err != 0)
1332 goto failure;
1333
1334 err = -EPIPE;
1335 if (sk->sk_shutdown & SEND_SHUTDOWN)
1336 goto failure;
1337
1338 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
Larry Woodmandb38c1792006-11-03 16:05:45 -08001339 skb = alloc_skb(header_len, gfp_mask);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001340 if (skb) {
1341 int npages;
1342 int i;
1343
1344 /* No pages, we're done... */
1345 if (!data_len)
1346 break;
1347
1348 npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
1349 skb->truesize += data_len;
1350 skb_shinfo(skb)->nr_frags = npages;
1351 for (i = 0; i < npages; i++) {
1352 struct page *page;
1353 skb_frag_t *frag;
1354
1355 page = alloc_pages(sk->sk_allocation, 0);
1356 if (!page) {
1357 err = -ENOBUFS;
1358 skb_shinfo(skb)->nr_frags = i;
1359 kfree_skb(skb);
1360 goto failure;
1361 }
1362
1363 frag = &skb_shinfo(skb)->frags[i];
1364 frag->page = page;
1365 frag->page_offset = 0;
1366 frag->size = (data_len >= PAGE_SIZE ?
1367 PAGE_SIZE :
1368 data_len);
1369 data_len -= PAGE_SIZE;
1370 }
1371
1372 /* Full success... */
1373 break;
1374 }
1375 err = -ENOBUFS;
1376 goto failure;
1377 }
1378 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1379 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1380 err = -EAGAIN;
1381 if (!timeo)
1382 goto failure;
1383 if (signal_pending(current))
1384 goto interrupted;
1385 timeo = sock_wait_for_wmem(sk, timeo);
1386 }
1387
1388 skb_set_owner_w(skb, sk);
1389 return skb;
1390
1391interrupted:
1392 err = sock_intr_errno(timeo);
1393failure:
1394 *errcode = err;
1395 return NULL;
1396}
Herbert Xu4cc7f682009-02-04 16:55:54 -08001397EXPORT_SYMBOL(sock_alloc_send_pskb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001398
/*
 * Allocate a send skb with a linear area of @size bytes and no paged data;
 * a thin wrapper around sock_alloc_send_pskb() with data_len == 0.
 */
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	const unsigned long no_paged_data = 0;

	return sock_alloc_send_pskb(sk, size, no_paged_data, noblock, errcode);
}
EXPORT_SYMBOL(sock_alloc_send_skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001405
1406static void __lock_sock(struct sock *sk)
1407{
1408 DEFINE_WAIT(wait);
1409
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001410 for (;;) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001411 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
1412 TASK_UNINTERRUPTIBLE);
1413 spin_unlock_bh(&sk->sk_lock.slock);
1414 schedule();
1415 spin_lock_bh(&sk->sk_lock.slock);
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001416 if (!sock_owned_by_user(sk))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001417 break;
1418 }
1419 finish_wait(&sk->sk_lock.wq, &wait);
1420}
1421
1422static void __release_sock(struct sock *sk)
1423{
1424 struct sk_buff *skb = sk->sk_backlog.head;
1425
1426 do {
1427 sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
1428 bh_unlock_sock(sk);
1429
1430 do {
1431 struct sk_buff *next = skb->next;
1432
1433 skb->next = NULL;
Peter Zijlstrac57943a2008-10-07 14:18:42 -07001434 sk_backlog_rcv(sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001435
1436 /*
1437 * We are in process context here with softirqs
1438 * disabled, use cond_resched_softirq() to preempt.
1439 * This is safe to do because we've taken the backlog
1440 * queue private:
1441 */
1442 cond_resched_softirq();
1443
1444 skb = next;
1445 } while (skb != NULL);
1446
1447 bh_lock_sock(sk);
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001448 } while ((skb = sk->sk_backlog.head) != NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001449}
1450
1451/**
1452 * sk_wait_data - wait for data to arrive at sk_receive_queue
Pavel Pisa4dc3b162005-05-01 08:59:25 -07001453 * @sk: sock to wait on
1454 * @timeo: for how long
Linus Torvalds1da177e2005-04-16 15:20:36 -07001455 *
1456 * Now socket state including sk->sk_err is changed only under lock,
1457 * hence we may omit checks after joining wait queue.
1458 * We check receive queue before schedule() only as optimization;
1459 * it is very likely that release_sock() added new data.
1460 */
1461int sk_wait_data(struct sock *sk, long *timeo)
1462{
1463 int rc;
1464 DEFINE_WAIT(wait);
1465
1466 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1467 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1468 rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
1469 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1470 finish_wait(sk->sk_sleep, &wait);
1471 return rc;
1472}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001473EXPORT_SYMBOL(sk_wait_data);
1474
Hideo Aoki3ab224b2007-12-31 00:11:19 -08001475/**
1476 * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
1477 * @sk: socket
1478 * @size: memory size to allocate
1479 * @kind: allocation type
1480 *
1481 * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
1482 * rmem allocation. This function assumes that protocols which have
1483 * memory_pressure use sk_wmem_queued as write buffer accounting.
1484 */
1485int __sk_mem_schedule(struct sock *sk, int size, int kind)
1486{
1487 struct proto *prot = sk->sk_prot;
1488 int amt = sk_mem_pages(size);
1489 int allocated;
1490
1491 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
1492 allocated = atomic_add_return(amt, prot->memory_allocated);
1493
1494 /* Under limit. */
1495 if (allocated <= prot->sysctl_mem[0]) {
1496 if (prot->memory_pressure && *prot->memory_pressure)
1497 *prot->memory_pressure = 0;
1498 return 1;
1499 }
1500
1501 /* Under pressure. */
1502 if (allocated > prot->sysctl_mem[1])
1503 if (prot->enter_memory_pressure)
Pavel Emelyanov5c52ba12008-07-16 20:28:10 -07001504 prot->enter_memory_pressure(sk);
Hideo Aoki3ab224b2007-12-31 00:11:19 -08001505
1506 /* Over hard limit. */
1507 if (allocated > prot->sysctl_mem[2])
1508 goto suppress_allocation;
1509
1510 /* guarantee minimum buffer size under pressure */
1511 if (kind == SK_MEM_RECV) {
1512 if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
1513 return 1;
1514 } else { /* SK_MEM_SEND */
1515 if (sk->sk_type == SOCK_STREAM) {
1516 if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
1517 return 1;
1518 } else if (atomic_read(&sk->sk_wmem_alloc) <
1519 prot->sysctl_wmem[0])
1520 return 1;
1521 }
1522
1523 if (prot->memory_pressure) {
Eric Dumazet17483762008-11-25 21:16:35 -08001524 int alloc;
1525
1526 if (!*prot->memory_pressure)
1527 return 1;
1528 alloc = percpu_counter_read_positive(prot->sockets_allocated);
1529 if (prot->sysctl_mem[2] > alloc *
Hideo Aoki3ab224b2007-12-31 00:11:19 -08001530 sk_mem_pages(sk->sk_wmem_queued +
1531 atomic_read(&sk->sk_rmem_alloc) +
1532 sk->sk_forward_alloc))
1533 return 1;
1534 }
1535
1536suppress_allocation:
1537
1538 if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
1539 sk_stream_moderate_sndbuf(sk);
1540
1541 /* Fail only if socket is _under_ its sndbuf.
1542 * In this case we cannot block, so that we have to fail.
1543 */
1544 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
1545 return 1;
1546 }
1547
1548 /* Alas. Undo changes. */
1549 sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
1550 atomic_sub(amt, prot->memory_allocated);
1551 return 0;
1552}
Hideo Aoki3ab224b2007-12-31 00:11:19 -08001553EXPORT_SYMBOL(__sk_mem_schedule);
1554
1555/**
1556 * __sk_reclaim - reclaim memory_allocated
1557 * @sk: socket
1558 */
1559void __sk_mem_reclaim(struct sock *sk)
1560{
1561 struct proto *prot = sk->sk_prot;
1562
Eric Dumazet680a5a52007-12-31 15:00:50 -08001563 atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
Hideo Aoki3ab224b2007-12-31 00:11:19 -08001564 prot->memory_allocated);
1565 sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
1566
1567 if (prot->memory_pressure && *prot->memory_pressure &&
1568 (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
1569 *prot->memory_pressure = 0;
1570}
Hideo Aoki3ab224b2007-12-31 00:11:19 -08001571EXPORT_SYMBOL(__sk_mem_reclaim);
1572
1573
Linus Torvalds1da177e2005-04-16 15:20:36 -07001574/*
1575 * Set of default routines for initialising struct proto_ops when
1576 * the protocol does not support a particular function. In certain
1577 * cases where it makes no sense for a protocol to have a "do nothing"
1578 * function, some default processing is provided.
1579 */
1580
1581int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
1582{
1583 return -EOPNOTSUPP;
1584}
Eric Dumazet2a915252009-05-27 11:30:05 +00001585EXPORT_SYMBOL(sock_no_bind);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001586
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001587int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001588 int len, int flags)
1589{
1590 return -EOPNOTSUPP;
1591}
Eric Dumazet2a915252009-05-27 11:30:05 +00001592EXPORT_SYMBOL(sock_no_connect);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001593
1594int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
1595{
1596 return -EOPNOTSUPP;
1597}
Eric Dumazet2a915252009-05-27 11:30:05 +00001598EXPORT_SYMBOL(sock_no_socketpair);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001599
1600int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
1601{
1602 return -EOPNOTSUPP;
1603}
Eric Dumazet2a915252009-05-27 11:30:05 +00001604EXPORT_SYMBOL(sock_no_accept);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001605
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001606int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001607 int *len, int peer)
1608{
1609 return -EOPNOTSUPP;
1610}
Eric Dumazet2a915252009-05-27 11:30:05 +00001611EXPORT_SYMBOL(sock_no_getname);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612
Eric Dumazet2a915252009-05-27 11:30:05 +00001613unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001614{
1615 return 0;
1616}
Eric Dumazet2a915252009-05-27 11:30:05 +00001617EXPORT_SYMBOL(sock_no_poll);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001618
1619int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1620{
1621 return -EOPNOTSUPP;
1622}
Eric Dumazet2a915252009-05-27 11:30:05 +00001623EXPORT_SYMBOL(sock_no_ioctl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001624
/*
 * Stub listen() handler for protocols that cannot listen.
 * @backlog is ignored; always returns -EOPNOTSUPP.
 */
int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}
Eric Dumazet2a915252009-05-27 11:30:05 +00001629EXPORT_SYMBOL(sock_no_listen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001630
/*
 * Stub shutdown() handler for protocols without shutdown support.
 * @how is ignored; always returns -EOPNOTSUPP.
 */
int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}
Eric Dumazet2a915252009-05-27 11:30:05 +00001635EXPORT_SYMBOL(sock_no_shutdown);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001636
1637int sock_no_setsockopt(struct socket *sock, int level, int optname,
1638 char __user *optval, int optlen)
1639{
1640 return -EOPNOTSUPP;
1641}
Eric Dumazet2a915252009-05-27 11:30:05 +00001642EXPORT_SYMBOL(sock_no_setsockopt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001643
1644int sock_no_getsockopt(struct socket *sock, int level, int optname,
1645 char __user *optval, int __user *optlen)
1646{
1647 return -EOPNOTSUPP;
1648}
Eric Dumazet2a915252009-05-27 11:30:05 +00001649EXPORT_SYMBOL(sock_no_getsockopt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001650
/*
 * Stub sendmsg handler for protocols that cannot transmit.
 * The message is not inspected; always returns -EOPNOTSUPP.
 */
int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len)
{
	return -EOPNOTSUPP;
}
Eric Dumazet2a915252009-05-27 11:30:05 +00001656EXPORT_SYMBOL(sock_no_sendmsg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001657
/*
 * Stub recvmsg handler for protocols that cannot receive.
 * The message is not filled in; always returns -EOPNOTSUPP.
 */
int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len, int flags)
{
	return -EOPNOTSUPP;
}
Eric Dumazet2a915252009-05-27 11:30:05 +00001663EXPORT_SYMBOL(sock_no_recvmsg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001664
/*
 * Stub mmap handler for sockets that cannot be memory-mapped.
 * Unlike the other sock_no_*() stubs this returns -ENODEV rather
 * than -EOPNOTSUPP.
 */
int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}
Eric Dumazet2a915252009-05-27 11:30:05 +00001670EXPORT_SYMBOL(sock_no_mmap);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001671
1672ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
1673{
1674 ssize_t res;
1675 struct msghdr msg = {.msg_flags = flags};
1676 struct kvec iov;
1677 char *kaddr = kmap(page);
1678 iov.iov_base = kaddr + offset;
1679 iov.iov_len = size;
1680 res = kernel_sendmsg(sock, &msg, &iov, 1, size);
1681 kunmap(page);
1682 return res;
1683}
Eric Dumazet2a915252009-05-27 11:30:05 +00001684EXPORT_SYMBOL(sock_no_sendpage);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001685
/*
 *	Default Socket Callbacks
 */
1689
1690static void sock_def_wakeup(struct sock *sk)
1691{
1692 read_lock(&sk->sk_callback_lock);
1693 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1694 wake_up_interruptible_all(sk->sk_sleep);
1695 read_unlock(&sk->sk_callback_lock);
1696}
1697
1698static void sock_def_error_report(struct sock *sk)
1699{
1700 read_lock(&sk->sk_callback_lock);
1701 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
Davide Libenzi37e55402009-03-31 15:24:21 -07001702 wake_up_interruptible_poll(sk->sk_sleep, POLLERR);
Pavel Emelyanov8d8ad9d2007-11-26 20:10:50 +08001703 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001704 read_unlock(&sk->sk_callback_lock);
1705}
1706
1707static void sock_def_readable(struct sock *sk, int len)
1708{
1709 read_lock(&sk->sk_callback_lock);
1710 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
Davide Libenzi37e55402009-03-31 15:24:21 -07001711 wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN |
1712 POLLRDNORM | POLLRDBAND);
Pavel Emelyanov8d8ad9d2007-11-26 20:10:50 +08001713 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001714 read_unlock(&sk->sk_callback_lock);
1715}
1716
1717static void sock_def_write_space(struct sock *sk)
1718{
1719 read_lock(&sk->sk_callback_lock);
1720
1721 /* Do not wake up a writer until he can make "significant"
1722 * progress. --DaveM
1723 */
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001724 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001725 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
Davide Libenzi37e55402009-03-31 15:24:21 -07001726 wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT |
1727 POLLWRNORM | POLLWRBAND);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001728
1729 /* Should agree with poll, otherwise some programs break */
1730 if (sock_writeable(sk))
Pavel Emelyanov8d8ad9d2007-11-26 20:10:50 +08001731 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001732 }
1733
1734 read_unlock(&sk->sk_callback_lock);
1735}
1736
1737static void sock_def_destruct(struct sock *sk)
1738{
Jesper Juhla51482b2005-11-08 09:41:34 -08001739 kfree(sk->sk_protinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001740}
1741
1742void sk_send_sigurg(struct sock *sk)
1743{
1744 if (sk->sk_socket && sk->sk_socket->file)
1745 if (send_sigurg(&sk->sk_socket->file->f_owner))
Pavel Emelyanov8d8ad9d2007-11-26 20:10:50 +08001746 sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747}
Eric Dumazet2a915252009-05-27 11:30:05 +00001748EXPORT_SYMBOL(sk_send_sigurg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001749
/*
 * (Re)arm @timer to fire at @expires on behalf of @sk.
 *
 * A socket reference is taken only when mod_timer() returns 0, so a
 * timer that was already counted does not gain a second reference.
 */
void sk_reset_timer(struct sock *sk, struct timer_list *timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001756EXPORT_SYMBOL(sk_reset_timer);
1757
/*
 * Cancel @timer for @sk. The socket reference is dropped (via
 * __sock_put()) only if the timer was pending and del_timer()
 * reports that it removed it.
 */
void sk_stop_timer(struct sock *sk, struct timer_list *timer)
{
	if (timer_pending(timer) && del_timer(timer))
		__sock_put(sk);
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001763EXPORT_SYMBOL(sk_stop_timer);
1764
1765void sock_init_data(struct socket *sock, struct sock *sk)
1766{
1767 skb_queue_head_init(&sk->sk_receive_queue);
1768 skb_queue_head_init(&sk->sk_write_queue);
1769 skb_queue_head_init(&sk->sk_error_queue);
Chris Leech97fc2f02006-05-23 17:55:33 -07001770#ifdef CONFIG_NET_DMA
1771 skb_queue_head_init(&sk->sk_async_wait_queue);
1772#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001773
1774 sk->sk_send_head = NULL;
1775
1776 init_timer(&sk->sk_timer);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001777
Linus Torvalds1da177e2005-04-16 15:20:36 -07001778 sk->sk_allocation = GFP_KERNEL;
1779 sk->sk_rcvbuf = sysctl_rmem_default;
1780 sk->sk_sndbuf = sysctl_wmem_default;
1781 sk->sk_state = TCP_CLOSE;
David S. Miller972692e2008-06-17 22:41:38 -07001782 sk_set_socket(sk, sock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783
1784 sock_set_flag(sk, SOCK_ZAPPED);
1785
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001786 if (sock) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001787 sk->sk_type = sock->type;
1788 sk->sk_sleep = &sock->wait;
1789 sock->sk = sk;
1790 } else
1791 sk->sk_sleep = NULL;
1792
1793 rwlock_init(&sk->sk_dst_lock);
1794 rwlock_init(&sk->sk_callback_lock);
Peter Zijlstra443aef02007-07-19 01:49:00 -07001795 lockdep_set_class_and_name(&sk->sk_callback_lock,
1796 af_callback_keys + sk->sk_family,
1797 af_family_clock_key_strings[sk->sk_family]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001798
1799 sk->sk_state_change = sock_def_wakeup;
1800 sk->sk_data_ready = sock_def_readable;
1801 sk->sk_write_space = sock_def_write_space;
1802 sk->sk_error_report = sock_def_error_report;
1803 sk->sk_destruct = sock_def_destruct;
1804
1805 sk->sk_sndmsg_page = NULL;
1806 sk->sk_sndmsg_off = 0;
1807
1808 sk->sk_peercred.pid = 0;
1809 sk->sk_peercred.uid = -1;
1810 sk->sk_peercred.gid = -1;
1811 sk->sk_write_pending = 0;
1812 sk->sk_rcvlowat = 1;
1813 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
1814 sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
1815
Eric Dumazetf37f0af2008-04-13 21:39:26 -07001816 sk->sk_stamp = ktime_set(-1L, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001817
1818 atomic_set(&sk->sk_refcnt, 1);
Wang Chen33c732c2007-11-13 20:30:01 -08001819 atomic_set(&sk->sk_drops, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001820}
Eric Dumazet2a915252009-05-27 11:30:05 +00001821EXPORT_SYMBOL(sock_init_data);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001822
Harvey Harrisonb5606c22008-02-13 15:03:16 -08001823void lock_sock_nested(struct sock *sk, int subclass)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001824{
1825 might_sleep();
Ingo Molnara5b5bb92006-07-03 00:25:35 -07001826 spin_lock_bh(&sk->sk_lock.slock);
John Heffnerd2e91172007-09-12 10:44:19 +02001827 if (sk->sk_lock.owned)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001828 __lock_sock(sk);
John Heffnerd2e91172007-09-12 10:44:19 +02001829 sk->sk_lock.owned = 1;
Ingo Molnara5b5bb92006-07-03 00:25:35 -07001830 spin_unlock(&sk->sk_lock.slock);
1831 /*
1832 * The sk_lock has mutex_lock() semantics here:
1833 */
Peter Zijlstrafcc70d52006-11-08 22:44:35 -08001834 mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
Ingo Molnara5b5bb92006-07-03 00:25:35 -07001835 local_bh_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001836}
Peter Zijlstrafcc70d52006-11-08 22:44:35 -08001837EXPORT_SYMBOL(lock_sock_nested);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001838
Harvey Harrisonb5606c22008-02-13 15:03:16 -08001839void release_sock(struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001840{
Ingo Molnara5b5bb92006-07-03 00:25:35 -07001841 /*
1842 * The sk_lock has mutex_unlock() semantics:
1843 */
1844 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
1845
1846 spin_lock_bh(&sk->sk_lock.slock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001847 if (sk->sk_backlog.tail)
1848 __release_sock(sk);
John Heffnerd2e91172007-09-12 10:44:19 +02001849 sk->sk_lock.owned = 0;
Ingo Molnara5b5bb92006-07-03 00:25:35 -07001850 if (waitqueue_active(&sk->sk_lock.wq))
1851 wake_up(&sk->sk_lock.wq);
1852 spin_unlock_bh(&sk->sk_lock.slock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001853}
1854EXPORT_SYMBOL(release_sock);
1855
1856int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001857{
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07001858 struct timeval tv;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001859 if (!sock_flag(sk, SOCK_TIMESTAMP))
Patrick Ohly20d49472009-02-12 05:03:38 +00001860 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07001861 tv = ktime_to_timeval(sk->sk_stamp);
1862 if (tv.tv_sec == -1)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001863 return -ENOENT;
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07001864 if (tv.tv_sec == 0) {
1865 sk->sk_stamp = ktime_get_real();
1866 tv = ktime_to_timeval(sk->sk_stamp);
1867 }
1868 return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001869}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001870EXPORT_SYMBOL(sock_get_timestamp);
1871
Eric Dumazetae40eb12007-03-18 17:33:16 -07001872int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
1873{
1874 struct timespec ts;
1875 if (!sock_flag(sk, SOCK_TIMESTAMP))
Patrick Ohly20d49472009-02-12 05:03:38 +00001876 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
Eric Dumazetae40eb12007-03-18 17:33:16 -07001877 ts = ktime_to_timespec(sk->sk_stamp);
1878 if (ts.tv_sec == -1)
1879 return -ENOENT;
1880 if (ts.tv_sec == 0) {
1881 sk->sk_stamp = ktime_get_real();
1882 ts = ktime_to_timespec(sk->sk_stamp);
1883 }
1884 return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
1885}
1886EXPORT_SYMBOL(sock_get_timestampns);
1887
Patrick Ohly20d49472009-02-12 05:03:38 +00001888void sock_enable_timestamp(struct sock *sk, int flag)
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001889{
Patrick Ohly20d49472009-02-12 05:03:38 +00001890 if (!sock_flag(sk, flag)) {
1891 sock_set_flag(sk, flag);
1892 /*
1893 * we just set one of the two flags which require net
1894 * time stamping, but time stamping might have been on
1895 * already because of the other one
1896 */
1897 if (!sock_flag(sk,
1898 flag == SOCK_TIMESTAMP ?
1899 SOCK_TIMESTAMPING_RX_SOFTWARE :
1900 SOCK_TIMESTAMP))
1901 net_enable_timestamp();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001902 }
1903}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001904
1905/*
1906 * Get a socket option on an socket.
1907 *
1908 * FIX: POSIX 1003.1g is very ambiguous here. It states that
1909 * asynchronous errors should be reported by getsockopt. We assume
1910 * this means if you specify SO_ERROR (otherwise whats the point of it).
1911 */
1912int sock_common_getsockopt(struct socket *sock, int level, int optname,
1913 char __user *optval, int __user *optlen)
1914{
1915 struct sock *sk = sock->sk;
1916
1917 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1918}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001919EXPORT_SYMBOL(sock_common_getsockopt);
1920
Dmitry Mishin3fdadf72006-03-20 22:45:21 -08001921#ifdef CONFIG_COMPAT
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08001922int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
1923 char __user *optval, int __user *optlen)
Dmitry Mishin3fdadf72006-03-20 22:45:21 -08001924{
1925 struct sock *sk = sock->sk;
1926
Johannes Berg1e51f952007-03-06 13:44:06 -08001927 if (sk->sk_prot->compat_getsockopt != NULL)
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08001928 return sk->sk_prot->compat_getsockopt(sk, level, optname,
1929 optval, optlen);
Dmitry Mishin3fdadf72006-03-20 22:45:21 -08001930 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
1931}
1932EXPORT_SYMBOL(compat_sock_common_getsockopt);
1933#endif
1934
Linus Torvalds1da177e2005-04-16 15:20:36 -07001935int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
1936 struct msghdr *msg, size_t size, int flags)
1937{
1938 struct sock *sk = sock->sk;
1939 int addr_len = 0;
1940 int err;
1941
1942 err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
1943 flags & ~MSG_DONTWAIT, &addr_len);
1944 if (err >= 0)
1945 msg->msg_namelen = addr_len;
1946 return err;
1947}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001948EXPORT_SYMBOL(sock_common_recvmsg);
1949
1950/*
1951 * Set socket options on an inet socket.
1952 */
1953int sock_common_setsockopt(struct socket *sock, int level, int optname,
1954 char __user *optval, int optlen)
1955{
1956 struct sock *sk = sock->sk;
1957
1958 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1959}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960EXPORT_SYMBOL(sock_common_setsockopt);
1961
Dmitry Mishin3fdadf72006-03-20 22:45:21 -08001962#ifdef CONFIG_COMPAT
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08001963int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
1964 char __user *optval, int optlen)
Dmitry Mishin3fdadf72006-03-20 22:45:21 -08001965{
1966 struct sock *sk = sock->sk;
1967
Arnaldo Carvalho de Melo543d9cf2006-03-20 22:48:35 -08001968 if (sk->sk_prot->compat_setsockopt != NULL)
1969 return sk->sk_prot->compat_setsockopt(sk, level, optname,
1970 optval, optlen);
Dmitry Mishin3fdadf72006-03-20 22:45:21 -08001971 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
1972}
1973EXPORT_SYMBOL(compat_sock_common_setsockopt);
1974#endif
1975
Linus Torvalds1da177e2005-04-16 15:20:36 -07001976void sk_common_release(struct sock *sk)
1977{
1978 if (sk->sk_prot->destroy)
1979 sk->sk_prot->destroy(sk);
1980
1981 /*
1982 * Observation: when sock_common_release is called, processes have
1983 * no access to socket. But net still has.
1984 * Step one, detach it from networking:
1985 *
1986 * A. Remove from hash tables.
1987 */
1988
1989 sk->sk_prot->unhash(sk);
1990
1991 /*
1992 * In this point socket cannot receive new packets, but it is possible
1993 * that some packets are in flight because some CPU runs receiver and
1994 * did hash table lookup before we unhashed socket. They will achieve
1995 * receive queue and will be purged by socket destructor.
1996 *
1997 * Also we still have packets pending on receive queue and probably,
1998 * our own packets waiting in device queues. sock_destroy will drain
1999 * receive queue, but transmitted packets will delay socket destruction
2000 * until the last reference will be released.
2001 */
2002
2003 sock_orphan(sk);
2004
2005 xfrm_sk_free_policy(sk);
2006
Arnaldo Carvalho de Meloe6848972005-08-09 19:45:38 -07002007 sk_refcnt_debug_release(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002008 sock_put(sk);
2009}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002010EXPORT_SYMBOL(sk_common_release);
2011
2012static DEFINE_RWLOCK(proto_list_lock);
2013static LIST_HEAD(proto_list);
2014
Pavel Emelyanov13ff3d62008-03-28 16:38:17 -07002015#ifdef CONFIG_PROC_FS
2016#define PROTO_INUSE_NR 64 /* should be enough for the first time */
Pavel Emelyanov1338d462008-03-28 16:38:43 -07002017struct prot_inuse {
2018 int val[PROTO_INUSE_NR];
2019};
Pavel Emelyanov13ff3d62008-03-28 16:38:17 -07002020
2021static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
Pavel Emelyanov70ee1152008-03-31 19:42:16 -07002022
2023#ifdef CONFIG_NET_NS
2024void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2025{
2026 int cpu = smp_processor_id();
2027 per_cpu_ptr(net->core.inuse, cpu)->val[prot->inuse_idx] += val;
2028}
2029EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2030
2031int sock_prot_inuse_get(struct net *net, struct proto *prot)
2032{
2033 int cpu, idx = prot->inuse_idx;
2034 int res = 0;
2035
2036 for_each_possible_cpu(cpu)
2037 res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];
2038
2039 return res >= 0 ? res : 0;
2040}
2041EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2042
2043static int sock_inuse_init_net(struct net *net)
2044{
2045 net->core.inuse = alloc_percpu(struct prot_inuse);
2046 return net->core.inuse ? 0 : -ENOMEM;
2047}
2048
2049static void sock_inuse_exit_net(struct net *net)
2050{
2051 free_percpu(net->core.inuse);
2052}
2053
2054static struct pernet_operations net_inuse_ops = {
2055 .init = sock_inuse_init_net,
2056 .exit = sock_inuse_exit_net,
2057};
2058
2059static __init int net_inuse_init(void)
2060{
2061 if (register_pernet_subsys(&net_inuse_ops))
2062 panic("Cannot initialize net inuse counters");
2063
2064 return 0;
2065}
2066
2067core_initcall(net_inuse_init);
2068#else
Pavel Emelyanov1338d462008-03-28 16:38:43 -07002069static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
2070
Pavel Emelyanovc29a0bc2008-03-31 19:41:46 -07002071void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
Pavel Emelyanov1338d462008-03-28 16:38:43 -07002072{
2073 __get_cpu_var(prot_inuse).val[prot->inuse_idx] += val;
2074}
2075EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2076
Pavel Emelyanovc29a0bc2008-03-31 19:41:46 -07002077int sock_prot_inuse_get(struct net *net, struct proto *prot)
Pavel Emelyanov1338d462008-03-28 16:38:43 -07002078{
2079 int cpu, idx = prot->inuse_idx;
2080 int res = 0;
2081
2082 for_each_possible_cpu(cpu)
2083 res += per_cpu(prot_inuse, cpu).val[idx];
2084
2085 return res >= 0 ? res : 0;
2086}
2087EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
Pavel Emelyanov70ee1152008-03-31 19:42:16 -07002088#endif
Pavel Emelyanov13ff3d62008-03-28 16:38:17 -07002089
2090static void assign_proto_idx(struct proto *prot)
2091{
2092 prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
2093
2094 if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
2095 printk(KERN_ERR "PROTO_INUSE_NR exhausted\n");
2096 return;
2097 }
2098
2099 set_bit(prot->inuse_idx, proto_inuse_idx);
2100}
2101
2102static void release_proto_idx(struct proto *prot)
2103{
2104 if (prot->inuse_idx != PROTO_INUSE_NR - 1)
2105 clear_bit(prot->inuse_idx, proto_inuse_idx);
2106}
2107#else
/* No-op: without CONFIG_PROC_FS there are no in-use slots to assign. */
static inline void assign_proto_idx(struct proto *prot)
{
}
2111
/* No-op: without CONFIG_PROC_FS there are no in-use slots to release. */
static inline void release_proto_idx(struct proto *prot)
{
}
2115#endif
2116
Linus Torvalds1da177e2005-04-16 15:20:36 -07002117int proto_register(struct proto *prot, int alloc_slab)
2118{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002119 if (alloc_slab) {
2120 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
Eric Dumazet271b72c2008-10-29 02:11:14 -07002121 SLAB_HWCACHE_ALIGN | prot->slab_flags,
2122 NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002123
2124 if (prot->slab == NULL) {
2125 printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n",
2126 prot->name);
Pavel Emelyanov60e76632008-03-28 16:39:10 -07002127 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002128 }
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002129
2130 if (prot->rsk_prot != NULL) {
2131 static const char mask[] = "request_sock_%s";
2132
Catalin Marinas7e56b5d2008-11-21 16:45:22 -08002133 prot->rsk_prot->slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
2134 if (prot->rsk_prot->slab_name == NULL)
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002135 goto out_free_sock_slab;
2136
Catalin Marinas7e56b5d2008-11-21 16:45:22 -08002137 sprintf(prot->rsk_prot->slab_name, mask, prot->name);
2138 prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002139 prot->rsk_prot->obj_size, 0,
Paul Mundt20c2df82007-07-20 10:11:58 +09002140 SLAB_HWCACHE_ALIGN, NULL);
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002141
2142 if (prot->rsk_prot->slab == NULL) {
2143 printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n",
2144 prot->name);
2145 goto out_free_request_sock_slab_name;
2146 }
2147 }
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07002148
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -08002149 if (prot->twsk_prot != NULL) {
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07002150 static const char mask[] = "tw_sock_%s";
2151
Catalin Marinas7e56b5d2008-11-21 16:45:22 -08002152 prot->twsk_prot->twsk_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07002153
Catalin Marinas7e56b5d2008-11-21 16:45:22 -08002154 if (prot->twsk_prot->twsk_slab_name == NULL)
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07002155 goto out_free_request_sock_slab;
2156
Catalin Marinas7e56b5d2008-11-21 16:45:22 -08002157 sprintf(prot->twsk_prot->twsk_slab_name, mask, prot->name);
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -08002158 prot->twsk_prot->twsk_slab =
Catalin Marinas7e56b5d2008-11-21 16:45:22 -08002159 kmem_cache_create(prot->twsk_prot->twsk_slab_name,
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -08002160 prot->twsk_prot->twsk_obj_size,
Eric Dumazet3ab5aee2008-11-16 19:40:17 -08002161 0,
2162 SLAB_HWCACHE_ALIGN |
2163 prot->slab_flags,
Paul Mundt20c2df82007-07-20 10:11:58 +09002164 NULL);
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -08002165 if (prot->twsk_prot->twsk_slab == NULL)
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07002166 goto out_free_timewait_sock_slab_name;
2167 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002168 }
2169
Arnaldo Carvalho de Melo2a278052005-04-16 15:24:09 -07002170 write_lock(&proto_list_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002171 list_add(&prot->node, &proto_list);
Pavel Emelyanov13ff3d62008-03-28 16:38:17 -07002172 assign_proto_idx(prot);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002173 write_unlock(&proto_list_lock);
Pavel Emelyanovb733c002007-11-07 02:23:38 -08002174 return 0;
2175
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07002176out_free_timewait_sock_slab_name:
Catalin Marinas7e56b5d2008-11-21 16:45:22 -08002177 kfree(prot->twsk_prot->twsk_slab_name);
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07002178out_free_request_sock_slab:
2179 if (prot->rsk_prot && prot->rsk_prot->slab) {
2180 kmem_cache_destroy(prot->rsk_prot->slab);
2181 prot->rsk_prot->slab = NULL;
2182 }
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002183out_free_request_sock_slab_name:
Catalin Marinas7e56b5d2008-11-21 16:45:22 -08002184 kfree(prot->rsk_prot->slab_name);
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002185out_free_sock_slab:
2186 kmem_cache_destroy(prot->slab);
2187 prot->slab = NULL;
Pavel Emelyanovb733c002007-11-07 02:23:38 -08002188out:
2189 return -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002190}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002191EXPORT_SYMBOL(proto_register);
2192
2193void proto_unregister(struct proto *prot)
2194{
2195 write_lock(&proto_list_lock);
Pavel Emelyanov13ff3d62008-03-28 16:38:17 -07002196 release_proto_idx(prot);
Patrick McHardy0a3f4352005-09-06 19:47:50 -07002197 list_del(&prot->node);
2198 write_unlock(&proto_list_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002199
2200 if (prot->slab != NULL) {
2201 kmem_cache_destroy(prot->slab);
2202 prot->slab = NULL;
2203 }
2204
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002205 if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002206 kmem_cache_destroy(prot->rsk_prot->slab);
Catalin Marinas7e56b5d2008-11-21 16:45:22 -08002207 kfree(prot->rsk_prot->slab_name);
Arnaldo Carvalho de Melo2e6599c2005-06-18 22:46:52 -07002208 prot->rsk_prot->slab = NULL;
2209 }
2210
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -08002211 if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -08002212 kmem_cache_destroy(prot->twsk_prot->twsk_slab);
Catalin Marinas7e56b5d2008-11-21 16:45:22 -08002213 kfree(prot->twsk_prot->twsk_slab_name);
Arnaldo Carvalho de Melo6d6ee432005-12-13 23:25:19 -08002214 prot->twsk_prot->twsk_slab = NULL;
Arnaldo Carvalho de Melo8feaf0c02005-08-09 20:09:30 -07002215 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002216}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002217EXPORT_SYMBOL(proto_unregister);
2218
2219#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -07002220static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
Eric Dumazet9a429c42008-01-01 21:58:02 -08002221 __acquires(proto_list_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002222{
2223 read_lock(&proto_list_lock);
Pavel Emelianov60f04382007-07-09 13:15:14 -07002224 return seq_list_start_head(&proto_list, *pos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002225}
2226
2227static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2228{
Pavel Emelianov60f04382007-07-09 13:15:14 -07002229 return seq_list_next(v, &proto_list, pos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002230}
2231
2232static void proto_seq_stop(struct seq_file *seq, void *v)
Eric Dumazet9a429c42008-01-01 21:58:02 -08002233 __releases(proto_list_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002234{
2235 read_unlock(&proto_list_lock);
2236}
2237
/*
 * Map a method pointer to the flag shown in the protocols table:
 * 'n' when @method is NULL, 'y' otherwise.
 */
static char proto_method_implemented(const void *method)
{
	return method == NULL ? 'n' : 'y';
}
2242
2243static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
2244{
2245 seq_printf(seq, "%-9s %4u %6d %6d %-3s %6u %-3s %-10s "
2246 "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
2247 proto->name,
2248 proto->obj_size,
Eric Dumazet14e943d2008-11-19 15:14:01 -08002249 sock_prot_inuse_get(seq_file_net(seq), proto),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002250 proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
2251 proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
2252 proto->max_header,
2253 proto->slab == NULL ? "no" : "yes",
2254 module_name(proto->owner),
2255 proto_method_implemented(proto->close),
2256 proto_method_implemented(proto->connect),
2257 proto_method_implemented(proto->disconnect),
2258 proto_method_implemented(proto->accept),
2259 proto_method_implemented(proto->ioctl),
2260 proto_method_implemented(proto->init),
2261 proto_method_implemented(proto->destroy),
2262 proto_method_implemented(proto->shutdown),
2263 proto_method_implemented(proto->setsockopt),
2264 proto_method_implemented(proto->getsockopt),
2265 proto_method_implemented(proto->sendmsg),
2266 proto_method_implemented(proto->recvmsg),
2267 proto_method_implemented(proto->sendpage),
2268 proto_method_implemented(proto->bind),
2269 proto_method_implemented(proto->backlog_rcv),
2270 proto_method_implemented(proto->hash),
2271 proto_method_implemented(proto->unhash),
2272 proto_method_implemented(proto->get_port),
2273 proto_method_implemented(proto->enter_memory_pressure));
2274}
2275
2276static int proto_seq_show(struct seq_file *seq, void *v)
2277{
Pavel Emelianov60f04382007-07-09 13:15:14 -07002278 if (v == &proto_list)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002279 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
2280 "protocol",
2281 "size",
2282 "sockets",
2283 "memory",
2284 "press",
2285 "maxhdr",
2286 "slab",
2287 "module",
2288 "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
2289 else
Pavel Emelianov60f04382007-07-09 13:15:14 -07002290 proto_seq_printf(seq, list_entry(v, struct proto, node));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002291 return 0;
2292}
2293
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002294static const struct seq_operations proto_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002295 .start = proto_seq_start,
2296 .next = proto_seq_next,
2297 .stop = proto_seq_stop,
2298 .show = proto_seq_show,
2299};
2300
2301static int proto_seq_open(struct inode *inode, struct file *file)
2302{
Eric Dumazet14e943d2008-11-19 15:14:01 -08002303 return seq_open_net(inode, file, &proto_seq_ops,
2304 sizeof(struct seq_net_private));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002305}
2306
Arjan van de Ven9a321442007-02-12 00:55:35 -08002307static const struct file_operations proto_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002308 .owner = THIS_MODULE,
2309 .open = proto_seq_open,
2310 .read = seq_read,
2311 .llseek = seq_lseek,
Eric Dumazet14e943d2008-11-19 15:14:01 -08002312 .release = seq_release_net,
2313};
2314
2315static __net_init int proto_init_net(struct net *net)
2316{
2317 if (!proc_net_fops_create(net, "protocols", S_IRUGO, &proto_seq_fops))
2318 return -ENOMEM;
2319
2320 return 0;
2321}
2322
2323static __net_exit void proto_exit_net(struct net *net)
2324{
2325 proc_net_remove(net, "protocols");
2326}
2327
2328
2329static __net_initdata struct pernet_operations proto_net_ops = {
2330 .init = proto_init_net,
2331 .exit = proto_exit_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002332};
2333
2334static int __init proto_init(void)
2335{
Eric Dumazet14e943d2008-11-19 15:14:01 -08002336 return register_pernet_subsys(&proto_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002337}
2338
2339subsys_initcall(proto_init);
2340
2341#endif /* PROC_FS */