/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic socket support routines. Memory allocators, socket lock/release
 *		handler for protocols to use and generic option handler.
 *
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *		Alan Cox	:	Numerous verify_area() problems
 *		Alan Cox	:	Connecting on a connecting socket
 *					now returns an error for tcp.
 *		Alan Cox	:	sock->protocol is set correctly.
 *					and is not sometimes left as 0.
 *		Alan Cox	:	connect handles icmp errors on a
 *					connect properly. Unfortunately there
 *					is a restart syscall nasty there. I
 *					can't match BSD without hacking the C
 *					library. Ideas urgently sought!
 *		Alan Cox	:	Disallow bind() to addresses that are
 *					not ours - especially broadcast ones!!
 *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
 *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
 *					instead they leave that for the DESTROY timer.
 *		Alan Cox	:	Clean up error flag in accept
 *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
 *					was buggy. Put a remove_sock() in the handler
 *					for memory when we hit 0. Also altered the timer
 *					code. The ACK stuff can wait and needs major
 *					TCP layer surgery.
 *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
 *					and fixed timer/inet_bh race.
 *		Alan Cox	:	Added zapped flag for TCP
 *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
 *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
 *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
 *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
 *		Pauline Middelink	:	identd support
 *		Alan Cox	:	Fixed connect() taking signals I think.
 *		Alan Cox	:	SO_LINGER supported
 *		Alan Cox	:	Error reporting fixes
 *		Anonymous	:	inet_create tidied up (sk->reuse setting)
 *		Alan Cox	:	inet sockets don't set sk->type!
 *		Alan Cox	:	Split socket option code
 *		Alan Cox	:	Callbacks
 *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
 *		Alex		:	Removed restriction on inet fioctl
 *		Alan Cox	:	Splitting INET from NET core
 *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
 *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
 *		Alan Cox	:	Split IP from generic code
 *		Alan Cox	:	New kfree_skbmem()
 *		Alan Cox	:	Make SO_DEBUG superuser only.
 *		Alan Cox	:	Allow anyone to clear SO_DEBUG
 *					(compatibility fix)
 *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
 *		Alan Cox	:	Allocator for a socket is settable.
 *		Alan Cox	:	SO_ERROR includes soft errors.
 *		Alan Cox	:	Allow NULL arguments on some SO_ opts
 *		Alan Cox	:	Generic socket allocation to make hooks
 *					easier (suggested by Craig Metz).
 *		Michael Pall	:	SO_ERROR returns positive errno again
 *		Steve Whitehouse:	Added default destructor to free
 *					protocol private data.
 *		Steve Whitehouse:	Added various other default routines
 *					common to several socket families.
 *		Chris Evans	:	Call suser() check last on F_SETOWN
 *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
 *		Andi Kleen	:	Fix write_space callback
 *		Chris Evans	:	Security fixes - signedness again
 *		Arnaldo C. Melo	:	cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/user_namespace.h>
#include <linux/static_key.h>
#include <linux/memcontrol.h>
#include <linux/prefetch.h>

#include <asm/uaccess.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <linux/net_tstamp.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>
#include <net/cls_cgroup.h>
#include <net/netprio_cgroup.h>

#include <linux/filter.h>

#include <trace/events/sock.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);

#ifdef CONFIG_MEMCG_KMEM
int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
	struct proto *proto;
	int ret = 0;

	mutex_lock(&proto_list_mutex);
	list_for_each_entry(proto, &proto_list, node) {
		if (proto->init_cgroup) {
			ret = proto->init_cgroup(memcg, ss);
			if (ret)
				goto out;
		}
	}

	mutex_unlock(&proto_list_mutex);
	return ret;
out:
	list_for_each_entry_continue_reverse(proto, &proto_list, node)
		if (proto->destroy_cgroup)
			proto->destroy_cgroup(memcg);
	mutex_unlock(&proto_list_mutex);
	return ret;
}

void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
{
	struct proto *proto;

	mutex_lock(&proto_list_mutex);
	list_for_each_entry_reverse(proto, &proto_list, node)
		if (proto->destroy_cgroup)
			proto->destroy_cgroup(memcg);
	mutex_unlock(&proto_list_mutex);
}
#endif

/*
 * Each address family might have different locking rules, so we have
 * one slock key per address family:
 */
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];

#if defined(CONFIG_MEMCG_KMEM)
struct static_key memcg_socket_limit_enabled;
EXPORT_SYMBOL(memcg_socket_limit_enabled);
#endif

/*
 * Make lock validator output more readable. (we pre-construct these
 * strings build-time, so that runtime initialization of socket
 * locks is fast):
 */
static const char *const af_family_key_strings[AF_MAX+1] = {
  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
  "sk_lock-AF_RDS"   , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_IUCV"     ,
  "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
  "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG"      ,
  "sk_lock-AF_NFC"   , "sk_lock-AF_VSOCK"    , "sk_lock-AF_MAX"
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
  "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
  "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
  "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
  "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
  "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
  "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
  "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
  "slock-AF_RDS"   , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
  "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
  "slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
  "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
  "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG"      ,
  "slock-AF_NFC"   , "slock-AF_VSOCK"    , "slock-AF_MAX"
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
  "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
  "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
  "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
  "clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
  "clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
  "clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
  "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
  "clock-AF_RDS"   , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
  "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
  "clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
  "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
  "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
  "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG"      ,
  "clock-AF_NFC"   , "clock-AF_VSOCK"    , "clock-AF_MAX"
};

/*
 * sk_callback_lock locking rules are per-address-family,
 * so split the lock classes by using a per-AF key:
 */
static struct lock_class_key af_callback_keys[AF_MAX];

/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS		256
#define _SK_MEM_OVERHEAD	SKB_TRUESIZE(256)
#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
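
/*
 * Editor's note, a worked example rather than authoritative numbers:
 * if SKB_TRUESIZE(256) came to roughly 768 bytes on some hypothetical
 * build, the defaults above would work out to about 768 * 256 = ~192 KiB
 * for both SK_WMEM_MAX and SK_RMEM_MAX.  The real figure tracks
 * sizeof(struct sk_buff) and struct skb_shared_info on each platform,
 * which is exactly why the limits are derived from SKB_TRUESIZE()
 * rather than hard-coded.
 */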

/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
EXPORT_SYMBOL(sysctl_wmem_max);
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
EXPORT_SYMBOL(sysctl_rmem_max);
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
EXPORT_SYMBOL(sysctl_optmem_max);

struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
EXPORT_SYMBOL_GPL(memalloc_socks);

/**
 * sk_set_memalloc - sets %SOCK_MEMALLOC
 * @sk: socket to set it on
 *
 * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
 * It's the responsibility of the admin to adjust min_free_kbytes
 * to meet the requirements
 */
void sk_set_memalloc(struct sock *sk)
{
	sock_set_flag(sk, SOCK_MEMALLOC);
	sk->sk_allocation |= __GFP_MEMALLOC;
	static_key_slow_inc(&memalloc_socks);
}
EXPORT_SYMBOL_GPL(sk_set_memalloc);

void sk_clear_memalloc(struct sock *sk)
{
	sock_reset_flag(sk, SOCK_MEMALLOC);
	sk->sk_allocation &= ~__GFP_MEMALLOC;
	static_key_slow_dec(&memalloc_socks);

	/*
	 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
	 * progress of swapping. However, if SOCK_MEMALLOC is cleared while
	 * it has rmem allocations there is a risk that the user of the
	 * socket cannot make forward progress due to exceeding the rmem
	 * limits. By rights, sk_clear_memalloc() should only be called
	 * on sockets being torn down but warn and reset the accounting if
	 * that assumption breaks.
	 */
	if (WARN_ON(sk->sk_forward_alloc))
		sk_mem_reclaim(sk);
}
EXPORT_SYMBOL_GPL(sk_clear_memalloc);
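
/*
 * Editor's sketch (hypothetical, not part of the original file): a
 * subsystem that moves swap traffic over a socket would pair the two
 * helpers above around the socket's lifetime.  The function names
 * below are invented for illustration only.
 */
#if 0	/* example only, never compiled */
static void example_swap_transport_attach(struct sock *sk)
{
	/* Grant the socket access to the PF_MEMALLOC emergency reserves. */
	sk_set_memalloc(sk);
}

static void example_swap_transport_detach(struct sock *sk)
{
	/* Teardown path: rmem accounting is expected to be zero here. */
	sk_clear_memalloc(sk);
}
#endif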

int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	int ret;
	unsigned long pflags = current->flags;

	/* these should have been dropped before queueing */
	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));

	current->flags |= PF_MEMALLOC;
	ret = sk->sk_backlog_rcv(sk, skb);
	tsk_restore_flags(current, pflags, PF_MEMALLOC);

	return ret;
}
EXPORT_SYMBOL(__sk_backlog_rcv);

static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
	struct timeval tv;

	if (optlen < sizeof(tv))
		return -EINVAL;
	if (copy_from_user(&tv, optval, sizeof(tv)))
		return -EFAULT;
	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
		return -EDOM;

	if (tv.tv_sec < 0) {
		static int warned __read_mostly;

		*timeo_p = 0;
		if (warned < 10 && net_ratelimit()) {
			warned++;
			pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
				__func__, current->comm, task_pid_nr(current));
		}
		return 0;
	}
	*timeo_p = MAX_SCHEDULE_TIMEOUT;
	if (tv.tv_sec == 0 && tv.tv_usec == 0)
		return 0;
	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
	return 0;
}
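
/*
 * Editor's note: sock_set_timeout() is what ultimately services a
 * user-space call such as this hypothetical snippet; an all-zero
 * timeval leaves the timeout disabled (MAX_SCHEDULE_TIMEOUT above):
 *
 *	struct timeval tv = { .tv_sec = 5, .tv_usec = 0 };
 *	setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
 */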

static void sock_warn_obsolete_bsdism(const char *name)
{
	static int warned;
	static char warncomm[TASK_COMM_LEN];
	if (strcmp(warncomm, current->comm) && warned < 5) {
		strcpy(warncomm, current->comm);
		pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
			warncomm, name);
		warned++;
	}
}

#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))

static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
{
	if (sk->sk_flags & flags) {
		sk->sk_flags &= ~flags;
		if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP))
			net_disable_timestamp();
	}
}


int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int err;
	int skb_len;
	unsigned long flags;
	struct sk_buff_head *list = &sk->sk_receive_queue;

	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
		atomic_inc(&sk->sk_drops);
		trace_sock_rcvqueue_full(sk, skb);
		return -ENOMEM;
	}

	err = sk_filter(sk, skb);
	if (err)
		return err;

	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
		atomic_inc(&sk->sk_drops);
		return -ENOBUFS;
	}

	skb->dev = NULL;
	skb_set_owner_r(skb, sk);

	/* Cache the SKB length before we tack it onto the receive
	 * queue.  Once it is added it no longer belongs to us and
	 * may be freed by other threads of control pulling packets
	 * from the queue.
	 */
	skb_len = skb->len;

	/* we escape from the rcu protected region, make sure we don't leak
	 * a norefcounted dst
	 */
	skb_dst_force(skb);

	spin_lock_irqsave(&list->lock, flags);
	skb->dropcount = atomic_read(&sk->sk_drops);
	__skb_queue_tail(list, skb);
	spin_unlock_irqrestore(&list->lock, flags);

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk, skb_len);
	return 0;
}
EXPORT_SYMBOL(sock_queue_rcv_skb);
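
/*
 * Editor's sketch (hypothetical): a datagram protocol's delivery path
 * typically hands each matched skb to sock_queue_rcv_skb() and remains
 * responsible for freeing the skb itself when queueing fails:
 */
#if 0	/* example only, never compiled */
static int example_proto_deliver(struct sock *sk, struct sk_buff *skb)
{
	int rc = sock_queue_rcv_skb(sk, skb);

	if (rc < 0)
		kfree_skb(skb);	/* -ENOMEM, -ENOBUFS or filter drop */
	return rc;
}
#endif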

int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
{
	int rc = NET_RX_SUCCESS;

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}
	if (nested)
		bh_lock_sock_nested(sk);
	else
		bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		/*
		 * trylock + unlock semantics:
		 */
		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

		rc = sk_backlog_rcv(sk, skb);

		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
	} else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
		bh_unlock_sock(sk);
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}

	bh_unlock_sock(sk);
out:
	sock_put(sk);
	return rc;
discard_and_relse:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(sk_receive_skb);

void sk_reset_txq(struct sock *sk)
{
	sk_tx_queue_clear(sk);
}
EXPORT_SYMBOL(sk_reset_txq);

struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = __sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_tx_queue_clear(sk);
		RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(__sk_dst_check);

struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_dst_reset(sk);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(sk_dst_check);
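
/*
 * Editor's note on the usual calling pattern (illustrative sketch, not
 * taken from this file): a protocol's output path revalidates its
 * cached route and falls back to a fresh lookup when the cookie check
 * fails, along the lines of:
 *
 *	dst = sk_dst_check(sk, cookie);
 *	if (dst == NULL)
 *		... perform a new route lookup, then sk_dst_set(sk, dst) ...
 */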

static int sock_setbindtodevice(struct sock *sk, char __user *optval,
				int optlen)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];
	int index;

	/* Sorry... */
	ret = -EPERM;
	if (!ns_capable(net->user_ns, CAP_NET_RAW))
		goto out;

	ret = -EINVAL;
	if (optlen < 0)
		goto out;

	/* Bind this socket to a particular device like "eth0",
	 * as specified in the passed interface name. If the
	 * name is "" or the option length is zero the socket
	 * is not bound.
	 */
	if (optlen > IFNAMSIZ - 1)
		optlen = IFNAMSIZ - 1;
	memset(devname, 0, sizeof(devname));

	ret = -EFAULT;
	if (copy_from_user(devname, optval, optlen))
		goto out;

	index = 0;
	if (devname[0] != '\0') {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_name_rcu(net, devname);
		if (dev)
			index = dev->ifindex;
		rcu_read_unlock();
		ret = -ENODEV;
		if (!dev)
			goto out;
	}

	lock_sock(sk);
	sk->sk_bound_dev_if = index;
	sk_dst_reset(sk);
	release_sock(sk);

	ret = 0;

out:
#endif

	return ret;
}
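
/*
 * Editor's note: the handler above backs a user-space call like the
 * hypothetical snippet below; an empty name ("") removes the binding,
 * and CAP_NET_RAW in the socket's network namespace is required:
 *
 *	setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, "eth0", sizeof("eth0"));
 */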

static int sock_getbindtodevice(struct sock *sk, char __user *optval,
				int __user *optlen, int len)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];

	if (sk->sk_bound_dev_if == 0) {
		len = 0;
		goto zero;
	}

	ret = -EINVAL;
	if (len < IFNAMSIZ)
		goto out;

	ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
	if (ret)
		goto out;

	len = strlen(devname) + 1;

	ret = -EFAULT;
	if (copy_to_user(optval, devname, len))
		goto out;

zero:
	ret = -EFAULT;
	if (put_user(len, optlen))
		goto out;

	ret = 0;

out:
#endif

	return ret;
}

static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
{
	if (valbool)
		sock_set_flag(sk, bit);
	else
		sock_reset_flag(sk, bit);
}

/*
 *	This is meant for all protocols to use and covers goings on
 *	at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	Options without arguments
	 */

	if (optname == SO_BINDTODEVICE)
		return sock_setbindtodevice(sk, optval, optlen);

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	valbool = val ? 1 : 0;

	lock_sock(sk);

	switch (optname) {
	case SO_DEBUG:
		if (val && !capable(CAP_NET_ADMIN))
			ret = -EACCES;
		else
			sock_valbool_flag(sk, SOCK_DBG, valbool);
		break;
	case SO_REUSEADDR:
		sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
		break;
	case SO_REUSEPORT:
		sk->sk_reuseport = valbool;
		break;
	case SO_TYPE:
	case SO_PROTOCOL:
	case SO_DOMAIN:
	case SO_ERROR:
		ret = -ENOPROTOOPT;
		break;
	case SO_DONTROUTE:
		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
		break;
	case SO_BROADCAST:
		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
		break;
	case SO_SNDBUF:
		/* Don't error on this; BSD doesn't, and if you think
		 * about it this is right. Otherwise apps have to
		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
		 * are treated in BSD as hints.
		 */
		val = min_t(u32, val, sysctl_wmem_max);
set_sndbuf:
		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
		sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF);
		/* Wake up sending tasks if we upped the value. */
		sk->sk_write_space(sk);
		break;

	case SO_SNDBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_sndbuf;

	case SO_RCVBUF:
		/* Don't error on this; BSD doesn't, and if you think
		 * about it this is right. Otherwise apps have to
		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
		 * are treated in BSD as hints.
		 */
		val = min_t(u32, val, sysctl_rmem_max);
set_rcvbuf:
		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
		/*
		 * We double it on the way in to account for
		 * "struct sk_buff" etc. overhead.   Applications
		 * assume that the SO_RCVBUF setting they make will
		 * allow that much actual data to be received on that
		 * socket.
		 *
		 * Applications are unaware that "struct sk_buff" and
		 * other overheads allocate from the receive buffer
		 * during socket buffer allocation.
		 *
		 * And after considering the possible alternatives,
		 * returning the value we actually used in getsockopt
		 * is the most desirable behavior.
		 */
		sk->sk_rcvbuf = max_t(u32, val * 2, SOCK_MIN_RCVBUF);
		break;

	case SO_RCVBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_rcvbuf;

	case SO_KEEPALIVE:
#ifdef CONFIG_INET
		if (sk->sk_protocol == IPPROTO_TCP &&
		    sk->sk_type == SOCK_STREAM)
			tcp_set_keepalive(sk, valbool);
#endif
		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
		break;

	case SO_OOBINLINE:
		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
		break;

	case SO_NO_CHECK:
		sk->sk_no_check = valbool;
		break;

	case SO_PRIORITY:
		if ((val >= 0 && val <= 6) ||
		    ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
			sk->sk_priority = val;
		else
			ret = -EPERM;
		break;

	case SO_LINGER:
		if (optlen < sizeof(ling)) {
			ret = -EINVAL;	/* 1003.1g */
			break;
		}
		if (copy_from_user(&ling, optval, sizeof(ling))) {
			ret = -EFAULT;
			break;
		}
		if (!ling.l_onoff)
			sock_reset_flag(sk, SOCK_LINGER);
		else {
#if (BITS_PER_LONG == 32)
			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
			else
#endif
				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
			sock_set_flag(sk, SOCK_LINGER);
		}
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("setsockopt");
		break;

	case SO_PASSCRED:
		if (valbool)
			set_bit(SOCK_PASSCRED, &sock->flags);
		else
			clear_bit(SOCK_PASSCRED, &sock->flags);
		break;

	case SO_TIMESTAMP:
	case SO_TIMESTAMPNS:
		if (valbool) {
			if (optname == SO_TIMESTAMP)
				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
			else
				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
			sock_set_flag(sk, SOCK_RCVTSTAMP);
			sock_enable_timestamp(sk, SOCK_TIMESTAMP);
		} else {
			sock_reset_flag(sk, SOCK_RCVTSTAMP);
			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
		}
		break;

	case SO_TIMESTAMPING:
		if (val & ~SOF_TIMESTAMPING_MASK) {
			ret = -EINVAL;
			break;
		}
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
				  val & SOF_TIMESTAMPING_TX_HARDWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE,
				  val & SOF_TIMESTAMPING_TX_SOFTWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE,
				  val & SOF_TIMESTAMPING_RX_HARDWARE);
		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
			sock_enable_timestamp(sk,
					      SOCK_TIMESTAMPING_RX_SOFTWARE);
		else
			sock_disable_timestamp(sk,
					       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
				  val & SOF_TIMESTAMPING_SOFTWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
				  val & SOF_TIMESTAMPING_SYS_HARDWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
				  val & SOF_TIMESTAMPING_RAW_HARDWARE);
		break;

	case SO_RCVLOWAT:
		if (val < 0)
			val = INT_MAX;
		sk->sk_rcvlowat = val ? : 1;
		break;

	case SO_RCVTIMEO:
		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
		break;

	case SO_SNDTIMEO:
		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
		break;

	case SO_ATTACH_FILTER:
		ret = -EINVAL;
		if (optlen == sizeof(struct sock_fprog)) {
			struct sock_fprog fprog;

			ret = -EFAULT;
			if (copy_from_user(&fprog, optval, sizeof(fprog)))
				break;

			ret = sk_attach_filter(&fprog, sk);
		}
		break;

	case SO_DETACH_FILTER:
		ret = sk_detach_filter(sk);
		break;

	case SO_LOCK_FILTER:
		if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
			ret = -EPERM;
		else
			sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
		break;

	case SO_PASSSEC:
		if (valbool)
			set_bit(SOCK_PASSSEC, &sock->flags);
		else
			clear_bit(SOCK_PASSSEC, &sock->flags);
		break;
	case SO_MARK:
		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
			ret = -EPERM;
		else
			sk->sk_mark = val;
		break;

		/* We implement the SO_SNDLOWAT etc to
		   not be settable (1003.1g 5.3) */
	case SO_RXQ_OVFL:
		sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
		break;

	case SO_WIFI_STATUS:
		sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
		break;

	case SO_PEEK_OFF:
		if (sock->ops->set_peek_off)
			sock->ops->set_peek_off(sk, val);
		else
			ret = -EOPNOTSUPP;
		break;

	case SO_NOFCS:
		sock_valbool_flag(sk, SOCK_NOFCS, valbool);
		break;

	case SO_SELECT_ERR_QUEUE:
		sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
		break;

	default:
		ret = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);
	return ret;
}
EXPORT_SYMBOL(sock_setsockopt);
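
/*
 * Editor's note with a hypothetical user-space snippet: because of the
 * doubling in the SO_RCVBUF case above, reading the option back reports
 * twice the requested value (subject to the sysctl_rmem_max cap and
 * the SOCK_MIN_RCVBUF floor):
 *
 *	int val = 65536, out;
 *	socklen_t len = sizeof(out);
 *	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
 *	getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &out, &len); // out == 131072
 */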

void cred_to_ucred(struct pid *pid, const struct cred *cred,
		   struct ucred *ucred)
{
	ucred->pid = pid_vnr(pid);
	ucred->uid = ucred->gid = -1;
	if (cred) {
		struct user_namespace *current_ns = current_user_ns();

		ucred->uid = from_kuid_munged(current_ns, cred->euid);
		ucred->gid = from_kgid_munged(current_ns, cred->egid);
	}
}
EXPORT_SYMBOL_GPL(cred_to_ucred);

int sock_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	union {
		int val;
		struct linger ling;
		struct timeval tm;
	} v;

	int lv = sizeof(int);
	int len;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	memset(&v, 0, sizeof(v));

	switch (optname) {
	case SO_DEBUG:
		v.val = sock_flag(sk, SOCK_DBG);
		break;

	case SO_DONTROUTE:
		v.val = sock_flag(sk, SOCK_LOCALROUTE);
		break;

	case SO_BROADCAST:
		v.val = sock_flag(sk, SOCK_BROADCAST);
		break;

	case SO_SNDBUF:
		v.val = sk->sk_sndbuf;
		break;

	case SO_RCVBUF:
		v.val = sk->sk_rcvbuf;
		break;

	case SO_REUSEADDR:
		v.val = sk->sk_reuse;
		break;

	case SO_REUSEPORT:
		v.val = sk->sk_reuseport;
		break;

	case SO_KEEPALIVE:
		v.val = sock_flag(sk, SOCK_KEEPOPEN);
		break;

	case SO_TYPE:
		v.val = sk->sk_type;
		break;

	case SO_PROTOCOL:
		v.val = sk->sk_protocol;
		break;

	case SO_DOMAIN:
		v.val = sk->sk_family;
		break;

	case SO_ERROR:
		v.val = -sock_error(sk);
		if (v.val == 0)
			v.val = xchg(&sk->sk_err_soft, 0);
		break;

	case SO_OOBINLINE:
		v.val = sock_flag(sk, SOCK_URGINLINE);
		break;

	case SO_NO_CHECK:
		v.val = sk->sk_no_check;
		break;

	case SO_PRIORITY:
		v.val = sk->sk_priority;
		break;

	case SO_LINGER:
		lv = sizeof(v.ling);
		v.ling.l_onoff = sock_flag(sk, SOCK_LINGER);
		v.ling.l_linger = sk->sk_lingertime / HZ;
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("getsockopt");
		break;

	case SO_TIMESTAMP:
		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
				!sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_TIMESTAMPNS:
		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_TIMESTAMPING:
		v.val = 0;
		if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
			v.val |= SOF_TIMESTAMPING_TX_HARDWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
			v.val |= SOF_TIMESTAMPING_TX_SOFTWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE))
			v.val |= SOF_TIMESTAMPING_RX_HARDWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE))
			v.val |= SOF_TIMESTAMPING_RX_SOFTWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE))
			v.val |= SOF_TIMESTAMPING_SOFTWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))
			v.val |= SOF_TIMESTAMPING_SYS_HARDWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))
			v.val |= SOF_TIMESTAMPING_RAW_HARDWARE;
		break;

	case SO_RCVTIMEO:
		lv = sizeof(struct timeval);
		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_SNDTIMEO:
		lv = sizeof(struct timeval);
		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_RCVLOWAT:
		v.val = sk->sk_rcvlowat;
		break;

	case SO_SNDLOWAT:
		v.val = 1;
		break;

	case SO_PASSCRED:
		v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
		break;

	case SO_PEERCRED:
	{
		struct ucred peercred;
		if (len > sizeof(peercred))
			len = sizeof(peercred);
		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
		if (copy_to_user(optval, &peercred, len))
			return -EFAULT;
		goto lenout;
	}

	case SO_PEERNAME:
	{
		char address[128];

		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
			return -ENOTCONN;
		if (lv < len)
			return -EINVAL;
		if (copy_to_user(optval, address, len))
			return -EFAULT;
		goto lenout;
	}

	/* Dubious BSD thing... Probably nobody even uses it, but
	 * the UNIX standard wants it for whatever reason... -DaveM
	 */
	case SO_ACCEPTCONN:
		v.val = sk->sk_state == TCP_LISTEN;
		break;

	case SO_PASSSEC:
		v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
		break;

	case SO_PEERSEC:
		return security_socket_getpeersec_stream(sock, optval, optlen, len);

	case SO_MARK:
		v.val = sk->sk_mark;
		break;

	case SO_RXQ_OVFL:
		v.val = sock_flag(sk, SOCK_RXQ_OVFL);
		break;

	case SO_WIFI_STATUS:
		v.val = sock_flag(sk, SOCK_WIFI_STATUS);
		break;

	case SO_PEEK_OFF:
		if (!sock->ops->set_peek_off)
			return -EOPNOTSUPP;

		v.val = sk->sk_peek_off;
		break;
	case SO_NOFCS:
		v.val = sock_flag(sk, SOCK_NOFCS);
		break;

	case SO_BINDTODEVICE:
		return sock_getbindtodevice(sk, optval, optlen, len);

	case SO_GET_FILTER:
		len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
		if (len < 0)
			return len;

		goto lenout;

	case SO_LOCK_FILTER:
		v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
		break;

	case SO_SELECT_ERR_QUEUE:
		v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
		break;

	default:
		return -ENOPROTOOPT;
	}

	if (len > lv)
		len = lv;
	if (copy_to_user(optval, &v, len))
		return -EFAULT;
lenout:
	if (put_user(len, optlen))
		return -EFAULT;
	return 0;
}
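
/*
 * Editor's note (hypothetical user-space sketch): the SO_PEERCRED case
 * above copies at most sizeof(struct ucred) and jumps to lenout, so a
 * typical caller on an AF_UNIX socket reads it as:
 *
 *	struct ucred uc;
 *	socklen_t len = sizeof(uc);
 *	getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &uc, &len);
 */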

/*
 * Initialize an sk_lock.
 *
 * (We also register the sk_lock with the lock validator.)
 */
static inline void sock_lock_init(struct sock *sk)
{
	sock_lock_init_class_and_name(sk,
			af_family_slock_key_strings[sk->sk_family],
			af_family_slock_keys + sk->sk_family,
			af_family_key_strings[sk->sk_family],
			af_family_keys + sk->sk_family);
}

/*
 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
 * even temporarily, because of RCU lookups. sk_node should also be left as is.
 * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
 */
static void sock_copy(struct sock *nsk, const struct sock *osk)
{
#ifdef CONFIG_SECURITY_NETWORK
	void *sptr = nsk->sk_security;
#endif
	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));

	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
	       osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));

#ifdef CONFIG_SECURITY_NETWORK
	nsk->sk_security = sptr;
	security_sk_clone(osk, nsk);
#endif
}

void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
{
	unsigned long nulls1, nulls2;

	nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
	nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
	if (nulls1 > nulls2)
		swap(nulls1, nulls2);

	if (nulls1 != 0)
		memset((char *)sk, 0, nulls1);
	memset((char *)sk + nulls1 + sizeof(void *), 0,
	       nulls2 - nulls1 - sizeof(void *));
	memset((char *)sk + nulls2 + sizeof(void *), 0,
	       size - nulls2 - sizeof(void *));
}
EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);

static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
		int family)
{
	struct sock *sk;
	struct kmem_cache *slab;

	slab = prot->slab;
	if (slab != NULL) {
		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
		if (!sk)
			return sk;
		if (priority & __GFP_ZERO) {
			if (prot->clear_sk)
				prot->clear_sk(sk, prot->obj_size);
			else
				sk_prot_clear_nulls(sk, prot->obj_size);
		}
	} else
		sk = kmalloc(prot->obj_size, priority);

	if (sk != NULL) {
		kmemcheck_annotate_bitfield(sk, flags);

		if (security_sk_alloc(sk, family, priority))
			goto out_free;

		if (!try_module_get(prot->owner))
			goto out_free_sec;
		sk_tx_queue_clear(sk);
	}

	return sk;

out_free_sec:
	security_sk_free(sk);
out_free:
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	return NULL;
}

static void sk_prot_free(struct proto *prot, struct sock *sk)
{
	struct kmem_cache *slab;
	struct module *owner;

	owner = prot->owner;
	slab = prot->slab;

	security_sk_free(sk);
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	module_put(owner);
}

#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
void sock_update_classid(struct sock *sk)
{
	u32 classid;

	classid = task_cls_classid(current);
	if (classid != sk->sk_classid)
		sk->sk_classid = classid;
}
EXPORT_SYMBOL(sock_update_classid);
#endif

#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
void sock_update_netprioidx(struct sock *sk)
{
	if (in_interrupt())
		return;

	sk->sk_cgrp_prioidx = task_netprioidx(current);
}
EXPORT_SYMBOL_GPL(sock_update_netprioidx);
#endif

/**
 * sk_alloc - All socket objects are allocated here
 * @net: the applicable net namespace
 * @family: protocol family
 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 * @prot: struct proto associated with this new sock instance
 */
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
		      struct proto *prot)
{
	struct sock *sk;

	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
	if (sk) {
		sk->sk_family = family;
		/*
		 * See comment in struct sock definition to understand
		 * why we need sk_prot_creator -acme
		 */
		sk->sk_prot = sk->sk_prot_creator = prot;
		sock_lock_init(sk);
		sock_net_set(sk, get_net(net));
		atomic_set(&sk->sk_wmem_alloc, 1);

		sock_update_classid(sk);
		sock_update_netprioidx(sk);
	}

	return sk;
}
EXPORT_SYMBOL(sk_alloc);
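
/*
 * Illustrative sketch, added in this edit (not part of the original
 * file): how a protocol family's ->create() handler typically pairs
 * sk_alloc() with sock_init_data().  "example_proto" and EXAMPLE_PF
 * are hypothetical placeholders; a real proto is fully populated.
 */
static struct proto example_proto;	/* hypothetical placeholder */
#define EXAMPLE_PF AF_UNSPEC		/* hypothetical family number */

static int example_create(struct net *net, struct socket *sock, int protocol)
{
	struct sock *sk;

	sk = sk_alloc(net, EXAMPLE_PF, GFP_KERNEL, &example_proto);
	if (!sk)
		return -ENOBUFS;

	sock_init_data(sock, sk);	/* queues, default callbacks, refcnt = 1 */
	sk->sk_protocol = protocol;
	return 0;
}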

static void __sk_free(struct sock *sk)
{
	struct sk_filter *filter;

	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	filter = rcu_dereference_check(sk->sk_filter,
				       atomic_read(&sk->sk_wmem_alloc) == 0);
	if (filter) {
		sk_filter_uncharge(sk, filter);
		RCU_INIT_POINTER(sk->sk_filter, NULL);
	}

	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);

	if (atomic_read(&sk->sk_omem_alloc))
		pr_debug("%s: optmem leakage (%d bytes) detected\n",
			 __func__, atomic_read(&sk->sk_omem_alloc));

	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	put_pid(sk->sk_peer_pid);
	put_net(sock_net(sk));
	sk_prot_free(sk->sk_prot_creator, sk);
}

void sk_free(struct sock *sk)
{
	/*
	 * We subtract one from sk_wmem_alloc; if the result is non-zero,
	 * some packets are still in a tx queue and sock_wfree() will call
	 * __sk_free(sk) later, when the last of them is freed.
	 */
	if (atomic_dec_and_test(&sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sk_free);

/*
 * The last sock_put() should drop the reference to sk->sk_net.  It has
 * already been dropped in sk_change_net(), and taking a reference to a
 * stopping namespace is not an option.  So take a reference to the
 * socket to remove it from the hash tables while it is still _alive_,
 * and then destroy it in the context of init_net.
 */
void sk_release_kernel(struct sock *sk)
{
	if (sk == NULL || sk->sk_socket == NULL)
		return;

	sock_hold(sk);
	sock_release(sk->sk_socket);
	release_net(sock_net(sk));
	sock_net_set(sk, get_net(&init_net));
	sock_put(sk);
}
EXPORT_SYMBOL(sk_release_kernel);

static void sk_update_clone(const struct sock *sk, struct sock *newsk)
{
	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
		sock_update_memcg(newsk);
}

/**
 * sk_clone_lock - clone a socket, and lock its clone
 * @sk: the socket to clone
 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *
 * Caller must unlock socket even in error path (bh_unlock_sock(newsk))
 */
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
{
	struct sock *newsk;

	newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
	if (newsk != NULL) {
		struct sk_filter *filter;

		sock_copy(newsk, sk);

		/* SANITY */
		get_net(sock_net(newsk));
		sk_node_init(&newsk->sk_node);
		sock_lock_init(newsk);
		bh_lock_sock(newsk);
		newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
		newsk->sk_backlog.len = 0;

		atomic_set(&newsk->sk_rmem_alloc, 0);
		/*
		 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
		 */
		atomic_set(&newsk->sk_wmem_alloc, 1);
		atomic_set(&newsk->sk_omem_alloc, 0);
		skb_queue_head_init(&newsk->sk_receive_queue);
		skb_queue_head_init(&newsk->sk_write_queue);
#ifdef CONFIG_NET_DMA
		skb_queue_head_init(&newsk->sk_async_wait_queue);
#endif

		spin_lock_init(&newsk->sk_dst_lock);
		rwlock_init(&newsk->sk_callback_lock);
		lockdep_set_class_and_name(&newsk->sk_callback_lock,
				af_callback_keys + newsk->sk_family,
				af_family_clock_key_strings[newsk->sk_family]);

		newsk->sk_dst_cache = NULL;
		newsk->sk_wmem_queued = 0;
		newsk->sk_forward_alloc = 0;
		newsk->sk_send_head = NULL;
		newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;

		sock_reset_flag(newsk, SOCK_DONE);
		skb_queue_head_init(&newsk->sk_error_queue);

		filter = rcu_dereference_protected(newsk->sk_filter, 1);
		if (filter != NULL)
			sk_filter_charge(newsk, filter);

		if (unlikely(xfrm_sk_clone_policy(newsk))) {
			/* newsk is still a raw copy of the parent, so
			 * invalidate the destructor and do a plain sk_free()
			 */
			newsk->sk_destruct = NULL;
			bh_unlock_sock(newsk);
			sk_free(newsk);
			newsk = NULL;
			goto out;
		}

		newsk->sk_err = 0;
		newsk->sk_priority = 0;
		/*
		 * Before updating sk_refcnt, we must commit prior changes to memory
		 * (Documentation/RCU/rculist_nulls.txt for details)
		 */
		smp_wmb();
		atomic_set(&newsk->sk_refcnt, 2);

		/*
		 * Increment the counter in the same struct proto as the master
		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
		 * is the same as sk->sk_prot->socks, as this field was copied
		 * with memcpy).
		 *
		 * This _changes_ the previous behaviour, where
		 * tcp_create_openreq_child always was incrementing the
		 * equivalent to tcp_prot->socks (inet_sock_nr), so this has
		 * to be taken into account in all callers. -acme
		 */
		sk_refcnt_debug_inc(newsk);
		sk_set_socket(newsk, NULL);
		newsk->sk_wq = NULL;

		sk_update_clone(sk, newsk);

		if (newsk->sk_prot->sockets_allocated)
			sk_sockets_allocated_inc(newsk);

		if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
			net_enable_timestamp();
	}
out:
	return newsk;
}
EXPORT_SYMBOL_GPL(sk_clone_lock);
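
/*
 * Illustrative sketch, added in this edit (not part of the original
 * file): the usual caller pattern for sk_clone_lock() in an accept
 * path.  As the comment above says, the clone comes back locked and
 * must be bh_unlock_sock()ed on every path.
 */
static struct sock *example_accept_clone(const struct sock *listener)
{
	struct sock *child = sk_clone_lock(listener, GFP_ATOMIC);

	if (!child)
		return NULL;

	/* ... protocol-specific initialisation of the child here ... */

	bh_unlock_sock(child);		/* mandatory, even on error paths */
	return child;
}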

void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
	__sk_dst_set(sk, dst);
	sk->sk_route_caps = dst->dev->features;
	if (sk->sk_route_caps & NETIF_F_GSO)
		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
	sk->sk_route_caps &= ~sk->sk_route_nocaps;
	if (sk_can_gso(sk)) {
		if (dst->header_len) {
			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
		} else {
			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
			sk->sk_gso_max_size = dst->dev->gso_max_size;
			sk->sk_gso_max_segs = dst->dev->gso_max_segs;
		}
	}
}
EXPORT_SYMBOL_GPL(sk_setup_caps);
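
/*
 * Illustrative sketch, added in this edit (not part of the original
 * file): why the ~sk_route_nocaps mask above matters.  A protocol can
 * veto features before (re)applying the route, as TCP does when MD5
 * signatures make GSO unusable:
 */
static void example_disable_gso(struct sock *sk, struct dst_entry *dst)
{
	sk_nocaps_add(sk, NETIF_F_GSO_MASK);	/* record the veto */
	sk_setup_caps(sk, dst);			/* caps now exclude GSO */
}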

/*
 *	Simple resource managers for sockets.
 */


/*
 * Write buffer destructor automatically called from kfree_skb.
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;

	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
		/*
		 * Keep a reference on sk_wmem_alloc, this will be released
		 * after sk_write_space() call
		 */
		atomic_sub(len - 1, &sk->sk_wmem_alloc);
		sk->sk_write_space(sk);
		len = 1;
	}
	/*
	 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
	 * could not do because of in-flight packets
	 */
	if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sock_wfree);
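
/*
 * Illustrative sketch, added in this edit (not part of the original
 * file): sock_wfree() only runs because skb_set_owner_w() charged the
 * skb to the socket and installed it as skb->destructor:
 */
static struct sk_buff *example_charge_to_sender(struct sock *sk,
						unsigned int size)
{
	struct sk_buff *skb = alloc_skb(size, GFP_KERNEL);

	if (skb)		/* adds skb->truesize to sk_wmem_alloc and
				 * sets skb->destructor = sock_wfree */
		skb_set_owner_w(skb, sk);
	return skb;
}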

/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;

	atomic_sub(len, &sk->sk_rmem_alloc);
	sk_mem_uncharge(sk, len);
}
EXPORT_SYMBOL(sock_rfree);

void sock_edemux(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

#ifdef CONFIG_INET
	if (sk->sk_state == TCP_TIME_WAIT)
		inet_twsk_put(inet_twsk(sk));
	else
#endif
		sock_put(sk);
}
EXPORT_SYMBOL(sock_edemux);

kuid_t sock_i_uid(struct sock *sk)
{
	kuid_t uid;

	read_lock_bh(&sk->sk_callback_lock);
	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
	read_unlock_bh(&sk->sk_callback_lock);
	return uid;
}
EXPORT_SYMBOL(sock_i_uid);

unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	read_lock_bh(&sk->sk_callback_lock);
	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
	read_unlock_bh(&sk->sk_callback_lock);
	return ino;
}
EXPORT_SYMBOL(sock_i_ino);

/*
 * Allocate an skb from the socket's send buffer.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_w(skb, sk);
			return skb;
		}
	}
	return NULL;
}
EXPORT_SYMBOL(sock_wmalloc);

/*
 * Allocate an skb from the socket's receive buffer.
 */
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_r(skb, sk);
			return skb;
		}
	}
	return NULL;
}

/*
 * Allocate a memory block from the socket's option memory buffer.
 */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
	if ((unsigned int)size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
		void *mem;
		/* First do the add, to avoid the race if kmalloc
		 * might sleep.
		 */
		atomic_add(size, &sk->sk_omem_alloc);
		mem = kmalloc(size, priority);
		if (mem)
			return mem;
		atomic_sub(size, &sk->sk_omem_alloc);
	}
	return NULL;
}
EXPORT_SYMBOL(sock_kmalloc);

/*
 * Free an option memory block.
 */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	kfree(mem);
	atomic_sub(size, &sk->sk_omem_alloc);
}
EXPORT_SYMBOL(sock_kfree_s);
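
/*
 * Illustrative sketch, added in this edit (not part of the original
 * file): option memory must be freed with the same size it was charged
 * with, since sock_kfree_s() trusts the caller for the sk_omem_alloc
 * accounting:
 */
static int example_stash_option(struct sock *sk, const void __user *uval,
				int len)
{
	void *buf = sock_kmalloc(sk, len, GFP_KERNEL);

	if (!buf)
		return -ENOBUFS;
	if (copy_from_user(buf, uval, len)) {
		sock_kfree_s(sk, buf, len);	/* uncharge exactly len */
		return -EFAULT;
	}
	/* ... attach buf to sk; free later with sock_kfree_s(sk, buf, len) */
	return 0;
}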

/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think these locks should be removed for datagram sockets.
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
			break;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			break;
		if (sk->sk_err)
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk_sleep(sk), &wait);
	return timeo;
}


/*
 *	Generic send/receive buffer handlers
 */

struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
				     unsigned long data_len, int noblock,
				     int *errcode)
{
	struct sk_buff *skb;
	gfp_t gfp_mask;
	long timeo;
	int err;
	int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;

	err = -EMSGSIZE;
	if (npages > MAX_SKB_FRAGS)
		goto failure;

	gfp_mask = sk->sk_allocation;
	if (gfp_mask & __GFP_WAIT)
		gfp_mask |= __GFP_REPEAT;

	timeo = sock_sndtimeo(sk, noblock);
	while (1) {
		err = sock_error(sk);
		if (err != 0)
			goto failure;

		err = -EPIPE;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			goto failure;

		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
			skb = alloc_skb(header_len, gfp_mask);
			if (skb) {
				int i;

				/* No pages, we're done... */
				if (!data_len)
					break;

				skb->truesize += data_len;
				skb_shinfo(skb)->nr_frags = npages;
				for (i = 0; i < npages; i++) {
					struct page *page;

					page = alloc_pages(sk->sk_allocation, 0);
					if (!page) {
						err = -ENOBUFS;
						skb_shinfo(skb)->nr_frags = i;
						kfree_skb(skb);
						goto failure;
					}

					__skb_fill_page_desc(skb, i,
							page, 0,
							(data_len >= PAGE_SIZE ?
							 PAGE_SIZE :
							 data_len));
					data_len -= PAGE_SIZE;
				}

				/* Full success... */
				break;
			}
			err = -ENOBUFS;
			goto failure;
		}
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}

	skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
}
EXPORT_SYMBOL(sock_alloc_send_pskb);

struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}
EXPORT_SYMBOL(sock_alloc_send_skb);
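
/*
 * Illustrative sketch, added in this edit (not part of the original
 * file): a datagram sendmsg() path typically goes through
 * sock_alloc_send_skb() so that sndbuf limits, send timeouts and
 * pending socket errors are all handled in one place:
 */
static struct sk_buff *example_dgram_alloc(struct sock *sk, size_t len,
					   int noblock, int *err)
{
	struct sk_buff *skb;

	/* may block, subject to sk->sk_sndtimeo, until wmem is available */
	skb = sock_alloc_send_skb(sk, len + MAX_HEADER, noblock, err);
	if (skb)
		skb_reserve(skb, MAX_HEADER);	/* headroom for lower layers */
	return skb;
}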

/* On 32bit arches, an skb frag is limited to 2^15 */
#define SKB_FRAG_PAGE_ORDER	get_order(32768)

bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
{
	int order;

	if (pfrag->page) {
		if (atomic_read(&pfrag->page->_count) == 1) {
			pfrag->offset = 0;
			return true;
		}
		if (pfrag->offset < pfrag->size)
			return true;
		put_page(pfrag->page);
	}

	/* We restrict high order allocations to users that can afford to wait */
	order = (sk->sk_allocation & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;

	do {
		gfp_t gfp = sk->sk_allocation;

		if (order)
			gfp |= __GFP_COMP | __GFP_NOWARN;
		pfrag->page = alloc_pages(gfp, order);
		if (likely(pfrag->page)) {
			pfrag->offset = 0;
			pfrag->size = PAGE_SIZE << order;
			return true;
		}
	} while (--order >= 0);

	sk_enter_memory_pressure(sk);
	sk_stream_moderate_sndbuf(sk);
	return false;
}
EXPORT_SYMBOL(sk_page_frag_refill);
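
/*
 * Illustrative sketch, added in this edit (not part of the original
 * file): the usual consumer pattern for the per-socket page frag.
 * sk_page_frag() (include/net/sock.h) selects the per-task or the
 * per-socket frag; real callers use kmap-aware copy helpers, so the
 * plain memcpy() below assumes a lowmem page for brevity:
 */
static int example_append_to_frag(struct sock *sk, const char *data, int len)
{
	struct page_frag *pfrag = sk_page_frag(sk);
	int copy;

	if (!sk_page_frag_refill(sk, pfrag))
		return -ENOMEM;		/* memory pressure was signalled */

	copy = min_t(int, len, pfrag->size - pfrag->offset);
	memcpy(page_address(pfrag->page) + pfrag->offset, data, copy);
	pfrag->offset += copy;		/* consume what we used */
	return copy;
}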

static void __lock_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					TASK_UNINTERRUPTIBLE);
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		if (!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}

static void __release_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	struct sk_buff *skb = sk->sk_backlog.head;

	do {
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
		bh_unlock_sock(sk);

		do {
			struct sk_buff *next = skb->next;

			prefetch(next);
			WARN_ON_ONCE(skb_dst_is_noref(skb));
			skb->next = NULL;
			sk_backlog_rcv(sk, skb);

			/*
			 * We are in process context here with softirqs
			 * disabled, use cond_resched_softirq() to preempt.
			 * This is safe to do because we've taken the backlog
			 * queue private:
			 */
			cond_resched_softirq();

			skb = next;
		} while (skb != NULL);

		bh_lock_sock(sk);
	} while ((skb = sk->sk_backlog.head) != NULL);

	/*
	 * Doing the zeroing here guarantees we cannot loop forever
	 * while a wild producer attempts to flood us.
	 */
	sk->sk_backlog.len = 0;
}

/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk: sock to wait on
 * @timeo: for how long
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 */
int sk_wait_data(struct sock *sk, long *timeo)
{
	int rc;
	DEFINE_WAIT(wait);

	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	finish_wait(sk_sleep(sk), &wait);
	return rc;
}
EXPORT_SYMBOL(sk_wait_data);
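
/*
 * Illustrative sketch, added in this edit (not part of the original
 * file): the wait/dequeue skeleton of a blocking recvmsg() built on
 * sk_wait_data().  Real callers hold the socket lock and also handle
 * errors and shutdown:
 */
static struct sk_buff *example_wait_for_skb(struct sock *sk, int noblock,
					    int *err)
{
	long timeo = sock_rcvtimeo(sk, noblock);
	struct sk_buff *skb;

	while (!(skb = skb_dequeue(&sk->sk_receive_queue))) {
		if (!timeo) {
			*err = -EAGAIN;
			return NULL;
		}
		sk_wait_data(sk, &timeo);	/* sleep for data or timeout */
		if (signal_pending(current)) {
			*err = sock_intr_errno(timeo);
			return NULL;
		}
	}
	return skb;
}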

/**
 * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
 * @sk: socket
 * @size: memory size to allocate
 * @kind: allocation type
 *
 * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
 * rmem allocation. This function assumes that protocols which have
 * memory_pressure use sk_wmem_queued as write buffer accounting.
 */
int __sk_mem_schedule(struct sock *sk, int size, int kind)
{
	struct proto *prot = sk->sk_prot;
	int amt = sk_mem_pages(size);
	long allocated;
	int parent_status = UNDER_LIMIT;

	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;

	allocated = sk_memory_allocated_add(sk, amt, &parent_status);

	/* Under limit. */
	if (parent_status == UNDER_LIMIT &&
	    allocated <= sk_prot_mem_limits(sk, 0)) {
		sk_leave_memory_pressure(sk);
		return 1;
	}

	/* Under pressure. (we or our parents) */
	if ((parent_status > SOFT_LIMIT) ||
	    allocated > sk_prot_mem_limits(sk, 1))
		sk_enter_memory_pressure(sk);

	/* Over hard limit (we or our parents) */
	if ((parent_status == OVER_LIMIT) ||
	    (allocated > sk_prot_mem_limits(sk, 2)))
		goto suppress_allocation;

	/* guarantee minimum buffer size under pressure */
	if (kind == SK_MEM_RECV) {
		if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
			return 1;

	} else { /* SK_MEM_SEND */
		if (sk->sk_type == SOCK_STREAM) {
			if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
				return 1;
		} else if (atomic_read(&sk->sk_wmem_alloc) <
			   prot->sysctl_wmem[0])
				return 1;
	}

	if (sk_has_memory_pressure(sk)) {
		int alloc;

		if (!sk_under_memory_pressure(sk))
			return 1;
		alloc = sk_sockets_allocated_read_positive(sk);
		if (sk_prot_mem_limits(sk, 2) > alloc *
		    sk_mem_pages(sk->sk_wmem_queued +
				 atomic_read(&sk->sk_rmem_alloc) +
				 sk->sk_forward_alloc))
			return 1;
	}

suppress_allocation:

	if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
		sk_stream_moderate_sndbuf(sk);

		/* Fail only if socket is _under_ its sndbuf.
		 * In this case we cannot block, so we have to fail.
		 */
		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
			return 1;
	}

	trace_sock_exceed_buf_limit(sk, prot, allocated);

	/* Alas. Undo changes. */
	sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;

	sk_memory_allocated_sub(sk, amt);

	return 0;
}
EXPORT_SYMBOL(__sk_mem_schedule);
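
/*
 * Illustrative sketch, added in this edit (not part of the original
 * file): protocols normally reach __sk_mem_schedule() through the
 * sk_wmem_schedule()/sk_rmem_schedule() inlines in include/net/sock.h,
 * which only call down here when sk_forward_alloc cannot already cover
 * the request:
 */
static bool example_charge_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	if (!sk_rmem_schedule(sk, skb, skb->truesize))
		return false;		/* over the protocol memory limits */

	sk_mem_charge(sk, skb->truesize);	/* consume forward_alloc */
	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
	return true;
}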

/**
 * __sk_mem_reclaim - reclaim memory_allocated
 * @sk: socket
 */
void __sk_mem_reclaim(struct sock *sk)
{
	sk_memory_allocated_sub(sk,
				sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT);
	sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;

	if (sk_under_memory_pressure(sk) &&
	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
		sk_leave_memory_pressure(sk);
}
EXPORT_SYMBOL(__sk_mem_reclaim);


/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 */

int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_bind);

int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_connect);

int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_socketpair);

int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_accept);

int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_getname);

unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
{
	return 0;
}
EXPORT_SYMBOL(sock_no_poll);

int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_ioctl);

int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_listen);

int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_shutdown);

int sock_no_setsockopt(struct socket *sock, int level, int optname,
		       char __user *optval, unsigned int optlen)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_setsockopt);

int sock_no_getsockopt(struct socket *sock, int level, int optname,
		       char __user *optval, int __user *optlen)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_getsockopt);

int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_sendmsg);

int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_recvmsg);

int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}
EXPORT_SYMBOL(sock_no_mmap);

ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
	ssize_t res;
	struct msghdr msg = {.msg_flags = flags};
	struct kvec iov;
	char *kaddr = kmap(page);
	iov.iov_base = kaddr + offset;
	iov.iov_len = size;
	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
	kunmap(page);
	return res;
}
EXPORT_SYMBOL(sock_no_sendpage);
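
/*
 * Illustrative sketch, added in this edit (not part of the original
 * file): a proto_ops table for a datagram-style family that plugs the
 * sock_no_* stubs into everything it does not implement.  The
 * example_* handlers are hypothetical; note there is no stub for
 * ->release, which every family must provide.
 */
static const struct proto_ops example_dgram_ops = {
	.family		= AF_UNSPEC,		/* placeholder family */
	.owner		= THIS_MODULE,
	.release	= example_release,	/* hypothetical */
	.bind		= example_bind,		/* hypothetical */
	.connect	= sock_no_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= sock_no_accept,	/* datagrams don't accept() */
	.getname	= example_getname,	/* hypothetical */
	.poll		= datagram_poll,
	.ioctl		= sock_no_ioctl,
	.listen		= sock_no_listen,
	.shutdown	= sock_no_shutdown,
	.setsockopt	= sock_no_setsockopt,
	.getsockopt	= sock_no_getsockopt,
	.sendmsg	= example_sendmsg,	/* hypothetical */
	.recvmsg	= example_recvmsg,	/* hypothetical */
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage,
};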

/*
 *	Default Socket Callbacks
 */

static void sock_def_wakeup(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_all(&wq->wait);
	rcu_read_unlock();
}

static void sock_def_error_report(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_poll(&wq->wait, POLLERR);
	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
	rcu_read_unlock();
}

static void sock_def_readable(struct sock *sk, int len)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
						POLLRDNORM | POLLRDBAND);
	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
	rcu_read_unlock();
}

static void sock_def_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();

	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
		wq = rcu_dereference(sk->sk_wq);
		if (wq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
						POLLWRNORM | POLLWRBAND);

		/* Should agree with poll, otherwise some programs break */
		if (sock_writeable(sk))
			sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}

	rcu_read_unlock();
}

static void sock_def_destruct(struct sock *sk)
{
	kfree(sk->sk_protinfo);
}

void sk_send_sigurg(struct sock *sk)
{
	if (sk->sk_socket && sk->sk_socket->file)
		if (send_sigurg(&sk->sk_socket->file->f_owner))
			sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
}
EXPORT_SYMBOL(sk_send_sigurg);

void sk_reset_timer(struct sock *sk, struct timer_list* timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}
EXPORT_SYMBOL(sk_reset_timer);

void sk_stop_timer(struct sock *sk, struct timer_list* timer)
{
	if (del_timer(timer))
		__sock_put(sk);
}
EXPORT_SYMBOL(sk_stop_timer);
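
/*
 * Illustrative sketch, added in this edit (not part of the original
 * file): the helpers above keep one socket reference per pending timer.
 * sk_reset_timer() takes a hold only if the timer was not already
 * pending, so the handler must drop it when it fires:
 */
static void example_timer_handler(unsigned long data)
{
	struct sock *sk = (struct sock *)data;	/* as set via setup_timer() */

	/* ... protocol timeout work, possibly re-arming via sk_reset_timer ... */

	sock_put(sk);		/* drop the hold taken by sk_reset_timer() */
}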

void sock_init_data(struct socket *sock, struct sock *sk)
{
	skb_queue_head_init(&sk->sk_receive_queue);
	skb_queue_head_init(&sk->sk_write_queue);
	skb_queue_head_init(&sk->sk_error_queue);
#ifdef CONFIG_NET_DMA
	skb_queue_head_init(&sk->sk_async_wait_queue);
#endif

	sk->sk_send_head = NULL;

	init_timer(&sk->sk_timer);

	sk->sk_allocation = GFP_KERNEL;
	sk->sk_rcvbuf = sysctl_rmem_default;
	sk->sk_sndbuf = sysctl_wmem_default;
	sk->sk_state = TCP_CLOSE;
	sk_set_socket(sk, sock);

	sock_set_flag(sk, SOCK_ZAPPED);

	if (sock) {
		sk->sk_type = sock->type;
		sk->sk_wq = sock->wq;
		sock->sk = sk;
	} else
		sk->sk_wq = NULL;

	spin_lock_init(&sk->sk_dst_lock);
	rwlock_init(&sk->sk_callback_lock);
	lockdep_set_class_and_name(&sk->sk_callback_lock,
			af_callback_keys + sk->sk_family,
			af_family_clock_key_strings[sk->sk_family]);

	sk->sk_state_change = sock_def_wakeup;
	sk->sk_data_ready = sock_def_readable;
	sk->sk_write_space = sock_def_write_space;
	sk->sk_error_report = sock_def_error_report;
	sk->sk_destruct = sock_def_destruct;

	sk->sk_frag.page = NULL;
	sk->sk_frag.offset = 0;
	sk->sk_peek_off = -1;

	sk->sk_peer_pid = NULL;
	sk->sk_peer_cred = NULL;
	sk->sk_write_pending = 0;
	sk->sk_rcvlowat = 1;
	sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;

	sk->sk_stamp = ktime_set(-1L, 0);

	/*
	 * Before updating sk_refcnt, we must commit prior changes to memory
	 * (Documentation/RCU/rculist_nulls.txt for details)
	 */
	smp_wmb();
	atomic_set(&sk->sk_refcnt, 1);
	atomic_set(&sk->sk_drops, 0);
}
EXPORT_SYMBOL(sock_init_data);
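
/*
 * Illustrative sketch, added in this edit (not part of the original
 * file): protocols usually override some of the sock_def_* callbacks
 * installed above right after calling sock_init_data().  The
 * example_* handlers are hypothetical:
 */
static void example_init_sock(struct socket *sock, struct sock *sk)
{
	sock_init_data(sock, sk);

	sk->sk_data_ready = example_data_ready;	/* hypothetical wakeup hook */
	sk->sk_destruct   = example_destruct;	/* runs from __sk_free() */
}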

void lock_sock_nested(struct sock *sk, int subclass)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_lock.owned)
		__lock_sock(sk);
	sk->sk_lock.owned = 1;
	spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
	local_bh_enable();
}
EXPORT_SYMBOL(lock_sock_nested);

void release_sock(struct sock *sk)
{
	/*
	 * The sk_lock has mutex_unlock() semantics:
	 */
	mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);

	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_backlog.tail)
		__release_sock(sk);

	if (sk->sk_prot->release_cb)
		sk->sk_prot->release_cb(sk);

	sk->sk_lock.owned = 0;
	if (waitqueue_active(&sk->sk_lock.wq))
		wake_up(&sk->sk_lock.wq);
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);

/**
 * lock_sock_fast - fast version of lock_sock
 * @sk: socket
 *
 * This version should be used for very small sections, where the
 * process won't block.
 * Returns false if the fast path is taken:
 *   sk_lock.slock locked, owned = 0, BH disabled
 * Returns true if the slow path is taken:
 *   sk_lock.slock unlocked, owned = 1, BH enabled
 */
bool lock_sock_fast(struct sock *sk)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);

	if (!sk->sk_lock.owned)
		/*
		 * Note : We must disable BH
		 */
		return false;

	__lock_sock(sk);
	sk->sk_lock.owned = 1;
	spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
	local_bh_enable();
	return true;
}
EXPORT_SYMBOL(lock_sock_fast);
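
/*
 * Illustrative sketch, added in this edit (not part of the original
 * file): lock_sock_fast() pairs with unlock_sock_fast() from
 * include/net/sock.h, which must be told which path was taken:
 */
static void example_short_critical_section(struct sock *sk)
{
	bool slow = lock_sock_fast(sk);

	/* ... a very small amount of work on sk ... */

	unlock_sock_fast(sk, slow);	/* spin_unlock_bh() or release_sock() */
}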

int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
	struct timeval tv;
	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	tv = ktime_to_timeval(sk->sk_stamp);
	if (tv.tv_sec == -1)
		return -ENOENT;
	if (tv.tv_sec == 0) {
		sk->sk_stamp = ktime_get_real();
		tv = ktime_to_timeval(sk->sk_stamp);
	}
	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);

int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
{
	struct timespec ts;
	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	ts = ktime_to_timespec(sk->sk_stamp);
	if (ts.tv_sec == -1)
		return -ENOENT;
	if (ts.tv_sec == 0) {
		sk->sk_stamp = ktime_get_real();
		ts = ktime_to_timespec(sk->sk_stamp);
	}
	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestampns);

void sock_enable_timestamp(struct sock *sk, int flag)
{
	if (!sock_flag(sk, flag)) {
		unsigned long previous_flags = sk->sk_flags;

		sock_set_flag(sk, flag);
		/*
		 * we just set one of the two flags which require net
		 * time stamping, but time stamping might have been on
		 * already because of the other one
		 */
		if (!(previous_flags & SK_FLAGS_TIMESTAMP))
			net_enable_timestamp();
	}
}

/*
 *	Get a socket option on a socket.
 *
 *	FIX: POSIX 1003.1g is very ambiguous here. It states that
 *	asynchronous errors should be reported by getsockopt. We assume
 *	this means if you specify SO_ERROR (otherwise what's the point of it).
 */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_getsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_getsockopt != NULL)
		return sk->sk_prot->compat_getsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_getsockopt);
#endif

int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
			struct msghdr *msg, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	int addr_len = 0;
	int err;

	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
				   flags & ~MSG_DONTWAIT, &addr_len);
	if (err >= 0)
		msg->msg_namelen = addr_len;
	return err;
}
EXPORT_SYMBOL(sock_common_recvmsg);

/*
 *	Set socket options on an inet socket.
 */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_setsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_setsockopt != NULL)
		return sk->sk_prot->compat_setsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_setsockopt);
#endif

void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sk_common_release is called, processes have
	 * no access to the socket, but the network still does.
	 *
	 * Step one, detach it from networking:
	 *
	 * A. Remove from hash tables.
	 */

	sk->sk_prot->unhash(sk);

	/*
	 * At this point the socket cannot receive new packets, but it is
	 * possible that some packets are in flight because some CPU runs
	 * the receiver and did a hash table lookup before we unhashed the
	 * socket. They will reach the receive queue and will be purged by
	 * the socket destructor.
	 *
	 * Also we still have packets pending on the receive queue and
	 * probably our own packets waiting in device queues. sock_destroy
	 * will drain the receive queue, but transmitted packets will delay
	 * socket destruction until the last reference is released.
	 */

	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

	sk_refcnt_debug_release(sk);

	if (sk->sk_frag.page) {
		put_page(sk->sk_frag.page);
		sk->sk_frag.page = NULL;
	}

	sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);

#ifdef CONFIG_PROC_FS
#define PROTO_INUSE_NR	64	/* should be enough for the first time */
struct prot_inuse {
	int val[PROTO_INUSE_NR];
};

static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);

#ifdef CONFIG_NET_NS
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
	__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);

int sock_prot_inuse_get(struct net *net, struct proto *prot)
{
	int cpu, idx = prot->inuse_idx;
	int res = 0;

	for_each_possible_cpu(cpu)
		res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];

	return res >= 0 ? res : 0;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_get);

static int __net_init sock_inuse_init_net(struct net *net)
{
	net->core.inuse = alloc_percpu(struct prot_inuse);
	return net->core.inuse ? 0 : -ENOMEM;
}

static void __net_exit sock_inuse_exit_net(struct net *net)
{
	free_percpu(net->core.inuse);
}

static struct pernet_operations net_inuse_ops = {
	.init = sock_inuse_init_net,
	.exit = sock_inuse_exit_net,
};

static __init int net_inuse_init(void)
{
	if (register_pernet_subsys(&net_inuse_ops))
		panic("Cannot initialize net inuse counters");

	return 0;
}

core_initcall(net_inuse_init);
#else
static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);

void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
	__this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);

int sock_prot_inuse_get(struct net *net, struct proto *prot)
{
	int cpu, idx = prot->inuse_idx;
	int res = 0;

	for_each_possible_cpu(cpu)
		res += per_cpu(prot_inuse, cpu).val[idx];

	return res >= 0 ? res : 0;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
#endif

/*
 * Index PROTO_INUSE_NR - 1 is never handed out or set in the bitmap: a
 * protocol that cannot get a slot is left with that sentinel value, and
 * release_proto_idx() knows not to clear it.
 */
static void assign_proto_idx(struct proto *prot)
{
	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);

	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
		pr_err("PROTO_INUSE_NR exhausted\n");
		return;
	}

	set_bit(prot->inuse_idx, proto_inuse_idx);
}

static void release_proto_idx(struct proto *prot)
{
	if (prot->inuse_idx != PROTO_INUSE_NR - 1)
		clear_bit(prot->inuse_idx, proto_inuse_idx);
}
#else
static inline void assign_proto_idx(struct proto *prot)
{
}

static inline void release_proto_idx(struct proto *prot)
{
}
#endif

int proto_register(struct proto *prot, int alloc_slab)
{
	if (alloc_slab) {
		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
					SLAB_HWCACHE_ALIGN | prot->slab_flags,
					NULL);

		if (prot->slab == NULL) {
			pr_crit("%s: Can't create sock SLAB cache!\n",
				prot->name);
			goto out;
		}

		if (prot->rsk_prot != NULL) {
			prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name);
			if (prot->rsk_prot->slab_name == NULL)
				goto out_free_sock_slab;

			prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
								 prot->rsk_prot->obj_size, 0,
								 SLAB_HWCACHE_ALIGN, NULL);

			if (prot->rsk_prot->slab == NULL) {
				pr_crit("%s: Can't create request sock SLAB cache!\n",
					prot->name);
				goto out_free_request_sock_slab_name;
			}
		}

		if (prot->twsk_prot != NULL) {
			prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);

			if (prot->twsk_prot->twsk_slab_name == NULL)
				goto out_free_request_sock_slab;

			prot->twsk_prot->twsk_slab =
				kmem_cache_create(prot->twsk_prot->twsk_slab_name,
						  prot->twsk_prot->twsk_obj_size,
						  0,
						  SLAB_HWCACHE_ALIGN |
							prot->slab_flags,
						  NULL);
			if (prot->twsk_prot->twsk_slab == NULL)
				goto out_free_timewait_sock_slab_name;
		}
	}

	mutex_lock(&proto_list_mutex);
	list_add(&prot->node, &proto_list);
	assign_proto_idx(prot);
	mutex_unlock(&proto_list_mutex);
	return 0;

out_free_timewait_sock_slab_name:
	kfree(prot->twsk_prot->twsk_slab_name);
out_free_request_sock_slab:
	if (prot->rsk_prot && prot->rsk_prot->slab) {
		kmem_cache_destroy(prot->rsk_prot->slab);
		prot->rsk_prot->slab = NULL;
	}
out_free_request_sock_slab_name:
	if (prot->rsk_prot)
		kfree(prot->rsk_prot->slab_name);
out_free_sock_slab:
	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;
out:
	return -ENOBUFS;
}
EXPORT_SYMBOL(proto_register);

void proto_unregister(struct proto *prot)
{
	mutex_lock(&proto_list_mutex);
	release_proto_idx(prot);
	list_del(&prot->node);
	mutex_unlock(&proto_list_mutex);

	if (prot->slab != NULL) {
		kmem_cache_destroy(prot->slab);
		prot->slab = NULL;
	}

	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
		kmem_cache_destroy(prot->rsk_prot->slab);
		kfree(prot->rsk_prot->slab_name);
		prot->rsk_prot->slab = NULL;
	}

	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
		kfree(prot->twsk_prot->twsk_slab_name);
		prot->twsk_prot->twsk_slab = NULL;
	}
}
EXPORT_SYMBOL(proto_unregister);
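
/*
 * Usage sketch (illustrative, hypothetical "myproto"): a protocol module
 * fills in a struct proto and registers it once at init time, passing
 * alloc_slab != 0 if its sockets should come from a dedicated kmem cache:
 *
 *	static struct proto myproto_prot = {
 *		.name	  = "MYPROTO",
 *		.owner	  = THIS_MODULE,
 *		.obj_size = sizeof(struct myproto_sock),
 *	};
 *
 *	err = proto_register(&myproto_prot, 1);
 *	if (err)
 *		return err;
 *	...
 *	proto_unregister(&myproto_prot);	(on module exit)
 */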

#ifdef CONFIG_PROC_FS
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(proto_list_mutex)
{
	mutex_lock(&proto_list_mutex);
	return seq_list_start_head(&proto_list, *pos);
}

static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	return seq_list_next(v, &proto_list, pos);
}

static void proto_seq_stop(struct seq_file *seq, void *v)
	__releases(proto_list_mutex)
{
	mutex_unlock(&proto_list_mutex);
}

static char proto_method_implemented(const void *method)
{
	return method == NULL ? 'n' : 'y';
}

static long sock_prot_memory_allocated(struct proto *proto)
{
	return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
}

static char *sock_prot_memory_pressure(struct proto *proto)
{
	return proto->memory_pressure != NULL ?
	proto_memory_pressure(proto) ? "yes" : "no" : "NI";
}

static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
	seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   sock_prot_inuse_get(seq_file_net(seq), proto),
		   sock_prot_memory_allocated(proto),
		   sock_prot_memory_pressure(proto),
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->sendpage),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}

static int proto_seq_show(struct seq_file *seq, void *v)
{
	if (v == &proto_list)
		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
			   "protocol",
			   "size",
			   "sockets",
			   "memory",
			   "press",
			   "maxhdr",
			   "slab",
			   "module",
			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
	else
		proto_seq_printf(seq, list_entry(v, struct proto, node));
	return 0;
}
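
/*
 * Column key for the /proc/net/protocols header above, in the order the
 * methods are tested by proto_seq_printf():
 *
 *	cl=close co=connect di=disconnect ac=accept io=ioctl in=init
 *	de=destroy sh=shutdown ss=setsockopt gs=getsockopt se=sendmsg
 *	re=recvmsg sp=sendpage bi=bind br=backlog_rcv ha=hash uh=unhash
 *	gp=get_port em=enter_memory_pressure
 *
 * Each column reads 'y' or 'n' depending on whether the protocol
 * implements that method.
 */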

static const struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};

static int proto_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &proto_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations proto_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= proto_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

static __net_init int proto_init_net(struct net *net)
{
	if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
		return -ENOMEM;

	return 0;
}

static __net_exit void proto_exit_net(struct net *net)
{
	remove_proc_entry("protocols", net->proc_net);
}

static __net_initdata struct pernet_operations proto_net_ops = {
	.init = proto_init_net,
	.exit = proto_exit_net,
};

static int __init proto_init(void)
{
	return register_pernet_subsys(&proto_net_ops);
}

subsys_initcall(proto_init);

#endif /* CONFIG_PROC_FS */