/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic socket support routines. Memory allocators, socket lock/release
 *		handler for protocols to use and generic option handler.
 *
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *		Alan Cox	:	Numerous verify_area() problems
 *		Alan Cox	:	Connecting on a connecting socket
 *					now returns an error for tcp.
 *		Alan Cox	:	sock->protocol is set correctly.
 *					and is not sometimes left as 0.
 *		Alan Cox	:	connect handles icmp errors on a
 *					connect properly. Unfortunately there
 *					is a restart syscall nasty there. I
 *					can't match BSD without hacking the C
 *					library. Ideas urgently sought!
 *		Alan Cox	:	Disallow bind() to addresses that are
 *					not ours - especially broadcast ones!!
 *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
 *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
 *					instead they leave that for the DESTROY timer.
 *		Alan Cox	:	Clean up error flag in accept
 *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
 *					was buggy. Put a remove_sock() in the handler
 *					for memory when we hit 0. Also altered the timer
 *					code. The ACK stuff can wait and needs major
 *					TCP layer surgery.
 *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
 *					and fixed timer/inet_bh race.
 *		Alan Cox	:	Added zapped flag for TCP
 *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
 *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
 *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
 *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
 *		Pauline Middelink	:	identd support
 *		Alan Cox	:	Fixed connect() taking signals I think.
 *		Alan Cox	:	SO_LINGER supported
 *		Alan Cox	:	Error reporting fixes
 *		Anonymous	:	inet_create tidied up (sk->reuse setting)
 *		Alan Cox	:	inet sockets don't set sk->type!
 *		Alan Cox	:	Split socket option code
 *		Alan Cox	:	Callbacks
 *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
 *		Alex		:	Removed restriction on inet fioctl
 *		Alan Cox	:	Splitting INET from NET core
 *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
 *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
 *		Alan Cox	:	Split IP from generic code
 *		Alan Cox	:	New kfree_skbmem()
 *		Alan Cox	:	Make SO_DEBUG superuser only.
 *		Alan Cox	:	Allow anyone to clear SO_DEBUG
 *					(compatibility fix)
 *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
 *		Alan Cox	:	Allocator for a socket is settable.
 *		Alan Cox	:	SO_ERROR includes soft errors.
 *		Alan Cox	:	Allow NULL arguments on some SO_ opts
 *		Alan Cox	:	Generic socket allocation to make hooks
 *					easier (suggested by Craig Metz).
 *		Michael Pall	:	SO_ERROR returns positive errno again
 *		Steve Whitehouse:	Added default destructor to free
 *					protocol private data.
 *		Steve Whitehouse:	Added various other default routines
 *					common to several socket families.
 *		Chris Evans	:	Call suser() check last on F_SETOWN
 *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
 *		Andi Kleen	:	Fix write_space callback
 *		Chris Evans	:	Security fixes - signedness again
 *		Arnaldo C. Melo :	cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/errqueue.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/user_namespace.h>
#include <linux/static_key.h>
#include <linux/memcontrol.h>
#include <linux/prefetch.h>

#include <linux/uaccess.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <linux/net_tstamp.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>
#include <net/cls_cgroup.h>
#include <net/netprio_cgroup.h>
#include <linux/sock_diag.h>

#include <linux/filter.h>
#include <net/sock_reuseport.h>

#include <trace/events/sock.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

#include <net/busy_poll.h>

static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);

/**
 * sk_ns_capable - General socket capability test
 * @sk: Socket to use a capability on or through
 * @user_ns: The user namespace of the capability to use
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket had the capability @cap when
 * the socket was created, and whether the current process has it in the
 * user namespace @user_ns.
 */
bool sk_ns_capable(const struct sock *sk,
		   struct user_namespace *user_ns, int cap)
{
	return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
		ns_capable(user_ns, cap);
}
EXPORT_SYMBOL(sk_ns_capable);

/**
 * sk_capable - Socket global capability test
 * @sk: Socket to use a capability on or through
 * @cap: The global capability to use
 *
 * Test to see if the opener of the socket had the capability @cap when
 * the socket was created, and whether the current process has it in all
 * user namespaces.
 */
bool sk_capable(const struct sock *sk, int cap)
{
	return sk_ns_capable(sk, &init_user_ns, cap);
}
EXPORT_SYMBOL(sk_capable);

/**
 * sk_net_capable - Network namespace socket capability test
 * @sk: Socket to use a capability on or through
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket had the capability @cap when
 * the socket was created, and whether the current process has it over
 * the network namespace the socket is a member of.
 */
bool sk_net_capable(const struct sock *sk, int cap)
{
	return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
}
EXPORT_SYMBOL(sk_net_capable);

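/*
 * Usage sketch (illustrative, not code from this file): a protocol
 * gating a privileged operation on a socket would typically write
 *
 *	if (!sk_net_capable(sk, CAP_NET_ADMIN))
 *		return -EPERM;
 *
 * which passes only if the process that opened the socket had
 * CAP_NET_ADMIN over the socket's network namespace at creation time
 * and the process acting on the socket now still has it.
 */
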
/*
 * Each address family might have different locking rules, so we have
 * one slock key per address family:
 */
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];

/*
 * Make lock validator output more readable. (we pre-construct these
 * strings build-time, so that runtime initialization of socket
 * locks is fast):
 */
static const char *const af_family_key_strings[AF_MAX+1] = {
  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
  "sk_lock-AF_RDS"   , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_IUCV"     ,
  "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
  "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG"      ,
  "sk_lock-AF_NFC"   , "sk_lock-AF_VSOCK"    , "sk_lock-AF_KCM"      ,
  "sk_lock-AF_QIPCRTR", "sk_lock-AF_SMC"     , "sk_lock-AF_MAX"
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
  "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
  "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
  "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
  "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
  "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
  "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
  "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
  "slock-AF_RDS"   , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
  "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
  "slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
  "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
  "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG"      ,
  "slock-AF_NFC"   , "slock-AF_VSOCK"    , "slock-AF_KCM"      ,
  "slock-AF_QIPCRTR", "slock-AF_SMC"     , "slock-AF_MAX"
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
  "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
  "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
  "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
  "clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
  "clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
  "clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
  "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
  "clock-AF_RDS"   , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
  "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
  "clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
  "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
  "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
  "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG"      ,
  "clock-AF_NFC"   , "clock-AF_VSOCK"    , "clock-AF_KCM"      ,
  "clock-AF_QIPCRTR", "clock-AF_SMC"     , "clock-AF_MAX"
};
static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
  "rlock-AF_UNSPEC", "rlock-AF_UNIX"     , "rlock-AF_INET"     ,
  "rlock-AF_AX25"  , "rlock-AF_IPX"      , "rlock-AF_APPLETALK",
  "rlock-AF_NETROM", "rlock-AF_BRIDGE"   , "rlock-AF_ATMPVC"   ,
  "rlock-AF_X25"   , "rlock-AF_INET6"    , "rlock-AF_ROSE"     ,
  "rlock-AF_DECnet", "rlock-AF_NETBEUI"  , "rlock-AF_SECURITY" ,
  "rlock-AF_KEY"   , "rlock-AF_NETLINK"  , "rlock-AF_PACKET"   ,
  "rlock-AF_ASH"   , "rlock-AF_ECONET"   , "rlock-AF_ATMSVC"   ,
  "rlock-AF_RDS"   , "rlock-AF_SNA"      , "rlock-AF_IRDA"     ,
  "rlock-AF_PPPOX" , "rlock-AF_WANPIPE"  , "rlock-AF_LLC"      ,
  "rlock-27"       , "rlock-28"          , "rlock-AF_CAN"      ,
  "rlock-AF_TIPC"  , "rlock-AF_BLUETOOTH", "rlock-AF_IUCV"     ,
  "rlock-AF_RXRPC" , "rlock-AF_ISDN"     , "rlock-AF_PHONET"   ,
  "rlock-AF_IEEE802154", "rlock-AF_CAIF" , "rlock-AF_ALG"      ,
  "rlock-AF_NFC"   , "rlock-AF_VSOCK"    , "rlock-AF_KCM"      ,
  "rlock-AF_QIPCRTR", "rlock-AF_SMC"     , "rlock-AF_MAX"
};
static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
  "wlock-AF_UNSPEC", "wlock-AF_UNIX"     , "wlock-AF_INET"     ,
  "wlock-AF_AX25"  , "wlock-AF_IPX"      , "wlock-AF_APPLETALK",
  "wlock-AF_NETROM", "wlock-AF_BRIDGE"   , "wlock-AF_ATMPVC"   ,
  "wlock-AF_X25"   , "wlock-AF_INET6"    , "wlock-AF_ROSE"     ,
  "wlock-AF_DECnet", "wlock-AF_NETBEUI"  , "wlock-AF_SECURITY" ,
  "wlock-AF_KEY"   , "wlock-AF_NETLINK"  , "wlock-AF_PACKET"   ,
  "wlock-AF_ASH"   , "wlock-AF_ECONET"   , "wlock-AF_ATMSVC"   ,
  "wlock-AF_RDS"   , "wlock-AF_SNA"      , "wlock-AF_IRDA"     ,
  "wlock-AF_PPPOX" , "wlock-AF_WANPIPE"  , "wlock-AF_LLC"      ,
  "wlock-27"       , "wlock-28"          , "wlock-AF_CAN"      ,
  "wlock-AF_TIPC"  , "wlock-AF_BLUETOOTH", "wlock-AF_IUCV"     ,
  "wlock-AF_RXRPC" , "wlock-AF_ISDN"     , "wlock-AF_PHONET"   ,
  "wlock-AF_IEEE802154", "wlock-AF_CAIF" , "wlock-AF_ALG"      ,
  "wlock-AF_NFC"   , "wlock-AF_VSOCK"    , "wlock-AF_KCM"      ,
  "wlock-AF_QIPCRTR", "wlock-AF_SMC"     , "wlock-AF_MAX"
};
static const char *const af_family_elock_key_strings[AF_MAX+1] = {
  "elock-AF_UNSPEC", "elock-AF_UNIX"     , "elock-AF_INET"     ,
  "elock-AF_AX25"  , "elock-AF_IPX"      , "elock-AF_APPLETALK",
  "elock-AF_NETROM", "elock-AF_BRIDGE"   , "elock-AF_ATMPVC"   ,
  "elock-AF_X25"   , "elock-AF_INET6"    , "elock-AF_ROSE"     ,
  "elock-AF_DECnet", "elock-AF_NETBEUI"  , "elock-AF_SECURITY" ,
  "elock-AF_KEY"   , "elock-AF_NETLINK"  , "elock-AF_PACKET"   ,
  "elock-AF_ASH"   , "elock-AF_ECONET"   , "elock-AF_ATMSVC"   ,
  "elock-AF_RDS"   , "elock-AF_SNA"      , "elock-AF_IRDA"     ,
  "elock-AF_PPPOX" , "elock-AF_WANPIPE"  , "elock-AF_LLC"      ,
  "elock-27"       , "elock-28"          , "elock-AF_CAN"      ,
  "elock-AF_TIPC"  , "elock-AF_BLUETOOTH", "elock-AF_IUCV"     ,
  "elock-AF_RXRPC" , "elock-AF_ISDN"     , "elock-AF_PHONET"   ,
  "elock-AF_IEEE802154", "elock-AF_CAIF" , "elock-AF_ALG"      ,
  "elock-AF_NFC"   , "elock-AF_VSOCK"    , "elock-AF_KCM"      ,
  "elock-AF_QIPCRTR", "elock-AF_SMC"     , "elock-AF_MAX"
};

/*
 * sk_callback_lock and sk queues locking rules are per-address-family,
 * so split the lock classes by using a per-AF key:
 */
static struct lock_class_key af_callback_keys[AF_MAX];
static struct lock_class_key af_rlock_keys[AF_MAX];
static struct lock_class_key af_wlock_keys[AF_MAX];
static struct lock_class_key af_elock_keys[AF_MAX];

/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS		256
#define _SK_MEM_OVERHEAD	SKB_TRUESIZE(256)
#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
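
/*
 * Worked example (a sketch, assuming a typical x86-64 build of this era):
 * SKB_TRUESIZE(256) is 256 bytes of payload plus the cache-line-aligned
 * sizes of struct sk_buff and struct skb_shared_info, roughly 832 bytes
 * in total. SK_WMEM_MAX and SK_RMEM_MAX then come out at about
 * 832 * 256 = 212992 bytes, the familiar net.core.{w,r}mem_max default,
 * rather than a raw byte count that would vary with struct layout.
 */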

/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
EXPORT_SYMBOL(sysctl_wmem_max);
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
EXPORT_SYMBOL(sysctl_rmem_max);
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
EXPORT_SYMBOL(sysctl_optmem_max);

int sysctl_tstamp_allow_data __read_mostly = 1;

struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
EXPORT_SYMBOL_GPL(memalloc_socks);

/**
 * sk_set_memalloc - sets %SOCK_MEMALLOC
 * @sk: socket to set it on
 *
 * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
 * It's the responsibility of the admin to adjust min_free_kbytes
 * to meet the requirements.
 */
void sk_set_memalloc(struct sock *sk)
{
	sock_set_flag(sk, SOCK_MEMALLOC);
	sk->sk_allocation |= __GFP_MEMALLOC;
	static_key_slow_inc(&memalloc_socks);
}
EXPORT_SYMBOL_GPL(sk_set_memalloc);

void sk_clear_memalloc(struct sock *sk)
{
	sock_reset_flag(sk, SOCK_MEMALLOC);
	sk->sk_allocation &= ~__GFP_MEMALLOC;
	static_key_slow_dec(&memalloc_socks);

	/*
	 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
	 * progress of swapping. SOCK_MEMALLOC may be cleared while
	 * it has rmem allocations due to the last swapfile being deactivated
	 * but there is a risk that the socket is unusable due to exceeding
	 * the rmem limits. Reclaim the reserves and obey rmem limits again.
	 */
	sk_mem_reclaim(sk);
}
EXPORT_SYMBOL_GPL(sk_clear_memalloc);
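
/*
 * Usage sketch (illustrative, not code from this file; assumes a
 * swap-over-network setup such as swap over NFS): a protocol whose
 * socket must keep making progress under memory pressure brackets the
 * socket's service lifetime with these helpers:
 *
 *	sk_set_memalloc(sk);	<- socket may dip into emergency reserves
 *	... swap traffic flows over sk ...
 *	sk_clear_memalloc(sk);	<- reclaim reserves, obey rmem limits again
 */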

int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	int ret;
	unsigned long pflags = current->flags;

	/* these should have been dropped before queueing */
	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));

	current->flags |= PF_MEMALLOC;
	ret = sk->sk_backlog_rcv(sk, skb);
	tsk_restore_flags(current, pflags, PF_MEMALLOC);

	return ret;
}
EXPORT_SYMBOL(__sk_backlog_rcv);
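
/*
 * Note (a sketch of the surrounding machinery, not guaranteed by this
 * file): sk->sk_backlog_rcv points at the protocol's backlog handler,
 * e.g. TCP's tcp_v4_do_rcv() via its struct proto. The PF_MEMALLOC
 * bracket above lets processing on behalf of a SOCK_MEMALLOC socket
 * allocate from emergency reserves, and restores the task flag so the
 * privilege does not leak into unrelated work afterwards.
 */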
397
Linus Torvalds1da177e2005-04-16 15:20:36 -0700398static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
399{
400 struct timeval tv;
401
402 if (optlen < sizeof(tv))
403 return -EINVAL;
404 if (copy_from_user(&tv, optval, sizeof(tv)))
405 return -EFAULT;
Vasily Averinba780732007-05-24 16:58:54 -0700406 if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
407 return -EDOM;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408
Vasily Averinba780732007-05-24 16:58:54 -0700409 if (tv.tv_sec < 0) {
Andrew Morton6f11df82007-07-09 13:16:00 -0700410 static int warned __read_mostly;
411
Vasily Averinba780732007-05-24 16:58:54 -0700412 *timeo_p = 0;
Ilpo Järvinen50aab542008-05-02 16:20:10 -0700413 if (warned < 10 && net_ratelimit()) {
Vasily Averinba780732007-05-24 16:58:54 -0700414 warned++;
Joe Perchese005d192012-05-16 19:58:40 +0000415 pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
416 __func__, current->comm, task_pid_nr(current));
Ilpo Järvinen50aab542008-05-02 16:20:10 -0700417 }
Vasily Averinba780732007-05-24 16:58:54 -0700418 return 0;
419 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420 *timeo_p = MAX_SCHEDULE_TIMEOUT;
421 if (tv.tv_sec == 0 && tv.tv_usec == 0)
422 return 0;
423 if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
Gao Feng8ccde4c2017-02-21 17:09:19 +0800424 *timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC / HZ);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425 return 0;
426}
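
/*
 * Worked example (a sketch, assuming HZ == 1000): setting SO_RCVTIMEO
 * with tv = { .tv_sec = 2, .tv_usec = 500 } yields
 *
 *	*timeo_p = 2 * HZ + DIV_ROUND_UP(500, USEC_PER_SEC / HZ)
 *		 = 2000 + DIV_ROUND_UP(500, 1000) = 2001 jiffies
 *
 * The sub-tick remainder is rounded up, so a nonzero timeout can never
 * truncate to zero jiffies (which would mean "do not block at all").
 */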

static void sock_warn_obsolete_bsdism(const char *name)
{
	static int warned;
	static char warncomm[TASK_COMM_LEN];
	if (strcmp(warncomm, current->comm) && warned < 5) {
		strcpy(warncomm, current->comm);
		pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
			warncomm, name);
		warned++;
	}
}

static bool sock_needs_netstamp(const struct sock *sk)
{
	switch (sk->sk_family) {
	case AF_UNSPEC:
	case AF_UNIX:
		return false;
	default:
		return true;
	}
}

static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
{
	if (sk->sk_flags & flags) {
		sk->sk_flags &= ~flags;
		if (sock_needs_netstamp(sk) &&
		    !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
			net_disable_timestamp();
	}
}

int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	unsigned long flags;
	struct sk_buff_head *list = &sk->sk_receive_queue;

	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
		atomic_inc(&sk->sk_drops);
		trace_sock_rcvqueue_full(sk, skb);
		return -ENOMEM;
	}

	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
		atomic_inc(&sk->sk_drops);
		return -ENOBUFS;
	}

	skb->dev = NULL;
	skb_set_owner_r(skb, sk);

	/* we escape from rcu protected region, make sure we don't leak
	 * a norefcounted dst
	 */
	skb_dst_force(skb);

	spin_lock_irqsave(&list->lock, flags);
	sock_skb_set_dropcount(sk, skb);
	__skb_queue_tail(list, skb);
	spin_unlock_irqrestore(&list->lock, flags);

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk);
	return 0;
}
EXPORT_SYMBOL(__sock_queue_rcv_skb);

int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int err;

	err = sk_filter(sk, skb);
	if (err)
		return err;

	return __sock_queue_rcv_skb(sk, skb);
}
EXPORT_SYMBOL(sock_queue_rcv_skb);
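
/*
 * Usage sketch (illustrative): a datagram protocol's receive path
 * typically hands a validated skb to the owning socket like this:
 *
 *	rc = sock_queue_rcv_skb(sk, skb);
 *	if (rc < 0)
 *		kfree_skb(skb);	(dropped by filter, -ENOMEM or -ENOBUFS)
 *
 * sk_filter() runs first; on success the skb is charged to
 * sk_rmem_alloc and sk_data_ready() wakes any sleeping reader.
 */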

int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
		     const int nested, unsigned int trim_cap, bool refcounted)
{
	int rc = NET_RX_SUCCESS;

	if (sk_filter_trim_cap(sk, skb, trim_cap))
		goto discard_and_relse;

	skb->dev = NULL;

	if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}
	if (nested)
		bh_lock_sock_nested(sk);
	else
		bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		/*
		 * trylock + unlock semantics:
		 */
		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

		rc = sk_backlog_rcv(sk, skb);

		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
	} else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
		bh_unlock_sock(sk);
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}

	bh_unlock_sock(sk);
out:
	if (refcounted)
		sock_put(sk);
	return rc;
discard_and_relse:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(__sk_receive_skb);

struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = __sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_tx_queue_clear(sk);
		sk->sk_dst_pending_confirm = 0;
		RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(__sk_dst_check);

struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_dst_reset(sk);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(sk_dst_check);
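
/*
 * Usage sketch (illustrative): transmit paths revalidate the cached
 * route before each use and fall back to a fresh lookup when routing
 * state has changed, roughly:
 *
 *	dst = sk_dst_check(sk, 0);
 *	if (!dst)
 *		... perform a new route lookup and sk_dst_set(sk, dst) ...
 */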

static int sock_setbindtodevice(struct sock *sk, char __user *optval,
				int optlen)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];
	int index;

	/* Sorry... */
	ret = -EPERM;
	if (!ns_capable(net->user_ns, CAP_NET_RAW))
		goto out;

	ret = -EINVAL;
	if (optlen < 0)
		goto out;

	/* Bind this socket to a particular device like "eth0",
	 * as specified in the passed interface name. If the
	 * name is "" or the option length is zero the socket
	 * is not bound.
	 */
	if (optlen > IFNAMSIZ - 1)
		optlen = IFNAMSIZ - 1;
	memset(devname, 0, sizeof(devname));

	ret = -EFAULT;
	if (copy_from_user(devname, optval, optlen))
		goto out;

	index = 0;
	if (devname[0] != '\0') {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_name_rcu(net, devname);
		if (dev)
			index = dev->ifindex;
		rcu_read_unlock();
		ret = -ENODEV;
		if (!dev)
			goto out;
	}

	lock_sock(sk);
	sk->sk_bound_dev_if = index;
	sk_dst_reset(sk);
	release_sock(sk);

	ret = 0;

out:
#endif

	return ret;
}

static int sock_getbindtodevice(struct sock *sk, char __user *optval,
				int __user *optlen, int len)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];

	if (sk->sk_bound_dev_if == 0) {
		len = 0;
		goto zero;
	}

	ret = -EINVAL;
	if (len < IFNAMSIZ)
		goto out;

	ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
	if (ret)
		goto out;

	len = strlen(devname) + 1;

	ret = -EFAULT;
	if (copy_to_user(optval, devname, len))
		goto out;

zero:
	ret = -EFAULT;
	if (put_user(len, optlen))
		goto out;

	ret = 0;

out:
#endif

	return ret;
}
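
/*
 * Userspace view (a sketch): binding a socket to an interface takes the
 * interface *name*, not an index, and requires CAP_NET_RAW:
 *
 *	setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, "eth0", strlen("eth0"));
 *
 * Passing an empty name (or an option length of zero) unbinds the
 * socket again, since the index falls back to 0 above.
 */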

static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
{
	if (valbool)
		sock_set_flag(sk, bit);
	else
		sock_reset_flag(sk, bit);
}

bool sk_mc_loop(struct sock *sk)
{
	if (dev_recursion_level())
		return false;
	if (!sk)
		return true;
	switch (sk->sk_family) {
	case AF_INET:
		return inet_sk(sk)->mc_loop;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		return inet6_sk(sk)->mc_loop;
#endif
	}
	WARN_ON(1);
	return true;
}
EXPORT_SYMBOL(sk_mc_loop);

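/*
 * Caller sketch (illustrative): the multicast output paths use this to
 * decide whether to loop a copy of an outgoing multicast packet back to
 * local listeners, roughly:
 *
 *	if (sk_mc_loop(sk))
 *		... clone the skb and deliver it locally ...
 *
 * The dev_recursion_level() check above forces the answer to "no" while
 * the stack is already transmitting through a tunnel device, preventing
 * unbounded loopback recursion.
 */
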
/*
 *	This is meant for all protocols to use and covers goings on
 *	at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	Options without arguments
	 */

	if (optname == SO_BINDTODEVICE)
		return sock_setbindtodevice(sk, optval, optlen);

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	valbool = val ? 1 : 0;

	lock_sock(sk);

	switch (optname) {
	case SO_DEBUG:
		if (val && !capable(CAP_NET_ADMIN))
			ret = -EACCES;
		else
			sock_valbool_flag(sk, SOCK_DBG, valbool);
		break;
	case SO_REUSEADDR:
		sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
		break;
	case SO_REUSEPORT:
		sk->sk_reuseport = valbool;
		break;
	case SO_TYPE:
	case SO_PROTOCOL:
	case SO_DOMAIN:
	case SO_ERROR:
		ret = -ENOPROTOOPT;
		break;
	case SO_DONTROUTE:
		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
		break;
	case SO_BROADCAST:
		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
		break;
	case SO_SNDBUF:
		/* Don't error on this; BSD doesn't, and if you think
		 * about it this is right. Otherwise apps have to
		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
		 * are treated in BSD as hints.
		 */
		val = min_t(u32, val, sysctl_wmem_max);
set_sndbuf:
		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
		sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
		/* Wake up sending tasks if we upped the value. */
		sk->sk_write_space(sk);
		break;

	case SO_SNDBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_sndbuf;

	case SO_RCVBUF:
		/* Don't error on this; BSD doesn't, and if you think
		 * about it this is right. Otherwise apps have to
		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
		 * are treated in BSD as hints.
		 */
		val = min_t(u32, val, sysctl_rmem_max);
set_rcvbuf:
		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
		/*
		 * We double it on the way in to account for
		 * "struct sk_buff" etc. overhead.   Applications
		 * assume that the SO_RCVBUF setting they make will
		 * allow that much actual data to be received on that
		 * socket.
		 *
		 * Applications are unaware that "struct sk_buff" and
		 * other overheads allocate from the receive buffer
		 * during socket buffer allocation.
		 *
		 * And after considering the possible alternatives,
		 * returning the value we actually used in getsockopt
		 * is the most desirable behavior.
		 */
		sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
		break;

	case SO_RCVBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_rcvbuf;

	case SO_KEEPALIVE:
		if (sk->sk_prot->keepalive)
			sk->sk_prot->keepalive(sk, valbool);
		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
		break;

	case SO_OOBINLINE:
		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
		break;

	case SO_NO_CHECK:
		sk->sk_no_check_tx = valbool;
		break;

	case SO_PRIORITY:
		if ((val >= 0 && val <= 6) ||
		    ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
			sk->sk_priority = val;
		else
			ret = -EPERM;
		break;

	case SO_LINGER:
		if (optlen < sizeof(ling)) {
			ret = -EINVAL;	/* 1003.1g */
			break;
		}
		if (copy_from_user(&ling, optval, sizeof(ling))) {
			ret = -EFAULT;
			break;
		}
		if (!ling.l_onoff)
			sock_reset_flag(sk, SOCK_LINGER);
		else {
#if (BITS_PER_LONG == 32)
			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
			else
#endif
				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
			sock_set_flag(sk, SOCK_LINGER);
		}
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("setsockopt");
		break;

	case SO_PASSCRED:
		if (valbool)
			set_bit(SOCK_PASSCRED, &sock->flags);
		else
			clear_bit(SOCK_PASSCRED, &sock->flags);
		break;

	case SO_TIMESTAMP:
	case SO_TIMESTAMPNS:
		if (valbool)  {
			if (optname == SO_TIMESTAMP)
				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
			else
				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
			sock_set_flag(sk, SOCK_RCVTSTAMP);
			sock_enable_timestamp(sk, SOCK_TIMESTAMP);
		} else {
			sock_reset_flag(sk, SOCK_RCVTSTAMP);
			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
		}
		break;

	case SO_TIMESTAMPING:
		if (val & ~SOF_TIMESTAMPING_MASK) {
			ret = -EINVAL;
			break;
		}

		if (val & SOF_TIMESTAMPING_OPT_ID &&
		    !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
			if (sk->sk_protocol == IPPROTO_TCP &&
			    sk->sk_type == SOCK_STREAM) {
				if ((1 << sk->sk_state) &
				    (TCPF_CLOSE | TCPF_LISTEN)) {
					ret = -EINVAL;
					break;
				}
				sk->sk_tskey = tcp_sk(sk)->snd_una;
			} else {
				sk->sk_tskey = 0;
			}
		}

		if (val & SOF_TIMESTAMPING_OPT_STATS &&
		    !(val & SOF_TIMESTAMPING_OPT_TSONLY)) {
			ret = -EINVAL;
			break;
		}

		sk->sk_tsflags = val;
		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
			sock_enable_timestamp(sk,
					      SOCK_TIMESTAMPING_RX_SOFTWARE);
		else
			sock_disable_timestamp(sk,
					       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
		break;

	case SO_RCVLOWAT:
		if (val < 0)
			val = INT_MAX;
		sk->sk_rcvlowat = val ? : 1;
		break;

	case SO_RCVTIMEO:
		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
		break;

	case SO_SNDTIMEO:
		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
		break;

	case SO_ATTACH_FILTER:
		ret = -EINVAL;
		if (optlen == sizeof(struct sock_fprog)) {
			struct sock_fprog fprog;

			ret = -EFAULT;
			if (copy_from_user(&fprog, optval, sizeof(fprog)))
				break;

			ret = sk_attach_filter(&fprog, sk);
		}
		break;

	case SO_ATTACH_BPF:
		ret = -EINVAL;
		if (optlen == sizeof(u32)) {
			u32 ufd;

			ret = -EFAULT;
			if (copy_from_user(&ufd, optval, sizeof(ufd)))
				break;

			ret = sk_attach_bpf(ufd, sk);
		}
		break;

	case SO_ATTACH_REUSEPORT_CBPF:
		ret = -EINVAL;
		if (optlen == sizeof(struct sock_fprog)) {
			struct sock_fprog fprog;

			ret = -EFAULT;
			if (copy_from_user(&fprog, optval, sizeof(fprog)))
				break;

			ret = sk_reuseport_attach_filter(&fprog, sk);
		}
		break;

	case SO_ATTACH_REUSEPORT_EBPF:
		ret = -EINVAL;
		if (optlen == sizeof(u32)) {
			u32 ufd;

			ret = -EFAULT;
			if (copy_from_user(&ufd, optval, sizeof(ufd)))
				break;

			ret = sk_reuseport_attach_bpf(ufd, sk);
		}
		break;

	case SO_DETACH_FILTER:
		ret = sk_detach_filter(sk);
		break;

	case SO_LOCK_FILTER:
		if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
			ret = -EPERM;
		else
			sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
		break;

	case SO_PASSSEC:
		if (valbool)
			set_bit(SOCK_PASSSEC, &sock->flags);
		else
			clear_bit(SOCK_PASSSEC, &sock->flags);
		break;
	case SO_MARK:
		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
			ret = -EPERM;
		else
			sk->sk_mark = val;
		break;

	case SO_RXQ_OVFL:
		sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
		break;

	case SO_WIFI_STATUS:
		sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
		break;

	case SO_PEEK_OFF:
		if (sock->ops->set_peek_off)
			ret = sock->ops->set_peek_off(sk, val);
		else
			ret = -EOPNOTSUPP;
		break;

	case SO_NOFCS:
		sock_valbool_flag(sk, SOCK_NOFCS, valbool);
		break;

	case SO_SELECT_ERR_QUEUE:
		sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
		break;

#ifdef CONFIG_NET_RX_BUSY_POLL
	case SO_BUSY_POLL:
		/* allow unprivileged users to decrease the value */
		if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
			ret = -EPERM;
		else {
			if (val < 0)
				ret = -EINVAL;
			else
				sk->sk_ll_usec = val;
		}
		break;
#endif

	case SO_MAX_PACING_RATE:
		sk->sk_max_pacing_rate = val;
		sk->sk_pacing_rate = min(sk->sk_pacing_rate,
					 sk->sk_max_pacing_rate);
		break;

	case SO_INCOMING_CPU:
		sk->sk_incoming_cpu = val;
		break;

	case SO_CNX_ADVICE:
		if (val == 1)
			dst_negative_advice(sk);
		break;
	default:
		ret = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);
	return ret;
}
EXPORT_SYMBOL(sock_setsockopt);
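
/*
 * Userspace view (a sketch) of the SO_RCVBUF doubling described above:
 * the kernel stores twice the requested value and getsockopt() reports
 * what was stored, e.g.
 *
 *	int val = 65536, out;
 *	socklen_t len = sizeof(out);
 *	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val));
 *	getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &out, &len);
 *	(out is now 131072, assuming 65536 <= net.core.rmem_max)
 */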
Linus Torvalds1da177e2005-04-16 15:20:36 -07001074
1075
stephen hemminger8f098982014-01-03 09:17:14 -08001076static void cred_to_ucred(struct pid *pid, const struct cred *cred,
1077 struct ucred *ucred)
Eric W. Biederman3f551f92010-06-13 03:28:59 +00001078{
1079 ucred->pid = pid_vnr(pid);
1080 ucred->uid = ucred->gid = -1;
1081 if (cred) {
1082 struct user_namespace *current_ns = current_user_ns();
1083
Eric W. Biedermanb2e4f542012-05-23 16:39:45 -06001084 ucred->uid = from_kuid_munged(current_ns, cred->euid);
1085 ucred->gid = from_kgid_munged(current_ns, cred->egid);
Eric W. Biederman3f551f92010-06-13 03:28:59 +00001086 }
1087}
1088
Linus Torvalds1da177e2005-04-16 15:20:36 -07001089int sock_getsockopt(struct socket *sock, int level, int optname,
1090 char __user *optval, int __user *optlen)
1091{
1092 struct sock *sk = sock->sk;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001093
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001094 union {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001095 int val;
1096 struct linger ling;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001097 struct timeval tm;
1098 } v;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001099
H Hartley Sweeten4d0392b2010-01-15 01:08:58 -08001100 int lv = sizeof(int);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101 int len;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001102
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001103 if (get_user(len, optlen))
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001104 return -EFAULT;
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001105 if (len < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001106 return -EINVAL;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001107
Eugene Teo50fee1d2009-02-23 15:38:41 -08001108 memset(&v, 0, sizeof(v));
Clément Lecignedf0bca02009-02-12 16:59:09 -08001109
Eric Dumazet2a915252009-05-27 11:30:05 +00001110 switch (optname) {
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001111 case SO_DEBUG:
1112 v.val = sock_flag(sk, SOCK_DBG);
1113 break;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001114
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001115 case SO_DONTROUTE:
1116 v.val = sock_flag(sk, SOCK_LOCALROUTE);
1117 break;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001118
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001119 case SO_BROADCAST:
Eric Dumazet1b23a5d2012-05-16 05:57:07 +00001120 v.val = sock_flag(sk, SOCK_BROADCAST);
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001121 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001122
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001123 case SO_SNDBUF:
1124 v.val = sk->sk_sndbuf;
1125 break;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001126
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001127 case SO_RCVBUF:
1128 v.val = sk->sk_rcvbuf;
1129 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001130
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001131 case SO_REUSEADDR:
1132 v.val = sk->sk_reuse;
1133 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001134
Tom Herbert055dc212013-01-22 09:49:50 +00001135 case SO_REUSEPORT:
1136 v.val = sk->sk_reuseport;
1137 break;
1138
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001139 case SO_KEEPALIVE:
Eric Dumazet1b23a5d2012-05-16 05:57:07 +00001140 v.val = sock_flag(sk, SOCK_KEEPOPEN);
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001141 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001142
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001143 case SO_TYPE:
1144 v.val = sk->sk_type;
1145 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001146
Jan Engelhardt49c794e2009-08-04 07:28:28 +00001147 case SO_PROTOCOL:
1148 v.val = sk->sk_protocol;
1149 break;
1150
Jan Engelhardt0d6038e2009-08-04 07:28:29 +00001151 case SO_DOMAIN:
1152 v.val = sk->sk_family;
1153 break;
1154
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001155 case SO_ERROR:
1156 v.val = -sock_error(sk);
Eric Dumazet2a915252009-05-27 11:30:05 +00001157 if (v.val == 0)
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001158 v.val = xchg(&sk->sk_err_soft, 0);
1159 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001160
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001161 case SO_OOBINLINE:
Eric Dumazet1b23a5d2012-05-16 05:57:07 +00001162 v.val = sock_flag(sk, SOCK_URGINLINE);
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001163 break;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001164
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001165 case SO_NO_CHECK:
Tom Herbert28448b82014-05-23 08:47:19 -07001166 v.val = sk->sk_no_check_tx;
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001167 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001168
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001169 case SO_PRIORITY:
1170 v.val = sk->sk_priority;
1171 break;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001172
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001173 case SO_LINGER:
1174 lv = sizeof(v.ling);
Eric Dumazet1b23a5d2012-05-16 05:57:07 +00001175 v.ling.l_onoff = sock_flag(sk, SOCK_LINGER);
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001176 v.ling.l_linger = sk->sk_lingertime / HZ;
1177 break;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001178
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001179 case SO_BSDCOMPAT:
1180 sock_warn_obsolete_bsdism("getsockopt");
1181 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001182
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001183 case SO_TIMESTAMP:
Eric Dumazet92f37fd2007-03-25 22:14:49 -07001184 v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
1185 !sock_flag(sk, SOCK_RCVTSTAMPNS);
1186 break;
1187
1188 case SO_TIMESTAMPNS:
1189 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001190 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001191
Patrick Ohly20d49472009-02-12 05:03:38 +00001192 case SO_TIMESTAMPING:
Willem de Bruijnb9f40e22014-08-04 22:11:46 -04001193 v.val = sk->sk_tsflags;
Patrick Ohly20d49472009-02-12 05:03:38 +00001194 break;
1195
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001196 case SO_RCVTIMEO:
Eric Dumazet2a915252009-05-27 11:30:05 +00001197 lv = sizeof(struct timeval);
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001198 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
1199 v.tm.tv_sec = 0;
1200 v.tm.tv_usec = 0;
1201 } else {
1202 v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
Gao Feng8ccde4c2017-02-21 17:09:19 +08001203 v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * USEC_PER_SEC) / HZ;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001204 }
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001205 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001206
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001207 case SO_SNDTIMEO:
Eric Dumazet2a915252009-05-27 11:30:05 +00001208 lv = sizeof(struct timeval);
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001209 if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
1210 v.tm.tv_sec = 0;
1211 v.tm.tv_usec = 0;
1212 } else {
1213 v.tm.tv_sec = sk->sk_sndtimeo / HZ;
Gao Feng8ccde4c2017-02-21 17:09:19 +08001214 v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * USEC_PER_SEC) / HZ;
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001215 }
1216 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001217
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001218 case SO_RCVLOWAT:
1219 v.val = sk->sk_rcvlowat;
1220 break;
Catherine Zhang877ce7c2006-06-29 12:27:47 -07001221
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001222 case SO_SNDLOWAT:
Eric Dumazet2a915252009-05-27 11:30:05 +00001223 v.val = 1;
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001224 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001225
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001226 case SO_PASSCRED:
Eric Dumazet82981932012-04-26 20:07:59 +00001227 v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001228 break;
1229
1230 case SO_PEERCRED:
Eric W. Biederman109f6e32010-06-13 03:30:14 +00001231 {
1232 struct ucred peercred;
1233 if (len > sizeof(peercred))
1234 len = sizeof(peercred);
1235 cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
1236 if (copy_to_user(optval, &peercred, len))
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001237 return -EFAULT;
1238 goto lenout;
Eric W. Biederman109f6e32010-06-13 03:30:14 +00001239 }
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001240
1241 case SO_PEERNAME:
1242 {
1243 char address[128];
1244
1245 if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
1246 return -ENOTCONN;
1247 if (lv < len)
1248 return -EINVAL;
1249 if (copy_to_user(optval, address, len))
1250 return -EFAULT;
1251 goto lenout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001252 }
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001253
1254 /* Dubious BSD thing... Probably nobody even uses it, but
1255 * the UNIX standard wants it for whatever reason... -DaveM
1256 */
1257 case SO_ACCEPTCONN:
1258 v.val = sk->sk_state == TCP_LISTEN;
1259 break;
1260
1261 case SO_PASSSEC:
Eric Dumazet82981932012-04-26 20:07:59 +00001262 v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001263 break;
1264
1265 case SO_PEERSEC:
1266 return security_socket_getpeersec_stream(sock, optval, optlen, len);
1267
Laszlo Attila Toth4a19ec52008-01-30 19:08:16 -08001268 case SO_MARK:
1269 v.val = sk->sk_mark;
1270 break;
1271
Neil Horman3b885782009-10-12 13:26:31 -07001272 case SO_RXQ_OVFL:
Eric Dumazet1b23a5d2012-05-16 05:57:07 +00001273 v.val = sock_flag(sk, SOCK_RXQ_OVFL);
Neil Horman3b885782009-10-12 13:26:31 -07001274 break;
1275
Johannes Berg6e3e9392011-11-09 10:15:42 +01001276 case SO_WIFI_STATUS:
Eric Dumazet1b23a5d2012-05-16 05:57:07 +00001277 v.val = sock_flag(sk, SOCK_WIFI_STATUS);
Johannes Berg6e3e9392011-11-09 10:15:42 +01001278 break;
1279
Pavel Emelyanovef64a542012-02-21 07:31:34 +00001280 case SO_PEEK_OFF:
1281 if (!sock->ops->set_peek_off)
1282 return -EOPNOTSUPP;
1283
1284 v.val = sk->sk_peek_off;
1285 break;
David S. Millerbc2f7992012-02-24 14:48:34 -05001286 case SO_NOFCS:
Eric Dumazet1b23a5d2012-05-16 05:57:07 +00001287 v.val = sock_flag(sk, SOCK_NOFCS);
David S. Millerbc2f7992012-02-24 14:48:34 -05001288 break;
Brian Haleyc91f6df2012-11-26 05:21:08 +00001289
Pavel Emelyanovf7b86bf2012-10-18 23:55:56 +00001290 case SO_BINDTODEVICE:
Brian Haleyc91f6df2012-11-26 05:21:08 +00001291 return sock_getbindtodevice(sk, optval, optlen, len);
1292
Pavel Emelyanova8fc9272012-11-01 02:01:48 +00001293 case SO_GET_FILTER:
1294 len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
1295 if (len < 0)
1296 return len;
1297
1298 goto lenout;
Brian Haleyc91f6df2012-11-26 05:21:08 +00001299
Vincent Bernatd59577b2013-01-16 22:55:49 +01001300 case SO_LOCK_FILTER:
1301 v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1302 break;
1303
Michal Sekletarea02f942014-01-17 17:09:45 +01001304 case SO_BPF_EXTENSIONS:
1305 v.val = bpf_tell_extensions();
1306 break;
1307
Keller, Jacob E7d4c04f2013-03-28 11:19:25 +00001308 case SO_SELECT_ERR_QUEUE:
1309 v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
1310 break;
1311
Cong Wange0d10952013-08-01 11:10:25 +08001312#ifdef CONFIG_NET_RX_BUSY_POLL
Eliezer Tamir64b0dc52013-07-10 17:13:36 +03001313 case SO_BUSY_POLL:
Eliezer Tamirdafcc432013-06-14 16:33:57 +03001314 v.val = sk->sk_ll_usec;
1315 break;
1316#endif
1317
Eric Dumazet62748f32013-09-24 08:20:52 -07001318 case SO_MAX_PACING_RATE:
1319 v.val = sk->sk_max_pacing_rate;
1320 break;
1321
Eric Dumazet2c8c56e2014-11-11 05:54:28 -08001322 case SO_INCOMING_CPU:
1323 v.val = sk->sk_incoming_cpu;
1324 break;
1325
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001326 default:
YOSHIFUJI Hideaki/吉藤英明443b5992015-03-23 18:04:13 +09001327		/* We implement SO_SNDLOWAT etc. to not be settable
1328 * (1003.1g 7).
1329 */
Stephen Hemmingere71a4782007-04-10 20:10:33 -07001330 return -ENOPROTOOPT;
1331 }
1332
Linus Torvalds1da177e2005-04-16 15:20:36 -07001333 if (len > lv)
1334 len = lv;
1335 if (copy_to_user(optval, &v, len))
1336 return -EFAULT;
1337lenout:
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001338 if (put_user(len, optlen))
1339 return -EFAULT;
1340 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341}
1342
Ingo Molnara5b5bb92006-07-03 00:25:35 -07001343/*
1344 * Initialize an sk_lock.
1345 *
1346 * (We also register the sk_lock with the lock validator.)
1347 */
Dave Jonesb6f99a22007-03-22 12:27:49 -07001348static inline void sock_lock_init(struct sock *sk)
Ingo Molnara5b5bb92006-07-03 00:25:35 -07001349{
Peter Zijlstraed075362006-12-06 20:35:24 -08001350 sock_lock_init_class_and_name(sk,
1351 af_family_slock_key_strings[sk->sk_family],
1352 af_family_slock_keys + sk->sk_family,
1353 af_family_key_strings[sk->sk_family],
1354 af_family_keys + sk->sk_family);
Ingo Molnara5b5bb92006-07-03 00:25:35 -07001355}
1356
Eric Dumazet4dc6dc72009-07-15 23:13:10 +00001357/*
1358 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
1359 * even temporarily, because of RCU lookups. sk_node should also be left as is.
Eric Dumazet68835ab2010-11-30 19:04:07 +00001360 * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
Eric Dumazet4dc6dc72009-07-15 23:13:10 +00001361 */
Pavel Emelyanovf1a6c4d2007-11-01 00:29:45 -07001362static void sock_copy(struct sock *nsk, const struct sock *osk)
1363{
1364#ifdef CONFIG_SECURITY_NETWORK
1365 void *sptr = nsk->sk_security;
1366#endif
Eric Dumazet68835ab2010-11-30 19:04:07 +00001367 memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
1368
1369 memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
1370 osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
1371
Pavel Emelyanovf1a6c4d2007-11-01 00:29:45 -07001372#ifdef CONFIG_SECURITY_NETWORK
1373 nsk->sk_security = sptr;
1374 security_sk_clone(osk, nsk);
1375#endif
1376}
1377
Pavel Emelyanov2e4afe72007-11-01 00:36:26 -07001378static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
1379 int family)
Pavel Emelyanovc308c1b22007-11-01 00:33:50 -07001380{
1381 struct sock *sk;
1382 struct kmem_cache *slab;
1383
1384 slab = prot->slab;
Eric Dumazete912b112009-07-08 19:36:05 +00001385 if (slab != NULL) {
1386 sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
1387 if (!sk)
1388 return sk;
Eric Dumazetba2489b2016-08-23 11:39:29 -07001389 if (priority & __GFP_ZERO)
1390 sk_prot_clear_nulls(sk, prot->obj_size);
Octavian Purdilafcbdf092010-12-16 14:26:56 -08001391 } else
Pavel Emelyanovc308c1b22007-11-01 00:33:50 -07001392 sk = kmalloc(prot->obj_size, priority);
1393
Pavel Emelyanov2e4afe72007-11-01 00:36:26 -07001394 if (sk != NULL) {
Vegard Nossuma98b65a2009-02-26 14:46:57 +01001395 kmemcheck_annotate_bitfield(sk, flags);
1396
Pavel Emelyanov2e4afe72007-11-01 00:36:26 -07001397 if (security_sk_alloc(sk, family, priority))
1398 goto out_free;
1399
1400 if (!try_module_get(prot->owner))
1401 goto out_free_sec;
Krishna Kumare022f0b2009-10-19 23:46:20 +00001402 sk_tx_queue_clear(sk);
Pavel Emelyanov2e4afe72007-11-01 00:36:26 -07001403 }
1404
Pavel Emelyanovc308c1b22007-11-01 00:33:50 -07001405 return sk;
Pavel Emelyanov2e4afe72007-11-01 00:36:26 -07001406
1407out_free_sec:
1408 security_sk_free(sk);
1409out_free:
1410 if (slab != NULL)
1411 kmem_cache_free(slab, sk);
1412 else
1413 kfree(sk);
1414 return NULL;
Pavel Emelyanovc308c1b22007-11-01 00:33:50 -07001415}
1416
1417static void sk_prot_free(struct proto *prot, struct sock *sk)
1418{
1419 struct kmem_cache *slab;
Pavel Emelyanov2e4afe72007-11-01 00:36:26 -07001420 struct module *owner;
Pavel Emelyanovc308c1b22007-11-01 00:33:50 -07001421
Pavel Emelyanov2e4afe72007-11-01 00:36:26 -07001422 owner = prot->owner;
Pavel Emelyanovc308c1b22007-11-01 00:33:50 -07001423 slab = prot->slab;
Pavel Emelyanov2e4afe72007-11-01 00:36:26 -07001424
Tejun Heobd1060a2015-12-07 17:38:53 -05001425 cgroup_sk_free(&sk->sk_cgrp_data);
Johannes Weiner2d758072016-10-07 17:00:58 -07001426 mem_cgroup_sk_free(sk);
Pavel Emelyanov2e4afe72007-11-01 00:36:26 -07001427 security_sk_free(sk);
Pavel Emelyanovc308c1b22007-11-01 00:33:50 -07001428 if (slab != NULL)
1429 kmem_cache_free(slab, sk);
1430 else
1431 kfree(sk);
Pavel Emelyanov2e4afe72007-11-01 00:36:26 -07001432 module_put(owner);
Pavel Emelyanovc308c1b22007-11-01 00:33:50 -07001433}
1434
Linus Torvalds1da177e2005-04-16 15:20:36 -07001435/**
1436 * sk_alloc - All socket objects are allocated here
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07001437 * @net: the applicable net namespace
Pavel Pisa4dc3b162005-05-01 08:59:25 -07001438 * @family: protocol family
1439 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1440 * @prot: struct proto associated with this new sock instance
Eric W. Biederman11aa9c22015-05-08 21:09:13 -05001441 * @kern: is this to be a kernel socket?
Linus Torvalds1da177e2005-04-16 15:20:36 -07001442 */
Eric W. Biederman1b8d7ae2007-10-08 23:24:22 -07001443struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
Eric W. Biederman11aa9c22015-05-08 21:09:13 -05001444 struct proto *prot, int kern)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001445{
Pavel Emelyanovc308c1b22007-11-01 00:33:50 -07001446 struct sock *sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001447
Pavel Emelyanov154adbc2007-11-01 00:38:43 -07001448 sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001449 if (sk) {
Pavel Emelyanov154adbc2007-11-01 00:38:43 -07001450 sk->sk_family = family;
1451 /*
1452 * See comment in struct sock definition to understand
1453 * why we need sk_prot_creator -acme
1454 */
1455 sk->sk_prot = sk->sk_prot_creator = prot;
1456 sock_lock_init(sk);
Eric W. Biederman26abe142015-05-08 21:10:31 -05001457 sk->sk_net_refcnt = kern ? 0 : 1;
1458 if (likely(sk->sk_net_refcnt))
1459 get_net(net);
1460 sock_net_set(sk, net);
Jarek Poplawskid66ee052009-08-30 23:15:36 +00001461 atomic_set(&sk->sk_wmem_alloc, 1);
Herbert Xuf8451722010-05-24 00:12:34 -07001462
Johannes Weiner2d758072016-10-07 17:00:58 -07001463 mem_cgroup_sk_alloc(sk);
Johannes Weinerd979a392016-09-19 14:44:38 -07001464 cgroup_sk_alloc(&sk->sk_cgrp_data);
Tejun Heo2a56a1f2015-12-07 17:38:52 -05001465 sock_update_classid(&sk->sk_cgrp_data);
1466 sock_update_netprioidx(&sk->sk_cgrp_data);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001467 }
Frank Filza79af592005-09-27 15:23:38 -07001468
Pavel Emelyanov2e4afe72007-11-01 00:36:26 -07001469 return sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001470}
Eric Dumazet2a915252009-05-27 11:30:05 +00001471EXPORT_SYMBOL(sk_alloc);
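/* Usage sketch (illustrative only; "my_proto" is a hypothetical
 * struct proto registered elsewhere): a protocol family's create()
 * hook typically does
 *
 *	sk = sk_alloc(net, PF_INET, GFP_KERNEL, &my_proto, kern);
 *	if (!sk)
 *		return -ENOBUFS;
 *	sock_init_data(sock, sk);
 *
 * Note that sk_alloc() ORs __GFP_ZERO into the priority itself, so the
 * new sock arrives zeroed before the fields above are filled in.
 */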
Linus Torvalds1da177e2005-04-16 15:20:36 -07001472
Eric Dumazeta4298e42016-04-01 08:52:12 -07001473/* Sockets having SOCK_RCU_FREE will call this function after one RCU
1474 * grace period. This is the case for UDP sockets and TCP listeners.
1475 */
1476static void __sk_destruct(struct rcu_head *head)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001477{
Eric Dumazeta4298e42016-04-01 08:52:12 -07001478 struct sock *sk = container_of(head, struct sock, sk_rcu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001479 struct sk_filter *filter;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001480
1481 if (sk->sk_destruct)
1482 sk->sk_destruct(sk);
1483
Paul E. McKenneya898def2010-02-22 17:04:49 -08001484 filter = rcu_dereference_check(sk->sk_filter,
1485 atomic_read(&sk->sk_wmem_alloc) == 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001486 if (filter) {
Pavel Emelyanov309dd5f2007-10-17 21:21:51 -07001487 sk_filter_uncharge(sk, filter);
Stephen Hemmingera9b3cd72011-08-01 16:19:00 +00001488 RCU_INIT_POINTER(sk->sk_filter, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001489 }
Craig Gallek538950a2016-01-04 17:41:47 -05001490 if (rcu_access_pointer(sk->sk_reuseport_cb))
1491 reuseport_detach_sock(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001492
Eric Dumazet08e29af2011-11-28 12:04:18 +00001493 sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001494
1495 if (atomic_read(&sk->sk_omem_alloc))
Joe Perchese005d192012-05-16 19:58:40 +00001496 pr_debug("%s: optmem leakage (%d bytes) detected\n",
1497 __func__, atomic_read(&sk->sk_omem_alloc));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001498
Eric W. Biederman109f6e32010-06-13 03:30:14 +00001499 if (sk->sk_peer_cred)
1500 put_cred(sk->sk_peer_cred);
1501 put_pid(sk->sk_peer_pid);
Eric W. Biederman26abe142015-05-08 21:10:31 -05001502 if (likely(sk->sk_net_refcnt))
1503 put_net(sock_net(sk));
Pavel Emelyanovc308c1b22007-11-01 00:33:50 -07001504 sk_prot_free(sk->sk_prot_creator, sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001505}
Eric Dumazet2b85a342009-06-11 02:55:43 -07001506
Eric Dumazeta4298e42016-04-01 08:52:12 -07001507void sk_destruct(struct sock *sk)
1508{
1509 if (sock_flag(sk, SOCK_RCU_FREE))
1510 call_rcu(&sk->sk_rcu, __sk_destruct);
1511 else
1512 __sk_destruct(&sk->sk_rcu);
1513}
1514
Craig Gallekeb4cb002015-06-15 11:26:18 -04001515static void __sk_free(struct sock *sk)
1516{
Craig Gallekb9226222015-06-30 12:49:32 -04001517 if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
Craig Gallekeb4cb002015-06-15 11:26:18 -04001518 sock_diag_broadcast_destroy(sk);
1519 else
1520 sk_destruct(sk);
1521}
1522
Eric Dumazet2b85a342009-06-11 02:55:43 -07001523void sk_free(struct sock *sk)
1524{
1525 /*
Lucas De Marchi25985ed2011-03-30 22:57:33 -03001526	 * We subtract one from sk_wmem_alloc; if the result is not zero,
Eric Dumazet2b85a342009-06-11 02:55:43 -07001527	 * some packets are still in a tx queue.
1528	 * In that case, sock_wfree() will call __sk_free(sk) later
1529 */
1530 if (atomic_dec_and_test(&sk->sk_wmem_alloc))
1531 __sk_free(sk);
1532}
Eric Dumazet2a915252009-05-27 11:30:05 +00001533EXPORT_SYMBOL(sk_free);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001534
Paolo Abeni581319c2017-03-09 13:54:08 +01001535static void sk_init_common(struct sock *sk)
1536{
1537 skb_queue_head_init(&sk->sk_receive_queue);
1538 skb_queue_head_init(&sk->sk_write_queue);
1539 skb_queue_head_init(&sk->sk_error_queue);
1540
1541 rwlock_init(&sk->sk_callback_lock);
1542 lockdep_set_class_and_name(&sk->sk_receive_queue.lock,
1543 af_rlock_keys + sk->sk_family,
1544 af_family_rlock_key_strings[sk->sk_family]);
1545 lockdep_set_class_and_name(&sk->sk_write_queue.lock,
1546 af_wlock_keys + sk->sk_family,
1547 af_family_wlock_key_strings[sk->sk_family]);
1548 lockdep_set_class_and_name(&sk->sk_error_queue.lock,
1549 af_elock_keys + sk->sk_family,
1550 af_family_elock_key_strings[sk->sk_family]);
1551 lockdep_set_class_and_name(&sk->sk_callback_lock,
1552 af_callback_keys + sk->sk_family,
1553 af_family_clock_key_strings[sk->sk_family]);
1554}
1555
Eric Dumazete56c57d2011-11-08 17:07:07 -05001556/**
1557 * sk_clone_lock - clone a socket, and lock its clone
1558 * @sk: the socket to clone
1559 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1560 *
1561 * Caller must unlock socket even in error path (bh_unlock_sock(newsk))
1562 */
1563struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001564{
Pavel Emelyanov8fd1d172007-11-01 00:37:32 -07001565 struct sock *newsk;
Alexei Starovoitov278571b2014-07-30 20:34:12 -07001566 bool is_charged = true;
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001567
Pavel Emelyanov8fd1d172007-11-01 00:37:32 -07001568 newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001569 if (newsk != NULL) {
1570 struct sk_filter *filter;
1571
Venkat Yekkirala892c1412006-08-04 23:08:56 -07001572 sock_copy(newsk, sk);
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001573
1574 /* SANITY */
Sowmini Varadhan8a681732015-07-30 15:50:36 +02001575 if (likely(newsk->sk_net_refcnt))
1576 get_net(sock_net(newsk));
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001577 sk_node_init(&newsk->sk_node);
1578 sock_lock_init(newsk);
1579 bh_lock_sock(newsk);
Eric Dumazetfa438cc2007-03-04 16:05:44 -08001580 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
Zhu Yi8eae9392010-03-04 18:01:40 +00001581 newsk->sk_backlog.len = 0;
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001582
1583 atomic_set(&newsk->sk_rmem_alloc, 0);
Eric Dumazet2b85a342009-06-11 02:55:43 -07001584 /*
1585 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
1586 */
1587 atomic_set(&newsk->sk_wmem_alloc, 1);
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001588 atomic_set(&newsk->sk_omem_alloc, 0);
Paolo Abeni581319c2017-03-09 13:54:08 +01001589 sk_init_common(newsk);
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001590
1591 newsk->sk_dst_cache = NULL;
Julian Anastasov9b8805a2017-02-06 23:14:11 +02001592 newsk->sk_dst_pending_confirm = 0;
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001593 newsk->sk_wmem_queued = 0;
1594 newsk->sk_forward_alloc = 0;
Eric Dumazet9caad862016-04-01 08:52:20 -07001595 atomic_set(&newsk->sk_drops, 0);
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001596 newsk->sk_send_head = NULL;
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001597 newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
1598
1599 sock_reset_flag(newsk, SOCK_DONE);
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001600
Eric Dumazet0d7da9d2010-10-25 03:47:05 +00001601 filter = rcu_dereference_protected(newsk->sk_filter, 1);
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001602 if (filter != NULL)
Alexei Starovoitov278571b2014-07-30 20:34:12 -07001603 /* though it's an empty new sock, the charging may fail
1604		 * if sysctl_optmem_max was changed between the creation of
1605		 * the original socket and its cloning
1606 */
1607 is_charged = sk_filter_charge(newsk, filter);
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001608
Eric Dumazetd188ba82015-12-08 07:22:02 -08001609 if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
Arnaldo Carvalho de Melo94352d42017-03-01 16:35:08 -03001610 sk_free_unlock_clone(newsk);
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001611 newsk = NULL;
1612 goto out;
1613 }
Craig Gallekfa463492016-02-10 11:50:39 -05001614 RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001615
1616 newsk->sk_err = 0;
Eric Dumazete551c322016-10-28 13:40:24 -07001617 newsk->sk_err_soft = 0;
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001618 newsk->sk_priority = 0;
Eric Dumazet2c8c56e2014-11-11 05:54:28 -08001619 newsk->sk_incoming_cpu = raw_smp_processor_id();
Eric Dumazet33cf7c92015-03-11 18:53:14 -07001620 atomic64_set(&newsk->sk_cookie, 0);
Johannes Weinerd979a392016-09-19 14:44:38 -07001621
Johannes Weiner2d758072016-10-07 17:00:58 -07001622 mem_cgroup_sk_alloc(newsk);
Johannes Weinerd979a392016-09-19 14:44:38 -07001623 cgroup_sk_alloc(&newsk->sk_cgrp_data);
1624
Eric Dumazet4dc6dc72009-07-15 23:13:10 +00001625 /*
1626 * Before updating sk_refcnt, we must commit prior changes to memory
1627 * (Documentation/RCU/rculist_nulls.txt for details)
1628 */
1629 smp_wmb();
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001630 atomic_set(&newsk->sk_refcnt, 2);
1631
1632 /*
1633 * Increment the counter in the same struct proto as the master
1634 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
1635 * is the same as sk->sk_prot->socks, as this field was copied
1636 * with memcpy).
1637 *
1638 * This _changes_ the previous behaviour, where
1639		 * tcp_create_openreq_child always incremented the
1640		 * equivalent of tcp_prot->socks (inet_sock_nr), so this has
1641		 * to be taken into account in all callers. -acme
1642 */
1643 sk_refcnt_debug_inc(newsk);
David S. Miller972692e2008-06-17 22:41:38 -07001644 sk_set_socket(newsk, NULL);
Eric Dumazet43815482010-04-29 11:01:49 +00001645 newsk->sk_wq = NULL;
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001646
1647 if (newsk->sk_prot->sockets_allocated)
Glauber Costa180d8cd2011-12-11 21:47:02 +00001648 sk_sockets_allocated_inc(newsk);
Octavian Purdila704da5602010-01-08 00:00:09 -08001649
Hannes Frederic Sowa080a2702015-10-26 13:51:37 +01001650 if (sock_needs_netstamp(sk) &&
1651 newsk->sk_flags & SK_FLAGS_TIMESTAMP)
Octavian Purdila704da5602010-01-08 00:00:09 -08001652 net_enable_timestamp();
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001653 }
1654out:
1655 return newsk;
1656}
Eric Dumazete56c57d2011-11-08 17:07:07 -05001657EXPORT_SYMBOL_GPL(sk_clone_lock);
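/* Usage sketch (illustrative): per the kerneldoc above, the caller owns
 * the clone's lock even on success, e.g.
 *
 *	newsk = sk_clone_lock(sk, GFP_ATOMIC);
 *	if (newsk) {
 *		... protocol-specific setup of newsk ...
 *		bh_unlock_sock(newsk);
 *	}
 *
 * This is the pattern the TCP passive-open path builds on via
 * inet_csk_clone_lock().
 */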
Arnaldo Carvalho de Melo87d11ce2005-08-09 20:10:12 -07001658
Arnaldo Carvalho de Melo94352d42017-03-01 16:35:08 -03001659void sk_free_unlock_clone(struct sock *sk)
1660{
1661	/* It is still a raw copy of the parent, so invalidate
1662	 * its destructor and do a plain sk_free() */
1663 sk->sk_destruct = NULL;
1664 bh_unlock_sock(sk);
1665 sk_free(sk);
1666}
1667EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
1668
Andi Kleen99580892007-04-20 17:12:43 -07001669void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1670{
Eric Dumazetd6a4e262015-05-26 08:55:28 -07001671 u32 max_segs = 1;
1672
Eric Dumazet6bd4f352015-12-02 21:53:57 -08001673 sk_dst_set(sk, dst);
Andi Kleen99580892007-04-20 17:12:43 -07001674 sk->sk_route_caps = dst->dev->features;
1675 if (sk->sk_route_caps & NETIF_F_GSO)
Herbert Xu4fcd6b92007-05-31 22:15:50 -07001676 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
Eric Dumazeta4654192010-05-16 00:36:33 -07001677 sk->sk_route_caps &= ~sk->sk_route_nocaps;
Andi Kleen99580892007-04-20 17:12:43 -07001678 if (sk_can_gso(sk)) {
Peter P Waskiewicz Jr82cc1a72008-03-21 03:43:19 -07001679 if (dst->header_len) {
Andi Kleen99580892007-04-20 17:12:43 -07001680 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
Peter P Waskiewicz Jr82cc1a72008-03-21 03:43:19 -07001681 } else {
Andi Kleen99580892007-04-20 17:12:43 -07001682 sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
Peter P Waskiewicz Jr82cc1a72008-03-21 03:43:19 -07001683 sk->sk_gso_max_size = dst->dev->gso_max_size;
Eric Dumazetd6a4e262015-05-26 08:55:28 -07001684 max_segs = max_t(u32, dst->dev->gso_max_segs, 1);
Peter P Waskiewicz Jr82cc1a72008-03-21 03:43:19 -07001685 }
Andi Kleen99580892007-04-20 17:12:43 -07001686 }
Eric Dumazetd6a4e262015-05-26 08:55:28 -07001687 sk->sk_gso_max_segs = max_segs;
Andi Kleen99580892007-04-20 17:12:43 -07001688}
1689EXPORT_SYMBOL_GPL(sk_setup_caps);
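/* Usage sketch (illustrative; routing arguments elided): connect()
 * paths attach the route and derive segmentation capabilities in one
 * step, e.g.
 *
 *	rt = ip_route_connect(...);
 *	sk_setup_caps(sk, &rt->dst);
 *
 * after which sk_can_gso(sk), sk->sk_gso_max_size and
 * sk->sk_gso_max_segs reflect the chosen output device.
 */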
1690
Linus Torvalds1da177e2005-04-16 15:20:36 -07001691/*
1692 * Simple resource managers for sockets.
1693 */
1694
1695
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001696/*
1697 * Write buffer destructor automatically called from kfree_skb.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001698 */
1699void sock_wfree(struct sk_buff *skb)
1700{
1701 struct sock *sk = skb->sk;
Eric Dumazetd99927f2009-09-24 10:49:24 +00001702 unsigned int len = skb->truesize;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001703
Eric Dumazetd99927f2009-09-24 10:49:24 +00001704 if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
1705 /*
1706 * Keep a reference on sk_wmem_alloc, this will be released
1707 * after sk_write_space() call
1708 */
1709 atomic_sub(len - 1, &sk->sk_wmem_alloc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001710 sk->sk_write_space(sk);
Eric Dumazetd99927f2009-09-24 10:49:24 +00001711 len = 1;
1712 }
Eric Dumazet2b85a342009-06-11 02:55:43 -07001713 /*
Eric Dumazetd99927f2009-09-24 10:49:24 +00001714 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
1715 * could not do because of in-flight packets
Eric Dumazet2b85a342009-06-11 02:55:43 -07001716 */
Eric Dumazetd99927f2009-09-24 10:49:24 +00001717 if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
Eric Dumazet2b85a342009-06-11 02:55:43 -07001718 __sk_free(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001719}
Eric Dumazet2a915252009-05-27 11:30:05 +00001720EXPORT_SYMBOL(sock_wfree);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001721
Eric Dumazet1d2077a2016-05-02 10:56:27 -07001722/* This variant of sock_wfree() is used by TCP,
1723 * since it sets SOCK_USE_WRITE_QUEUE.
1724 */
1725void __sock_wfree(struct sk_buff *skb)
1726{
1727 struct sock *sk = skb->sk;
1728
1729 if (atomic_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
1730 __sk_free(sk);
1731}
1732
Eric Dumazet9e17f8a2015-11-01 15:36:55 -08001733void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
1734{
1735 skb_orphan(skb);
1736 skb->sk = sk;
1737#ifdef CONFIG_INET
1738 if (unlikely(!sk_fullsock(sk))) {
1739 skb->destructor = sock_edemux;
1740 sock_hold(sk);
1741 return;
1742 }
1743#endif
1744 skb->destructor = sock_wfree;
1745 skb_set_hash_from_sk(skb, sk);
1746 /*
1747	 * We used to take a refcount on sk, but the following operation
1748	 * is enough to guarantee sk_free() won't free this sock until
1749	 * all in-flight packets have completed
1750 */
1751 atomic_add(skb->truesize, &sk->sk_wmem_alloc);
1752}
1753EXPORT_SYMBOL(skb_set_owner_w);
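/* Usage sketch (illustrative): transmit paths charge freshly built skbs
 * to the sending socket so write-space accounting sees them:
 *
 *	skb = alloc_skb(size, GFP_KERNEL);
 *	if (skb)
 *		skb_set_owner_w(skb, sk);
 *
 * Freeing such an skb runs sock_wfree(), which returns the truesize
 * charge and may drop the final sk_wmem_alloc reference.
 */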
1754
Eric Dumazet1d2077a2016-05-02 10:56:27 -07001755/* This helper is used by netem, as it can hold packets in its
1756 * delay queue. We want to allow the owner socket to send more
1757 * packets, as if they were already TX completed by a typical driver.
1758 * But we also want to keep skb->sk set because some packet schedulers
1759 * rely on it (sch_fq for example). So we set skb->truesize to a small
1760 * amount (1) and decrease sk_wmem_alloc accordingly.
1761 */
Eric Dumazetf2f872f2013-07-30 17:55:08 -07001762void skb_orphan_partial(struct sk_buff *skb)
1763{
Eric Dumazet1d2077a2016-05-02 10:56:27 -07001764	/* If this skb is a TCP pure ACK or has already been here,
1765 * we have nothing to do. 2 is already a very small truesize.
1766 */
1767 if (skb->truesize <= 2)
1768 return;
1769
Eric Dumazetf2f872f2013-07-30 17:55:08 -07001770 /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
1771	 * so we do not completely orphan the skb, but transfer all
1772 * accounted bytes but one, to avoid unexpected reorders.
1773 */
1774 if (skb->destructor == sock_wfree
1775#ifdef CONFIG_INET
1776 || skb->destructor == tcp_wfree
1777#endif
1778 ) {
1779 atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc);
1780 skb->truesize = 1;
1781 } else {
1782 skb_orphan(skb);
1783 }
1784}
1785EXPORT_SYMBOL(skb_orphan_partial);
1786
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001787/*
1788 * Read buffer destructor automatically called from kfree_skb.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001789 */
1790void sock_rfree(struct sk_buff *skb)
1791{
1792 struct sock *sk = skb->sk;
Eric Dumazetd361fd52010-07-10 22:45:17 +00001793 unsigned int len = skb->truesize;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001794
Eric Dumazetd361fd52010-07-10 22:45:17 +00001795 atomic_sub(len, &sk->sk_rmem_alloc);
1796 sk_mem_uncharge(sk, len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001797}
Eric Dumazet2a915252009-05-27 11:30:05 +00001798EXPORT_SYMBOL(sock_rfree);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001799
Oliver Hartkopp7768eed2015-03-10 19:03:46 +01001800/*
1801 * Buffer destructor for skbs that are not used directly in read or write
1802 * path, e.g. for error handler skbs. Automatically called from kfree_skb.
1803 */
Alexander Duyck62bccb82014-09-04 13:31:35 -04001804void sock_efree(struct sk_buff *skb)
1805{
1806 sock_put(skb->sk);
1807}
1808EXPORT_SYMBOL(sock_efree);
1809
Eric W. Biederman976d02012012-05-23 17:16:53 -06001810kuid_t sock_i_uid(struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001811{
Eric W. Biederman976d02012012-05-23 17:16:53 -06001812 kuid_t uid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001813
Eric Dumazetf064af12010-09-22 12:43:39 +00001814 read_lock_bh(&sk->sk_callback_lock);
Eric W. Biederman976d02012012-05-23 17:16:53 -06001815 uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
Eric Dumazetf064af12010-09-22 12:43:39 +00001816 read_unlock_bh(&sk->sk_callback_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001817 return uid;
1818}
Eric Dumazet2a915252009-05-27 11:30:05 +00001819EXPORT_SYMBOL(sock_i_uid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001820
1821unsigned long sock_i_ino(struct sock *sk)
1822{
1823 unsigned long ino;
1824
Eric Dumazetf064af12010-09-22 12:43:39 +00001825 read_lock_bh(&sk->sk_callback_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001826 ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
Eric Dumazetf064af12010-09-22 12:43:39 +00001827 read_unlock_bh(&sk->sk_callback_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001828 return ino;
1829}
Eric Dumazet2a915252009-05-27 11:30:05 +00001830EXPORT_SYMBOL(sock_i_ino);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001831
1832/*
1833 * Allocate an skb from the socket's send buffer.
1834 */
Victor Fusco86a76ca2005-07-08 14:57:47 -07001835struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
Al Virodd0fc662005-10-07 07:46:04 +01001836 gfp_t priority)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001837{
1838 if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
Eric Dumazet2a915252009-05-27 11:30:05 +00001839 struct sk_buff *skb = alloc_skb(size, priority);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001840 if (skb) {
1841 skb_set_owner_w(skb, sk);
1842 return skb;
1843 }
1844 }
1845 return NULL;
1846}
Eric Dumazet2a915252009-05-27 11:30:05 +00001847EXPORT_SYMBOL(sock_wmalloc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001848
1849/*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001850 * Allocate a memory block from the socket's option memory buffer.
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001851 */
Al Virodd0fc662005-10-07 07:46:04 +01001852void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001853{
Eric Dumazet95c96172012-04-15 05:58:06 +00001854 if ((unsigned int)size <= sysctl_optmem_max &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001855 atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
1856 void *mem;
1857 /* First do the add, to avoid the race if kmalloc
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001858 * might sleep.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001859 */
1860 atomic_add(size, &sk->sk_omem_alloc);
1861 mem = kmalloc(size, priority);
1862 if (mem)
1863 return mem;
1864 atomic_sub(size, &sk->sk_omem_alloc);
1865 }
1866 return NULL;
1867}
Eric Dumazet2a915252009-05-27 11:30:05 +00001868EXPORT_SYMBOL(sock_kmalloc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001869
Daniel Borkmann79e88652014-11-19 17:13:11 +01001870/* Free an option memory block. Note that we actually want the inline
1871 * here as this allows gcc to detect the nullify and fold away the
1872 * condition entirely.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001873 */
Daniel Borkmann79e88652014-11-19 17:13:11 +01001874static inline void __sock_kfree_s(struct sock *sk, void *mem, int size,
1875 const bool nullify)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001876{
David S. Millere53da5f2014-10-14 17:02:37 -04001877 if (WARN_ON_ONCE(!mem))
1878 return;
Daniel Borkmann79e88652014-11-19 17:13:11 +01001879 if (nullify)
1880 kzfree(mem);
1881 else
1882 kfree(mem);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001883 atomic_sub(size, &sk->sk_omem_alloc);
1884}
Daniel Borkmann79e88652014-11-19 17:13:11 +01001885
1886void sock_kfree_s(struct sock *sk, void *mem, int size)
1887{
1888 __sock_kfree_s(sk, mem, size, false);
1889}
Eric Dumazet2a915252009-05-27 11:30:05 +00001890EXPORT_SYMBOL(sock_kfree_s);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001891
Daniel Borkmann79e88652014-11-19 17:13:11 +01001892void sock_kzfree_s(struct sock *sk, void *mem, int size)
1893{
1894 __sock_kfree_s(sk, mem, size, true);
1895}
1896EXPORT_SYMBOL(sock_kzfree_s);
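/* Usage sketch (illustrative; "optlen" is the caller's size): since the
 * charge is tracked in sk_omem_alloc, the allocation and the free must
 * quote the same size:
 *
 *	void *opt = sock_kmalloc(sk, optlen, GFP_KERNEL);
 *
 *	if (!opt)
 *		return -ENOBUFS;
 *	...
 *	sock_kfree_s(sk, opt, optlen);
 *
 * Prefer sock_kzfree_s() when the buffer may hold sensitive data, so it
 * is zeroed before being freed.
 */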
1897
Linus Torvalds1da177e2005-04-16 15:20:36 -07001898/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
1899 I think these locks should be removed for datagram sockets.
1900 */
Eric Dumazet2a915252009-05-27 11:30:05 +00001901static long sock_wait_for_wmem(struct sock *sk, long timeo)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001902{
1903 DEFINE_WAIT(wait);
1904
Eric Dumazet9cd3e072015-11-29 20:03:10 -08001905 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001906 for (;;) {
1907 if (!timeo)
1908 break;
1909 if (signal_pending(current))
1910 break;
1911 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
Eric Dumazetaa395142010-04-20 13:03:51 +00001912 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001913 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1914 break;
1915 if (sk->sk_shutdown & SEND_SHUTDOWN)
1916 break;
1917 if (sk->sk_err)
1918 break;
1919 timeo = schedule_timeout(timeo);
1920 }
Eric Dumazetaa395142010-04-20 13:03:51 +00001921 finish_wait(sk_sleep(sk), &wait);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001922 return timeo;
1923}
1924
1925
1926/*
1927 * Generic send/receive buffer handlers
1928 */
1929
Herbert Xu4cc7f682009-02-04 16:55:54 -08001930struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1931 unsigned long data_len, int noblock,
Eric Dumazet28d64272013-08-08 14:38:47 -07001932 int *errcode, int max_page_order)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001933{
Eric Dumazet2e4e4412014-09-17 04:49:49 -07001934 struct sk_buff *skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001935 long timeo;
1936 int err;
1937
Linus Torvalds1da177e2005-04-16 15:20:36 -07001938 timeo = sock_sndtimeo(sk, noblock);
Eric Dumazet2e4e4412014-09-17 04:49:49 -07001939 for (;;) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001940 err = sock_error(sk);
1941 if (err != 0)
1942 goto failure;
1943
1944 err = -EPIPE;
1945 if (sk->sk_shutdown & SEND_SHUTDOWN)
1946 goto failure;
1947
Eric Dumazet2e4e4412014-09-17 04:49:49 -07001948 if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
1949 break;
Eric Dumazet28d64272013-08-08 14:38:47 -07001950
Eric Dumazet9cd3e072015-11-29 20:03:10 -08001951 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
Eric Dumazet2e4e4412014-09-17 04:49:49 -07001952 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1953 err = -EAGAIN;
1954 if (!timeo)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001955 goto failure;
Eric Dumazet2e4e4412014-09-17 04:49:49 -07001956 if (signal_pending(current))
1957 goto interrupted;
1958 timeo = sock_wait_for_wmem(sk, timeo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001959 }
Eric Dumazet2e4e4412014-09-17 04:49:49 -07001960 skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
1961 errcode, sk->sk_allocation);
1962 if (skb)
1963 skb_set_owner_w(skb, sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001964 return skb;
1965
1966interrupted:
1967 err = sock_intr_errno(timeo);
1968failure:
1969 *errcode = err;
1970 return NULL;
1971}
Herbert Xu4cc7f682009-02-04 16:55:54 -08001972EXPORT_SYMBOL(sock_alloc_send_pskb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001973
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001974struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001975 int noblock, int *errcode)
1976{
Eric Dumazet28d64272013-08-08 14:38:47 -07001977 return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001978}
Eric Dumazet2a915252009-05-27 11:30:05 +00001979EXPORT_SYMBOL(sock_alloc_send_skb);
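/* Usage sketch (illustrative; "hlen" stands for whatever headroom the
 * protocol reserves): a datagram sendmsg path blocks for send-buffer
 * space like this, mapping MSG_DONTWAIT onto noblock:
 *
 *	skb = sock_alloc_send_skb(sk, hlen + len,
 *				  msg->msg_flags & MSG_DONTWAIT, &err);
 *	if (!skb)
 *		return err;
 *
 * On failure, err holds the negative errno set up above (-EAGAIN,
 * -EPIPE, a pending sock_error() or the signal-derived code).
 */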
Linus Torvalds1da177e2005-04-16 15:20:36 -07001980
Willem de Bruijn39771b12016-04-02 23:08:06 -04001981int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
1982 struct sockcm_cookie *sockc)
1983{
Soheil Hassas Yeganeh3dd17e62016-04-02 23:08:09 -04001984 u32 tsflags;
1985
Willem de Bruijn39771b12016-04-02 23:08:06 -04001986 switch (cmsg->cmsg_type) {
1987 case SO_MARK:
1988 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1989 return -EPERM;
1990 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
1991 return -EINVAL;
1992 sockc->mark = *(u32 *)CMSG_DATA(cmsg);
1993 break;
Soheil Hassas Yeganeh3dd17e62016-04-02 23:08:09 -04001994 case SO_TIMESTAMPING:
1995 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
1996 return -EINVAL;
1997
1998 tsflags = *(u32 *)CMSG_DATA(cmsg);
1999 if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK)
2000 return -EINVAL;
2001
2002 sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
2003 sockc->tsflags |= tsflags;
2004 break;
Soheil Hassas Yeganeh779f1ed2016-07-11 16:51:26 -04002005 /* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
2006 case SCM_RIGHTS:
2007 case SCM_CREDENTIALS:
2008 break;
Willem de Bruijn39771b12016-04-02 23:08:06 -04002009 default:
2010 return -EINVAL;
2011 }
2012 return 0;
2013}
2014EXPORT_SYMBOL(__sock_cmsg_send);
2015
Edward Jeef28ea362015-10-08 14:56:48 -07002016int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
2017 struct sockcm_cookie *sockc)
2018{
2019 struct cmsghdr *cmsg;
Willem de Bruijn39771b12016-04-02 23:08:06 -04002020 int ret;
Edward Jeef28ea362015-10-08 14:56:48 -07002021
2022 for_each_cmsghdr(cmsg, msg) {
2023 if (!CMSG_OK(msg, cmsg))
2024 return -EINVAL;
2025 if (cmsg->cmsg_level != SOL_SOCKET)
2026 continue;
Willem de Bruijn39771b12016-04-02 23:08:06 -04002027 ret = __sock_cmsg_send(sk, msg, cmsg, sockc);
2028 if (ret)
2029 return ret;
Edward Jeef28ea362015-10-08 14:56:48 -07002030 }
2031 return 0;
2032}
2033EXPORT_SYMBOL(sock_cmsg_send);
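/* Usage sketch (illustrative): a sendmsg implementation seeds the
 * cookie from socket state and lets SOL_SOCKET cmsgs override it:
 *
 *	struct sockcm_cookie sockc = { .tsflags = sk->sk_tsflags };
 *
 *	if (msg->msg_controllen) {
 *		err = sock_cmsg_send(sk, msg, &sockc);
 *		if (unlikely(err))
 *			return err;
 *	}
 *
 * This matches how the UDP sendmsg path consumes SO_MARK and
 * SO_TIMESTAMPING control messages.
 */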
2034
Eric Dumazet5640f762012-09-23 23:04:42 +00002035/* On 32bit arches, an skb frag is limited to 2^15 */
2036#define SKB_FRAG_PAGE_ORDER get_order(32768)
2037
Eric Dumazet400dfd32013-10-17 16:27:07 -07002038/**
2039 * skb_page_frag_refill - check that a page_frag contains enough room
2040 * @sz: minimum size of the fragment we want to get
2041 * @pfrag: pointer to page_frag
Eric Dumazet82d5e2b2014-09-08 04:00:00 -07002042 * @gfp: priority for memory allocation
Eric Dumazet400dfd32013-10-17 16:27:07 -07002043 *
2044 * Note: While this allocator tries to use high order pages, there is
2045 * no guarantee that allocations succeed. Therefore, @sz MUST be
2046 * less than or equal to PAGE_SIZE.
2047 */
Eric Dumazetd9b29382014-08-27 20:49:34 -07002048bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
Eric Dumazet5640f762012-09-23 23:04:42 +00002049{
Eric Dumazet5640f762012-09-23 23:04:42 +00002050 if (pfrag->page) {
Joonsoo Kimfe896d12016-03-17 14:19:26 -07002051 if (page_ref_count(pfrag->page) == 1) {
Eric Dumazet5640f762012-09-23 23:04:42 +00002052 pfrag->offset = 0;
2053 return true;
2054 }
Eric Dumazet400dfd32013-10-17 16:27:07 -07002055 if (pfrag->offset + sz <= pfrag->size)
Eric Dumazet5640f762012-09-23 23:04:42 +00002056 return true;
2057 put_page(pfrag->page);
2058 }
2059
Eric Dumazetd9b29382014-08-27 20:49:34 -07002060 pfrag->offset = 0;
2061 if (SKB_FRAG_PAGE_ORDER) {
Mel Gormand0164ad2015-11-06 16:28:21 -08002062 /* Avoid direct reclaim but allow kswapd to wake */
2063 pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
2064 __GFP_COMP | __GFP_NOWARN |
2065 __GFP_NORETRY,
Eric Dumazetd9b29382014-08-27 20:49:34 -07002066 SKB_FRAG_PAGE_ORDER);
Eric Dumazet5640f762012-09-23 23:04:42 +00002067 if (likely(pfrag->page)) {
Eric Dumazetd9b29382014-08-27 20:49:34 -07002068 pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
Eric Dumazet5640f762012-09-23 23:04:42 +00002069 return true;
2070 }
Eric Dumazetd9b29382014-08-27 20:49:34 -07002071 }
2072 pfrag->page = alloc_page(gfp);
2073 if (likely(pfrag->page)) {
2074 pfrag->size = PAGE_SIZE;
2075 return true;
2076 }
Eric Dumazet400dfd32013-10-17 16:27:07 -07002077 return false;
2078}
2079EXPORT_SYMBOL(skb_page_frag_refill);
2080
2081bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
2082{
2083 if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
2084 return true;
2085
Eric Dumazet5640f762012-09-23 23:04:42 +00002086 sk_enter_memory_pressure(sk);
2087 sk_stream_moderate_sndbuf(sk);
2088 return false;
2089}
2090EXPORT_SYMBOL(sk_page_frag_refill);
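/* Usage sketch (illustrative; wait_for_memory is the caller's own
 * label): stream transmit paths append user data to the per-socket or
 * per-task frag, refilling as needed:
 *
 *	struct page_frag *pfrag = sk_page_frag(sk);
 *
 *	if (!sk_page_frag_refill(sk, pfrag))
 *		goto wait_for_memory;
 *	copy = min_t(int, len, pfrag->size - pfrag->offset);
 *
 * This is essentially the pattern tcp_sendmsg() follows.
 */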
2091
Linus Torvalds1da177e2005-04-16 15:20:36 -07002092static void __lock_sock(struct sock *sk)
Namhyung Kimf39234d2010-09-08 03:48:48 +00002093 __releases(&sk->sk_lock.slock)
2094 __acquires(&sk->sk_lock.slock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002095{
2096 DEFINE_WAIT(wait);
2097
Stephen Hemmingere71a4782007-04-10 20:10:33 -07002098 for (;;) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002099 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
2100 TASK_UNINTERRUPTIBLE);
2101 spin_unlock_bh(&sk->sk_lock.slock);
2102 schedule();
2103 spin_lock_bh(&sk->sk_lock.slock);
Stephen Hemmingere71a4782007-04-10 20:10:33 -07002104 if (!sock_owned_by_user(sk))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002105 break;
2106 }
2107 finish_wait(&sk->sk_lock.wq, &wait);
2108}
2109
2110static void __release_sock(struct sock *sk)
Namhyung Kimf39234d2010-09-08 03:48:48 +00002111 __releases(&sk->sk_lock.slock)
2112 __acquires(&sk->sk_lock.slock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002113{
Eric Dumazet5413d1b2016-04-29 14:16:52 -07002114 struct sk_buff *skb, *next;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002115
Eric Dumazet5413d1b2016-04-29 14:16:52 -07002116 while ((skb = sk->sk_backlog.head) != NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002117 sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
Eric Dumazet5413d1b2016-04-29 14:16:52 -07002118
2119 spin_unlock_bh(&sk->sk_lock.slock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002120
2121 do {
Eric Dumazet5413d1b2016-04-29 14:16:52 -07002122 next = skb->next;
Eric Dumazete4cbb022012-04-30 16:07:09 +00002123 prefetch(next);
Eric Dumazet7fee2262010-05-11 23:19:48 +00002124 WARN_ON_ONCE(skb_dst_is_noref(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002125 skb->next = NULL;
Peter Zijlstrac57943a2008-10-07 14:18:42 -07002126 sk_backlog_rcv(sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002127
Eric Dumazet5413d1b2016-04-29 14:16:52 -07002128 cond_resched();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002129
2130 skb = next;
2131 } while (skb != NULL);
2132
Eric Dumazet5413d1b2016-04-29 14:16:52 -07002133 spin_lock_bh(&sk->sk_lock.slock);
2134 }
Zhu Yi8eae9392010-03-04 18:01:40 +00002135
2136 /*
2137	 * Doing the zeroing here guarantees we cannot loop forever
2138 * while a wild producer attempts to flood us.
2139 */
2140 sk->sk_backlog.len = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002141}
2142
Eric Dumazetd41a69f2016-04-29 14:16:53 -07002143void __sk_flush_backlog(struct sock *sk)
2144{
2145 spin_lock_bh(&sk->sk_lock.slock);
2146 __release_sock(sk);
2147 spin_unlock_bh(&sk->sk_lock.slock);
2148}
2149
Linus Torvalds1da177e2005-04-16 15:20:36 -07002150/**
2151 * sk_wait_data - wait for data to arrive at sk_receive_queue
Pavel Pisa4dc3b162005-05-01 08:59:25 -07002152 * @sk: sock to wait on
2153 * @timeo: for how long
Sabrina Dubrocadfbafc92015-07-24 18:19:25 +02002154 * @skb: last skb seen on sk_receive_queue
Linus Torvalds1da177e2005-04-16 15:20:36 -07002155 *
2156 * Now socket state including sk->sk_err is changed only under the lock,
2157 * hence we may omit checks after joining the wait queue.
2158 * We check the receive queue before schedule() only as an optimization;
2159 * it is very likely that release_sock() added new data.
2160 */
Sabrina Dubrocadfbafc92015-07-24 18:19:25 +02002161int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002162{
WANG Congd9dc8b02016-11-11 10:20:50 -08002163 DEFINE_WAIT_FUNC(wait, woken_wake_function);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002164 int rc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002165
WANG Congd9dc8b02016-11-11 10:20:50 -08002166 add_wait_queue(sk_sleep(sk), &wait);
Eric Dumazet9cd3e072015-11-29 20:03:10 -08002167 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
WANG Congd9dc8b02016-11-11 10:20:50 -08002168 rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait);
Eric Dumazet9cd3e072015-11-29 20:03:10 -08002169 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
WANG Congd9dc8b02016-11-11 10:20:50 -08002170 remove_wait_queue(sk_sleep(sk), &wait);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002171 return rc;
2172}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002173EXPORT_SYMBOL(sk_wait_data);
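/* Usage sketch (illustrative; locking and error checks elided): a
 * blocking receive path passes the last skb it has already seen, so the
 * wait ends as soon as the tail of sk_receive_queue changes:
 *
 *	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 *	last = skb_peek_tail(&sk->sk_receive_queue);
 *	sk_wait_data(sk, &timeo, last);
 *
 * Callers usually recheck the queue and the remaining timeo afterwards
 * rather than relying on the return value alone.
 */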
2174
Hideo Aoki3ab224b2007-12-31 00:11:19 -08002175/**
Paolo Abenif8c3bf02016-10-21 13:55:45 +02002176 * __sk_mem_raise_allocated - increase memory_allocated
Hideo Aoki3ab224b2007-12-31 00:11:19 -08002177 * @sk: socket
2178 * @size: memory size to allocate
Paolo Abenif8c3bf02016-10-21 13:55:45 +02002179 * @amt: pages to allocate
Hideo Aoki3ab224b2007-12-31 00:11:19 -08002180 * @kind: allocation type
2181 *
Paolo Abenif8c3bf02016-10-21 13:55:45 +02002182 * Similar to __sk_mem_schedule(), but does not update sk_forward_alloc
Hideo Aoki3ab224b2007-12-31 00:11:19 -08002183 */
Paolo Abenif8c3bf02016-10-21 13:55:45 +02002184int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
Hideo Aoki3ab224b2007-12-31 00:11:19 -08002185{
2186 struct proto *prot = sk->sk_prot;
Paolo Abenif8c3bf02016-10-21 13:55:45 +02002187 long allocated = sk_memory_allocated_add(sk, amt);
Johannes Weinere8056052016-01-14 15:21:14 -08002188
Johannes Weinerbaac50b2016-01-14 15:21:17 -08002189 if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
2190 !mem_cgroup_charge_skmem(sk->sk_memcg, amt))
Johannes Weinere8056052016-01-14 15:21:14 -08002191 goto suppress_allocation;
Hideo Aoki3ab224b2007-12-31 00:11:19 -08002192
2193 /* Under limit. */
Johannes Weinere8056052016-01-14 15:21:14 -08002194 if (allocated <= sk_prot_mem_limits(sk, 0)) {
Glauber Costa180d8cd2011-12-11 21:47:02 +00002195 sk_leave_memory_pressure(sk);
Hideo Aoki3ab224b2007-12-31 00:11:19 -08002196 return 1;
2197 }
2198
Johannes Weinere8056052016-01-14 15:21:14 -08002199 /* Under pressure. */
2200 if (allocated > sk_prot_mem_limits(sk, 1))
Glauber Costa180d8cd2011-12-11 21:47:02 +00002201 sk_enter_memory_pressure(sk);
Hideo Aoki3ab224b2007-12-31 00:11:19 -08002202
Johannes Weinere8056052016-01-14 15:21:14 -08002203 /* Over hard limit. */
2204 if (allocated > sk_prot_mem_limits(sk, 2))
Hideo Aoki3ab224b2007-12-31 00:11:19 -08002205 goto suppress_allocation;
2206
2207 /* guarantee minimum buffer size under pressure */
2208 if (kind == SK_MEM_RECV) {
2209 if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
2210 return 1;
Glauber Costa180d8cd2011-12-11 21:47:02 +00002211
Hideo Aoki3ab224b2007-12-31 00:11:19 -08002212 } else { /* SK_MEM_SEND */
2213 if (sk->sk_type == SOCK_STREAM) {
2214 if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
2215 return 1;
2216 } else if (atomic_read(&sk->sk_wmem_alloc) <
2217 prot->sysctl_wmem[0])
2218 return 1;
2219 }
2220
Glauber Costa180d8cd2011-12-11 21:47:02 +00002221 if (sk_has_memory_pressure(sk)) {
Eric Dumazet17483762008-11-25 21:16:35 -08002222 int alloc;
2223
Glauber Costa180d8cd2011-12-11 21:47:02 +00002224 if (!sk_under_memory_pressure(sk))
Eric Dumazet17483762008-11-25 21:16:35 -08002225 return 1;
Glauber Costa180d8cd2011-12-11 21:47:02 +00002226 alloc = sk_sockets_allocated_read_positive(sk);
2227 if (sk_prot_mem_limits(sk, 2) > alloc *
Hideo Aoki3ab224b2007-12-31 00:11:19 -08002228 sk_mem_pages(sk->sk_wmem_queued +
2229 atomic_read(&sk->sk_rmem_alloc) +
2230 sk->sk_forward_alloc))
2231 return 1;
2232 }
2233
2234suppress_allocation:
2235
2236 if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
2237 sk_stream_moderate_sndbuf(sk);
2238
2239 /* Fail only if socket is _under_ its sndbuf.
2240 * In this case we cannot block, so that we have to fail.
2241 */
2242 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
2243 return 1;
2244 }
2245
Satoru Moriya3847ce32011-06-17 12:00:03 +00002246 trace_sock_exceed_buf_limit(sk, prot, allocated);
2247
Glauber Costa0e90b312012-01-20 04:57:16 +00002248 sk_memory_allocated_sub(sk, amt);
Glauber Costa180d8cd2011-12-11 21:47:02 +00002249
Johannes Weinerbaac50b2016-01-14 15:21:17 -08002250 if (mem_cgroup_sockets_enabled && sk->sk_memcg)
2251 mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);
Johannes Weinere8056052016-01-14 15:21:14 -08002252
Hideo Aoki3ab224b2007-12-31 00:11:19 -08002253 return 0;
2254}
Paolo Abenif8c3bf02016-10-21 13:55:45 +02002255EXPORT_SYMBOL(__sk_mem_raise_allocated);
2256
2257/**
2258 * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
2259 * @sk: socket
2260 * @size: memory size to allocate
2261 * @kind: allocation type
2262 *
2263 * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
2264 * rmem allocation. This function assumes that protocols which have
2265 * memory_pressure use sk_wmem_queued as write buffer accounting.
2266 */
2267int __sk_mem_schedule(struct sock *sk, int size, int kind)
2268{
2269 int ret, amt = sk_mem_pages(size);
2270
2271 sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT;
2272 ret = __sk_mem_raise_allocated(sk, size, amt, kind);
2273 if (!ret)
2274 sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT;
2275 return ret;
2276}
Hideo Aoki3ab224b2007-12-31 00:11:19 -08002277EXPORT_SYMBOL(__sk_mem_schedule);
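/* Usage sketch (illustrative): most callers go through the charging
 * wrappers, which fall back to __sk_mem_schedule() only when
 * sk_forward_alloc cannot cover the request, e.g. before queueing to
 * the receive queue:
 *
 *	if (!sk_rmem_schedule(sk, skb, skb->truesize))
 *		return -ENOBUFS;
 *
 * sk_mem_uncharge() and __sk_mem_reclaim() later give the quanta back.
 */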
2278
2279/**
Paolo Abenif8c3bf02016-10-21 13:55:45 +02002280 * __sk_mem_reduce_allocated - reclaim memory_allocated
Hideo Aoki3ab224b2007-12-31 00:11:19 -08002281 * @sk: socket
Paolo Abenif8c3bf02016-10-21 13:55:45 +02002282 * @amount: number of quanta
2283 *
2284 * Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc
Hideo Aoki3ab224b2007-12-31 00:11:19 -08002285 */
Paolo Abenif8c3bf02016-10-21 13:55:45 +02002286void __sk_mem_reduce_allocated(struct sock *sk, int amount)
Hideo Aoki3ab224b2007-12-31 00:11:19 -08002287{
Eric Dumazet1a24e042015-05-15 12:39:25 -07002288 sk_memory_allocated_sub(sk, amount);
Hideo Aoki3ab224b2007-12-31 00:11:19 -08002289
Johannes Weinerbaac50b2016-01-14 15:21:17 -08002290 if (mem_cgroup_sockets_enabled && sk->sk_memcg)
2291 mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
Johannes Weinere8056052016-01-14 15:21:14 -08002292
Glauber Costa180d8cd2011-12-11 21:47:02 +00002293 if (sk_under_memory_pressure(sk) &&
2294 (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
2295 sk_leave_memory_pressure(sk);
Hideo Aoki3ab224b2007-12-31 00:11:19 -08002296}
Paolo Abenif8c3bf02016-10-21 13:55:45 +02002297EXPORT_SYMBOL(__sk_mem_reduce_allocated);
2298
2299/**
2300 * __sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated
2301 * @sk: socket
2302 * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
2303 */
2304void __sk_mem_reclaim(struct sock *sk, int amount)
2305{
2306 amount >>= SK_MEM_QUANTUM_SHIFT;
2307 sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
2308 __sk_mem_reduce_allocated(sk, amount);
2309}
Hideo Aoki3ab224b2007-12-31 00:11:19 -08002310EXPORT_SYMBOL(__sk_mem_reclaim);
2311
samanthakumar627d2d62016-04-05 12:41:16 -04002312int sk_set_peek_off(struct sock *sk, int val)
2313{
2314 if (val < 0)
2315 return -EINVAL;
2316
2317 sk->sk_peek_off = val;
2318 return 0;
2319}
2320EXPORT_SYMBOL_GPL(sk_set_peek_off);
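/* Usage sketch (illustrative): protocols opt in to SO_PEEK_OFF by
 * wiring this helper into their proto_ops, e.g.
 *
 *	.set_peek_off = sk_set_peek_off,
 *
 * as the AF_INET datagram ops do, after which MSG_PEEK receives advance
 * the offset via sk_peek_offset() and sk_peek_offset_fwd().
 */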
Hideo Aoki3ab224b2007-12-31 00:11:19 -08002321
Linus Torvalds1da177e2005-04-16 15:20:36 -07002322/*
2323 * Set of default routines for initialising struct proto_ops when
2324 * the protocol does not support a particular function. In certain
2325 * cases where it makes no sense for a protocol to have a "do nothing"
2326 * function, some default processing is provided.
2327 */
2328
2329int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
2330{
2331 return -EOPNOTSUPP;
2332}
Eric Dumazet2a915252009-05-27 11:30:05 +00002333EXPORT_SYMBOL(sock_no_bind);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002334
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002335int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002336 int len, int flags)
2337{
2338 return -EOPNOTSUPP;
2339}
Eric Dumazet2a915252009-05-27 11:30:05 +00002340EXPORT_SYMBOL(sock_no_connect);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002341
2342int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
2343{
2344 return -EOPNOTSUPP;
2345}
Eric Dumazet2a915252009-05-27 11:30:05 +00002346EXPORT_SYMBOL(sock_no_socketpair);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002347
2348int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
2349{
2350 return -EOPNOTSUPP;
2351}
Eric Dumazet2a915252009-05-27 11:30:05 +00002352EXPORT_SYMBOL(sock_no_accept);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002353
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002354int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002355 int *len, int peer)
2356{
2357 return -EOPNOTSUPP;
2358}
Eric Dumazet2a915252009-05-27 11:30:05 +00002359EXPORT_SYMBOL(sock_no_getname);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002360
Eric Dumazet2a915252009-05-27 11:30:05 +00002361unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002362{
2363 return 0;
2364}
Eric Dumazet2a915252009-05-27 11:30:05 +00002365EXPORT_SYMBOL(sock_no_poll);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002366
2367int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2368{
2369 return -EOPNOTSUPP;
2370}
Eric Dumazet2a915252009-05-27 11:30:05 +00002371EXPORT_SYMBOL(sock_no_ioctl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002372
2373int sock_no_listen(struct socket *sock, int backlog)
2374{
2375 return -EOPNOTSUPP;
2376}
Eric Dumazet2a915252009-05-27 11:30:05 +00002377EXPORT_SYMBOL(sock_no_listen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002378
2379int sock_no_shutdown(struct socket *sock, int how)
2380{
2381 return -EOPNOTSUPP;
2382}
Eric Dumazet2a915252009-05-27 11:30:05 +00002383EXPORT_SYMBOL(sock_no_shutdown);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002384
2385int sock_no_setsockopt(struct socket *sock, int level, int optname,
David S. Millerb7058842009-09-30 16:12:20 -07002386 char __user *optval, unsigned int optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002387{
2388 return -EOPNOTSUPP;
2389}
Eric Dumazet2a915252009-05-27 11:30:05 +00002390EXPORT_SYMBOL(sock_no_setsockopt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002391
2392int sock_no_getsockopt(struct socket *sock, int level, int optname,
2393 char __user *optval, int __user *optlen)
2394{
2395 return -EOPNOTSUPP;
2396}
Eric Dumazet2a915252009-05-27 11:30:05 +00002397EXPORT_SYMBOL(sock_no_getsockopt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002398
Ying Xue1b784142015-03-02 15:37:48 +08002399int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002400{
2401 return -EOPNOTSUPP;
2402}
Eric Dumazet2a915252009-05-27 11:30:05 +00002403EXPORT_SYMBOL(sock_no_sendmsg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002404
Ying Xue1b784142015-03-02 15:37:48 +08002405int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
2406 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002407{
2408 return -EOPNOTSUPP;
2409}
Eric Dumazet2a915252009-05-27 11:30:05 +00002410EXPORT_SYMBOL(sock_no_recvmsg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002411
2412int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
2413{
2414 /* Mirror missing mmap method error code */
2415 return -ENODEV;
2416}
Eric Dumazet2a915252009-05-27 11:30:05 +00002417EXPORT_SYMBOL(sock_no_mmap);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002418
2419ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
2420{
2421 ssize_t res;
2422 struct msghdr msg = {.msg_flags = flags};
2423 struct kvec iov;
2424 char *kaddr = kmap(page);
2425 iov.iov_base = kaddr + offset;
2426 iov.iov_len = size;
2427 res = kernel_sendmsg(sock, &msg, &iov, 1, size);
2428 kunmap(page);
2429 return res;
2430}
Eric Dumazet2a915252009-05-27 11:30:05 +00002431EXPORT_SYMBOL(sock_no_sendpage);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002432
2433/*
2434 * Default Socket Callbacks
2435 */
2436
2437static void sock_def_wakeup(struct sock *sk)
2438{
Eric Dumazet43815482010-04-29 11:01:49 +00002439 struct socket_wq *wq;
2440
2441 rcu_read_lock();
2442 wq = rcu_dereference(sk->sk_wq);
Herbert Xu1ce0bf52015-11-26 13:55:39 +08002443 if (skwq_has_sleeper(wq))
Eric Dumazet43815482010-04-29 11:01:49 +00002444 wake_up_interruptible_all(&wq->wait);
2445 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002446}
2447
2448static void sock_def_error_report(struct sock *sk)
2449{
Eric Dumazet43815482010-04-29 11:01:49 +00002450 struct socket_wq *wq;
2451
2452 rcu_read_lock();
2453 wq = rcu_dereference(sk->sk_wq);
Herbert Xu1ce0bf52015-11-26 13:55:39 +08002454 if (skwq_has_sleeper(wq))
Eric Dumazet43815482010-04-29 11:01:49 +00002455 wake_up_interruptible_poll(&wq->wait, POLLERR);
Pavel Emelyanov8d8ad9d2007-11-26 20:10:50 +08002456 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
Eric Dumazet43815482010-04-29 11:01:49 +00002457 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002458}
2459
David S. Miller676d2362014-04-11 16:15:36 -04002460static void sock_def_readable(struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002461{
Eric Dumazet43815482010-04-29 11:01:49 +00002462 struct socket_wq *wq;
2463
2464 rcu_read_lock();
2465 wq = rcu_dereference(sk->sk_wq);
Herbert Xu1ce0bf52015-11-26 13:55:39 +08002466 if (skwq_has_sleeper(wq))
Eric Dumazet2c6607c2011-01-06 10:54:29 -08002467 wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
Davide Libenzi37e55402009-03-31 15:24:21 -07002468 POLLRDNORM | POLLRDBAND);
Pavel Emelyanov8d8ad9d2007-11-26 20:10:50 +08002469 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
Eric Dumazet43815482010-04-29 11:01:49 +00002470 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002471}
2472
2473static void sock_def_write_space(struct sock *sk)
2474{
Eric Dumazet43815482010-04-29 11:01:49 +00002475 struct socket_wq *wq;
2476
2477 rcu_read_lock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002478
2479 /* Do not wake up a writer until he can make "significant"
2480 * progress. --DaveM
2481 */
Stephen Hemmingere71a4782007-04-10 20:10:33 -07002482 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
Eric Dumazet43815482010-04-29 11:01:49 +00002483 wq = rcu_dereference(sk->sk_wq);
Herbert Xu1ce0bf52015-11-26 13:55:39 +08002484 if (skwq_has_sleeper(wq))
Eric Dumazet43815482010-04-29 11:01:49 +00002485 wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
Davide Libenzi37e55402009-03-31 15:24:21 -07002486 POLLWRNORM | POLLWRBAND);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002487
2488 /* Should agree with poll, otherwise some programs break */
2489 if (sock_writeable(sk))
Pavel Emelyanov8d8ad9d2007-11-26 20:10:50 +08002490 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002491 }
2492
Eric Dumazet43815482010-04-29 11:01:49 +00002493 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002494}

static void sock_def_destruct(struct sock *sk)
{
}

void sk_send_sigurg(struct sock *sk)
{
	if (sk->sk_socket && sk->sk_socket->file)
		if (send_sigurg(&sk->sk_socket->file->f_owner))
			sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
}
EXPORT_SYMBOL(sk_send_sigurg);

void sk_reset_timer(struct sock *sk, struct timer_list *timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}
EXPORT_SYMBOL(sk_reset_timer);

void sk_stop_timer(struct sock *sk, struct timer_list *timer)
{
	if (del_timer(timer))
		__sock_put(sk);
}
EXPORT_SYMBOL(sk_stop_timer);
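
/* A minimal usage sketch (hypothetical caller, not from this file): the
 * two helpers above keep the sock refcount balanced around a timer, e.g.
 *
 *	sk_reset_timer(sk, &sk->sk_timer, jiffies + my_timeout);
 *	...
 *	sk_stop_timer(sk, &sk->sk_timer);
 *
 * where "my_timeout" is a made-up value. sk_reset_timer() takes a
 * reference only if the timer was not already pending, and sk_stop_timer()
 * drops one only if a pending timer was actually deleted, so both calls
 * are safe to make unconditionally.
 */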

void sock_init_data(struct socket *sock, struct sock *sk)
{
	sk_init_common(sk);
	sk->sk_send_head = NULL;

	init_timer(&sk->sk_timer);

	sk->sk_allocation = GFP_KERNEL;
	sk->sk_rcvbuf = sysctl_rmem_default;
	sk->sk_sndbuf = sysctl_wmem_default;
	sk->sk_state = TCP_CLOSE;
	sk_set_socket(sk, sock);

	sock_set_flag(sk, SOCK_ZAPPED);

	if (sock) {
		sk->sk_type = sock->type;
		sk->sk_wq = sock->wq;
		sock->sk = sk;
		sk->sk_uid = SOCK_INODE(sock)->i_uid;
	} else {
		sk->sk_wq = NULL;
		sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0);
	}

	sk->sk_state_change = sock_def_wakeup;
	sk->sk_data_ready = sock_def_readable;
	sk->sk_write_space = sock_def_write_space;
	sk->sk_error_report = sock_def_error_report;
	sk->sk_destruct = sock_def_destruct;

	sk->sk_frag.page = NULL;
	sk->sk_frag.offset = 0;
	sk->sk_peek_off = -1;

	sk->sk_peer_pid = NULL;
	sk->sk_peer_cred = NULL;
	sk->sk_write_pending = 0;
	sk->sk_rcvlowat = 1;
	sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;

	sk->sk_stamp = ktime_set(-1L, 0);

#ifdef CONFIG_NET_RX_BUSY_POLL
	sk->sk_napi_id = 0;
	sk->sk_ll_usec = sysctl_net_busy_read;
#endif

	sk->sk_max_pacing_rate = ~0U;
	sk->sk_pacing_rate = ~0U;
	sk->sk_incoming_cpu = -1;
	/*
	 * Before updating sk_refcnt, we must commit prior changes to memory
	 * (Documentation/RCU/rculist_nulls.txt for details)
	 */
	smp_wmb();
	atomic_set(&sk->sk_refcnt, 1);
	atomic_set(&sk->sk_drops, 0);
}
EXPORT_SYMBOL(sock_init_data);
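
/* A minimal sketch (hypothetical protocol, made-up names) of how the
 * defaults installed above are meant to be specialized: a protocol's
 * ->init hook runs after sock_init_data() and may override individual
 * callbacks while inheriting the rest, e.g.
 *
 *	static void my_proto_data_ready(struct sock *sk)
 *	{
 *		my_proto_note_rx(sk);	// hypothetical bookkeeping helper
 *		wake_up_interruptible(sk_sleep(sk));
 *	}
 *
 *	static int my_proto_init(struct sock *sk)
 *	{
 *		sk->sk_data_ready = my_proto_data_ready;
 *		return 0;
 *	}
 */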

void lock_sock_nested(struct sock *sk, int subclass)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_lock.owned)
		__lock_sock(sk);
	sk->sk_lock.owned = 1;
	spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
	local_bh_enable();
}
EXPORT_SYMBOL(lock_sock_nested);

void release_sock(struct sock *sk)
{
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_backlog.tail)
		__release_sock(sk);

	/* Warning: release_cb() might need to release sk ownership,
	 * i.e. call sock_release_ownership(sk) before us.
	 */
	if (sk->sk_prot->release_cb)
		sk->sk_prot->release_cb(sk);

	sock_release_ownership(sk);
	if (waitqueue_active(&sk->sk_lock.wq))
		wake_up(&sk->sk_lock.wq);
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);
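
/* A minimal sketch of the intended pairing (illustrative): process-context
 * code takes ownership with lock_sock() and hands it back here, which also
 * flushes whatever softirq receivers queued on the backlog meanwhile:
 *
 *	lock_sock(sk);
 *	// update socket state; concurrent packets land on sk->sk_backlog
 *	release_sock(sk);	// runs __release_sock(), wakes lock waiters
 */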

/**
 * lock_sock_fast - fast version of lock_sock
 * @sk: socket
 *
 * This version should be used for very small sections, where the process
 * will not block.
 *
 * Return: false if the fast path was taken:
 *         sk_lock.slock locked, owned = 0, BH disabled
 *
 *         true if the slow path was taken:
 *         sk_lock.slock unlocked, owned = 1, BH enabled
 */
bool lock_sock_fast(struct sock *sk)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);

	if (!sk->sk_lock.owned)
		/*
		 * Note : We must disable BH
		 */
		return false;

	__lock_sock(sk);
	sk->sk_lock.owned = 1;
	spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
	local_bh_enable();
	return true;
}
EXPORT_SYMBOL(lock_sock_fast);
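
/* Usage sketch: the return value must be threaded through to
 * unlock_sock_fast() (include/net/sock.h) so the unlock matches the path
 * actually taken:
 *
 *	bool slow = lock_sock_fast(sk);
 *	// very short, non-blocking critical section
 *	unlock_sock_fast(sk, slow);
 *
 * On the fast path this degenerates to spin_unlock_bh(); on the slow path
 * it is equivalent to release_sock().
 */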

int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
	struct timeval tv;

	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	tv = ktime_to_timeval(sk->sk_stamp);
	if (tv.tv_sec == -1)
		return -ENOENT;
	if (tv.tv_sec == 0) {
		sk->sk_stamp = ktime_get_real();
		tv = ktime_to_timeval(sk->sk_stamp);
	}
	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);

int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
{
	struct timespec ts;

	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	ts = ktime_to_timespec(sk->sk_stamp);
	if (ts.tv_sec == -1)
		return -ENOENT;
	if (ts.tv_sec == 0) {
		sk->sk_stamp = ktime_get_real();
		ts = ktime_to_timespec(sk->sk_stamp);
	}
	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestampns);

void sock_enable_timestamp(struct sock *sk, int flag)
{
	if (!sock_flag(sk, flag)) {
		unsigned long previous_flags = sk->sk_flags;

		sock_set_flag(sk, flag);
		/*
		 * We just set one of the two flags which require net
		 * time stamping, but time stamping might have been on
		 * already because of the other one.
		 */
		if (sock_needs_netstamp(sk) &&
		    !(previous_flags & SK_FLAGS_TIMESTAMP))
			net_enable_timestamp();
	}
}
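
/* For context (userspace-side illustration, not kernel code): the flags
 * handled above are ultimately driven by applications, e.g.
 *
 *	int on = 1;
 *	setsockopt(fd, SOL_SOCKET, SO_TIMESTAMP, &on, sizeof(on));
 *
 * after which received datagrams carry an SCM_TIMESTAMP cmsg, while the
 * older SIOCGSTAMP ioctl (served by sock_get_timestamp() above) fetches
 * the timestamp of the last received packet.
 */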

int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
		       int level, int type)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *skb;
	int copied, err;

	err = -EAGAIN;
	skb = sock_dequeue_err_skb(sk);
	if (skb == NULL)
		goto out;

	copied = skb->len;
	if (copied > len) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}
	err = skb_copy_datagram_msg(skb, 0, msg, copied);
	if (err)
		goto out_free_skb;

	sock_recv_timestamp(msg, sk, skb);

	serr = SKB_EXT_ERR(skb);
	put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);

	msg->msg_flags |= MSG_ERRQUEUE;
	err = copied;

out_free_skb:
	kfree_skb(skb);
out:
	return err;
}
EXPORT_SYMBOL(sock_recv_errqueue);
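
/* Userspace-side sketch (illustrative): messages queued on the error queue
 * are drained with recvmsg() and MSG_ERRQUEUE, e.g.
 *
 *	char cbuf[512];
 *	struct msghdr mh = { .msg_control = cbuf,
 *			     .msg_controllen = sizeof(cbuf) };
 *	ssize_t n = recvmsg(fd, &mh, MSG_ERRQUEUE);
 *
 * and the sock_extended_err payload put_cmsg()'d above is then found by
 * walking the control messages with CMSG_FIRSTHDR()/CMSG_NXTHDR().
 */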

/*
 * Get a socket option on a socket.
 *
 * FIX: POSIX 1003.1g is very ambiguous here. It states that
 * asynchronous errors should be reported by getsockopt. We assume
 * this means if you specify SO_ERROR (otherwise what's the point of it).
 */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_getsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_getsockopt != NULL)
		return sk->sk_prot->compat_getsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_getsockopt);
#endif

int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
			int flags)
{
	struct sock *sk = sock->sk;
	int addr_len = 0;
	int err;

	err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
				   flags & ~MSG_DONTWAIT, &addr_len);
	if (err >= 0)
		msg->msg_namelen = addr_len;
	return err;
}
EXPORT_SYMBOL(sock_common_recvmsg);

/*
 * Set socket options on an inet socket.
 */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_setsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_setsockopt != NULL)
		return sk->sk_prot->compat_setsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_setsockopt);
#endif

void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sk_common_release is called, processes have
	 * no access to the socket, but the network stack still does.
	 * Step one, detach it from networking:
	 *
	 * A. Remove from hash tables.
	 */

	sk->sk_prot->unhash(sk);

	/*
	 * At this point the socket cannot receive new packets, but it is
	 * possible that some packets are in flight because some CPU runs
	 * the receiver and did a hash table lookup before we unhashed the
	 * socket. They will reach the receive queue and will be purged by
	 * the socket destructor.
	 *
	 * Also we still have packets pending on the receive queue and
	 * probably our own packets waiting in device queues. sock_destroy
	 * will drain the receive queue, but transmitted packets will delay
	 * socket destruction until the last reference is released.
	 */

	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

	sk_refcnt_debug_release(sk);

	if (sk->sk_frag.page) {
		put_page(sk->sk_frag.page);
		sk->sk_frag.page = NULL;
	}

	sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);
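
/* A minimal sketch (hypothetical protocol, made-up name): protocols with
 * little connection state typically funnel their ->close handler here,
 * directly or via a thin wrapper:
 *
 *	static void my_proto_close(struct sock *sk, long timeout)
 *	{
 *		sk_common_release(sk);
 *	}
 *
 * Raw sockets, for instance, follow this pattern.
 */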
2850
Pavel Emelyanov13ff3d62008-03-28 16:38:17 -07002851#ifdef CONFIG_PROC_FS
2852#define PROTO_INUSE_NR 64 /* should be enough for the first time */
Pavel Emelyanov1338d462008-03-28 16:38:43 -07002853struct prot_inuse {
2854 int val[PROTO_INUSE_NR];
2855};
Pavel Emelyanov13ff3d62008-03-28 16:38:17 -07002856
2857static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
Pavel Emelyanov70ee1152008-03-31 19:42:16 -07002858
2859#ifdef CONFIG_NET_NS
2860void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2861{
Eric Dumazetd6d9ca02010-07-19 10:48:49 +00002862 __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
Pavel Emelyanov70ee1152008-03-31 19:42:16 -07002863}
2864EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2865
2866int sock_prot_inuse_get(struct net *net, struct proto *prot)
2867{
2868 int cpu, idx = prot->inuse_idx;
2869 int res = 0;
2870
2871 for_each_possible_cpu(cpu)
2872 res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];
2873
2874 return res >= 0 ? res : 0;
2875}
2876EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2877
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002878static int __net_init sock_inuse_init_net(struct net *net)
Pavel Emelyanov70ee1152008-03-31 19:42:16 -07002879{
2880 net->core.inuse = alloc_percpu(struct prot_inuse);
2881 return net->core.inuse ? 0 : -ENOMEM;
2882}
2883
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002884static void __net_exit sock_inuse_exit_net(struct net *net)
Pavel Emelyanov70ee1152008-03-31 19:42:16 -07002885{
2886 free_percpu(net->core.inuse);
2887}
2888
2889static struct pernet_operations net_inuse_ops = {
2890 .init = sock_inuse_init_net,
2891 .exit = sock_inuse_exit_net,
2892};
2893
2894static __init int net_inuse_init(void)
2895{
2896 if (register_pernet_subsys(&net_inuse_ops))
2897 panic("Cannot initialize net inuse counters");
2898
2899 return 0;
2900}
2901
2902core_initcall(net_inuse_init);
2903#else
Pavel Emelyanov1338d462008-03-28 16:38:43 -07002904static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
2905
Pavel Emelyanovc29a0bc2008-03-31 19:41:46 -07002906void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
Pavel Emelyanov1338d462008-03-28 16:38:43 -07002907{
Eric Dumazetd6d9ca02010-07-19 10:48:49 +00002908 __this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
Pavel Emelyanov1338d462008-03-28 16:38:43 -07002909}
2910EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2911
Pavel Emelyanovc29a0bc2008-03-31 19:41:46 -07002912int sock_prot_inuse_get(struct net *net, struct proto *prot)
Pavel Emelyanov1338d462008-03-28 16:38:43 -07002913{
2914 int cpu, idx = prot->inuse_idx;
2915 int res = 0;
2916
2917 for_each_possible_cpu(cpu)
2918 res += per_cpu(prot_inuse, cpu).val[idx];
2919
2920 return res >= 0 ? res : 0;
2921}
2922EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
Pavel Emelyanov70ee1152008-03-31 19:42:16 -07002923#endif
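
/* Usage sketch (illustrative): protocols bump these per-cpu counters from
 * their hash/unhash paths so /proc/net/protocols can report socket counts:
 *
 *	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);	// on hash
 *	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);	// on unhash
 *
 * Because the increment and decrement may land on different CPUs, the
 * per-cpu sum can be transiently negative, which is why
 * sock_prot_inuse_get() clamps its result to zero.
 */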

static void assign_proto_idx(struct proto *prot)
{
	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);

	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
		pr_err("PROTO_INUSE_NR exhausted\n");
		return;
	}

	set_bit(prot->inuse_idx, proto_inuse_idx);
}

static void release_proto_idx(struct proto *prot)
{
	if (prot->inuse_idx != PROTO_INUSE_NR - 1)
		clear_bit(prot->inuse_idx, proto_inuse_idx);
}
#else
static inline void assign_proto_idx(struct proto *prot)
{
}

static inline void release_proto_idx(struct proto *prot)
{
}
#endif

static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
{
	if (!rsk_prot)
		return;
	kfree(rsk_prot->slab_name);
	rsk_prot->slab_name = NULL;
	kmem_cache_destroy(rsk_prot->slab);
	rsk_prot->slab = NULL;
}

static int req_prot_init(const struct proto *prot)
{
	struct request_sock_ops *rsk_prot = prot->rsk_prot;

	if (!rsk_prot)
		return 0;

	rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s",
					prot->name);
	if (!rsk_prot->slab_name)
		return -ENOMEM;

	rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
					   rsk_prot->obj_size, 0,
					   prot->slab_flags, NULL);

	if (!rsk_prot->slab) {
		pr_crit("%s: Can't create request sock SLAB cache!\n",
			prot->name);
		return -ENOMEM;
	}
	return 0;
}

int proto_register(struct proto *prot, int alloc_slab)
{
	if (alloc_slab) {
		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
					SLAB_HWCACHE_ALIGN | prot->slab_flags,
					NULL);

		if (prot->slab == NULL) {
			pr_crit("%s: Can't create sock SLAB cache!\n",
				prot->name);
			goto out;
		}

		if (req_prot_init(prot))
			goto out_free_request_sock_slab;

		if (prot->twsk_prot != NULL) {
			prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);

			if (prot->twsk_prot->twsk_slab_name == NULL)
				goto out_free_request_sock_slab;

			prot->twsk_prot->twsk_slab =
				kmem_cache_create(prot->twsk_prot->twsk_slab_name,
						  prot->twsk_prot->twsk_obj_size,
						  0,
						  prot->slab_flags,
						  NULL);
			if (prot->twsk_prot->twsk_slab == NULL)
				goto out_free_timewait_sock_slab_name;
		}
	}

	mutex_lock(&proto_list_mutex);
	list_add(&prot->node, &proto_list);
	assign_proto_idx(prot);
	mutex_unlock(&proto_list_mutex);
	return 0;

out_free_timewait_sock_slab_name:
	kfree(prot->twsk_prot->twsk_slab_name);
out_free_request_sock_slab:
	req_prot_cleanup(prot->rsk_prot);

	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;
out:
	return -ENOBUFS;
}
EXPORT_SYMBOL(proto_register);
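
/* A minimal registration sketch (hypothetical module, made-up names): a
 * protocol fills in a struct proto and pairs proto_register() with
 * proto_unregister() below on module exit:
 *
 *	static struct proto my_proto = {
 *		.name     = "MYPROTO",
 *		.owner    = THIS_MODULE,
 *		.obj_size = sizeof(struct my_sock),
 *	};
 *
 *	err = proto_register(&my_proto, 1);	// 1: allocate a slab cache
 *	...
 *	proto_unregister(&my_proto);
 *
 * With alloc_slab == 1, sk_alloc() carves sockets for this protocol out
 * of its own kmem cache instead of generic kmalloc memory.
 */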

void proto_unregister(struct proto *prot)
{
	mutex_lock(&proto_list_mutex);
	release_proto_idx(prot);
	list_del(&prot->node);
	mutex_unlock(&proto_list_mutex);

	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;

	req_prot_cleanup(prot->rsk_prot);

	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
		kfree(prot->twsk_prot->twsk_slab_name);
		prot->twsk_prot->twsk_slab = NULL;
	}
}
EXPORT_SYMBOL(proto_unregister);

#ifdef CONFIG_PROC_FS
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(proto_list_mutex)
{
	mutex_lock(&proto_list_mutex);
	return seq_list_start_head(&proto_list, *pos);
}

static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	return seq_list_next(v, &proto_list, pos);
}

static void proto_seq_stop(struct seq_file *seq, void *v)
	__releases(proto_list_mutex)
{
	mutex_unlock(&proto_list_mutex);
}

static char proto_method_implemented(const void *method)
{
	return method == NULL ? 'n' : 'y';
}
static long sock_prot_memory_allocated(struct proto *proto)
{
	return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
}

static char *sock_prot_memory_pressure(struct proto *proto)
{
	return proto->memory_pressure != NULL ?
	proto_memory_pressure(proto) ? "yes" : "no" : "NI";
}

static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
	seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   sock_prot_inuse_get(seq_file_net(seq), proto),
		   sock_prot_memory_allocated(proto),
		   sock_prot_memory_pressure(proto),
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->sendpage),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}

static int proto_seq_show(struct seq_file *seq, void *v)
{
	if (v == &proto_list)
		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
			   "protocol",
			   "size",
			   "sockets",
			   "memory",
			   "press",
			   "maxhdr",
			   "slab",
			   "module",
			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
	else
		proto_seq_printf(seq, list_entry(v, struct proto, node));
	return 0;
}
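
/* For reference (illustrative values, not from a real system), the
 * resulting /proc/net/protocols rows line up under the header as roughly
 *
 *	protocol  size sockets  memory press maxhdr  slab module ...
 *	TCP       1984      5      1   no      320  yes  kernel ...
 */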

static const struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};

static int proto_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &proto_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations proto_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= proto_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

static __net_init int proto_init_net(struct net *net)
{
	if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
		return -ENOMEM;

	return 0;
}

static __net_exit void proto_exit_net(struct net *net)
{
	remove_proc_entry("protocols", net->proc_net);
}

static __net_initdata struct pernet_operations proto_net_ops = {
	.init = proto_init_net,
	.exit = proto_exit_net,
};

static int __init proto_init(void)
{
	return register_pernet_subsys(&proto_net_ops);
}

subsys_initcall(proto_init);

#endif /* PROC_FS */