/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Generic socket support routines. Memory allocators, socket lock/release
 *		handler for protocols to use and generic option handler.
 *
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Fixes:
 *		Alan Cox	:	Numerous verify_area() problems
 *		Alan Cox	:	Connecting on a connecting socket
 *					now returns an error for tcp.
 *		Alan Cox	:	sock->protocol is set correctly.
 *					and is not sometimes left as 0.
 *		Alan Cox	:	connect handles icmp errors on a
 *					connect properly. Unfortunately there
 *					is a restart syscall nasty there. I
 *					can't match BSD without hacking the C
 *					library. Ideas urgently sought!
 *		Alan Cox	:	Disallow bind() to addresses that are
 *					not ours - especially broadcast ones!!
 *		Alan Cox	:	Socket 1024 _IS_ ok for users. (fencepost)
 *		Alan Cox	:	sock_wfree/sock_rfree don't destroy sockets,
 *					instead they leave that for the DESTROY timer.
 *		Alan Cox	:	Clean up error flag in accept
 *		Alan Cox	:	TCP ack handling is buggy, the DESTROY timer
 *					was buggy. Put a remove_sock() in the handler
 *					for memory when we hit 0. Also altered the timer
 *					code. The ACK stuff can wait and needs major
 *					TCP layer surgery.
 *		Alan Cox	:	Fixed TCP ack bug, removed remove sock
 *					and fixed timer/inet_bh race.
 *		Alan Cox	:	Added zapped flag for TCP
 *		Alan Cox	:	Move kfree_skb into skbuff.c and tidied up surplus code
 *		Alan Cox	:	for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
 *		Alan Cox	:	kfree_s calls now are kfree_skbmem so we can track skb resources
 *		Alan Cox	:	Supports socket option broadcast now as does udp. Packet and raw need fixing.
 *		Alan Cox	:	Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
 *		Rick Sladkey	:	Relaxed UDP rules for matching packets.
 *		C.E.Hawkins	:	IFF_PROMISC/SIOCGHWADDR support
 *	Pauline Middelink	:	identd support
 *		Alan Cox	:	Fixed connect() taking signals I think.
 *		Alan Cox	:	SO_LINGER supported
 *		Alan Cox	:	Error reporting fixes
 *		Anonymous	:	inet_create tidied up (sk->reuse setting)
 *		Alan Cox	:	inet sockets don't set sk->type!
 *		Alan Cox	:	Split socket option code
 *		Alan Cox	:	Callbacks
 *		Alan Cox	:	Nagle flag for Charles & Johannes stuff
 *		Alex		:	Removed restriction on inet fioctl
 *		Alan Cox	:	Splitting INET from NET core
 *		Alan Cox	:	Fixed bogus SO_TYPE handling in getsockopt()
 *		Adam Caldwell	:	Missing return in SO_DONTROUTE/SO_DEBUG code
 *		Alan Cox	:	Split IP from generic code
 *		Alan Cox	:	New kfree_skbmem()
 *		Alan Cox	:	Make SO_DEBUG superuser only.
 *		Alan Cox	:	Allow anyone to clear SO_DEBUG
 *					(compatibility fix)
 *		Alan Cox	:	Added optimistic memory grabbing for AF_UNIX throughput.
 *		Alan Cox	:	Allocator for a socket is settable.
 *		Alan Cox	:	SO_ERROR includes soft errors.
 *		Alan Cox	:	Allow NULL arguments on some SO_ opts
 *		Alan Cox	:	Generic socket allocation to make hooks
 *					easier (suggested by Craig Metz).
 *		Michael Pall	:	SO_ERROR returns positive errno again
 *		Steve Whitehouse:	Added default destructor to free
 *					protocol private data.
 *		Steve Whitehouse:	Added various other default routines
 *					common to several socket families.
 *		Chris Evans	:	Call suser() check last on F_SETOWN
 *		Jay Schulist	:	Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
 *		Andi Kleen	:	Add sock_kmalloc()/sock_kfree_s()
 *		Andi Kleen	:	Fix write_space callback
 *		Chris Evans	:	Security fixes - signedness again
 *		Arnaldo C. Melo	:	cleanups, use skb_queue_purge
 *
 * To Fix:
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/user_namespace.h>

#include <asm/uaccess.h>
#include <asm/system.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <linux/net_tstamp.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>
#include <net/cls_cgroup.h>

#include <linux/filter.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

/*
 * Each address family might have different locking rules, so we have
 * one slock key per address family:
 */
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];

/*
 * Make lock validator output more readable. (we pre-construct these
 * strings build-time, so that runtime initialization of socket
 * locks is fast):
 */
static const char *const af_family_key_strings[AF_MAX+1] = {
  "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
  "sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
  "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
  "sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
  "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
  "sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
  "sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
  "sk_lock-AF_RDS"   , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
  "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_IUCV"     ,
  "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
  "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" ,
  "sk_lock-AF_MAX"
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
  "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
  "slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
  "slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
  "slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
  "slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
  "slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
  "slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
  "slock-AF_RDS"   , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
  "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
  "slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
  "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
  "slock-AF_IEEE802154", "slock-AF_CAIF" ,
  "slock-AF_MAX"
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
  "clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
  "clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
  "clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
  "clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
  "clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
  "clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
  "clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
  "clock-AF_RDS"   , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
  "clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
  "clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
  "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
  "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
  "clock-AF_IEEE802154", "clock-AF_CAIF" ,
  "clock-AF_MAX"
};

/*
 * sk_callback_lock locking rules are per-address-family,
 * so split the lock classes by using a per-AF key:
 */
static struct lock_class_key af_callback_keys[AF_MAX];

/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS		256
#define _SK_MEM_OVERHEAD	(sizeof(struct sk_buff) + 256)
#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)

/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
EXPORT_SYMBOL(sysctl_optmem_max);

#if defined(CONFIG_CGROUPS) && !defined(CONFIG_NET_CLS_CGROUP)
int net_cls_subsys_id = -1;
EXPORT_SYMBOL_GPL(net_cls_subsys_id);
#endif

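/*
 * Convert a user-supplied struct timeval into a timeout in jiffies.
 * Short option buffers and out-of-range microseconds are rejected; a
 * negative timeout is clamped to zero (with a rate-limited warning),
 * and {0, 0} means "wait forever" (MAX_SCHEDULE_TIMEOUT).
 */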
static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
	struct timeval tv;

	if (optlen < sizeof(tv))
		return -EINVAL;
	if (copy_from_user(&tv, optval, sizeof(tv)))
		return -EFAULT;
	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
		return -EDOM;

	if (tv.tv_sec < 0) {
		static int warned __read_mostly;

		*timeo_p = 0;
		if (warned < 10 && net_ratelimit()) {
			warned++;
			printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) "
			       "tries to set negative timeout\n",
			       current->comm, task_pid_nr(current));
		}
		return 0;
	}
	*timeo_p = MAX_SCHEDULE_TIMEOUT;
	if (tv.tv_sec == 0 && tv.tv_usec == 0)
		return 0;
	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
		*timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
	return 0;
}

static void sock_warn_obsolete_bsdism(const char *name)
{
	static int warned;
	static char warncomm[TASK_COMM_LEN];
	if (strcmp(warncomm, current->comm) && warned < 5) {
		strcpy(warncomm,  current->comm);
		printk(KERN_WARNING "process `%s' is using obsolete "
		       "%s SO_BSDCOMPAT\n", warncomm, name);
		warned++;
	}
}

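/*
 * Clear a per-socket timestamping flag, and drop the global timestamp
 * reference once neither SOCK_TIMESTAMP nor
 * SOCK_TIMESTAMPING_RX_SOFTWARE remains set on this socket.
 */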
static void sock_disable_timestamp(struct sock *sk, int flag)
{
	if (sock_flag(sk, flag)) {
		sock_reset_flag(sk, flag);
		if (!sock_flag(sk, SOCK_TIMESTAMP) &&
		    !sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE)) {
			net_disable_timestamp();
		}
	}
}

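/*
 * Queue an skb on sk's receive queue. The packet is dropped (counted
 * in sk_drops) when the receive buffer is full or no receive-side
 * memory can be scheduled, and the socket filter runs first. On
 * success the data-ready callback fires unless the socket is dead.
 */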
int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int err;
	int skb_len;
	unsigned long flags;
	struct sk_buff_head *list = &sk->sk_receive_queue;

	/* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
	   number of warnings when compiling with -W --ANK
	 */
	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf) {
		atomic_inc(&sk->sk_drops);
		return -ENOMEM;
	}

	err = sk_filter(sk, skb);
	if (err)
		return err;

	if (!sk_rmem_schedule(sk, skb->truesize)) {
		atomic_inc(&sk->sk_drops);
		return -ENOBUFS;
	}

	skb->dev = NULL;
	skb_set_owner_r(skb, sk);

	/* Cache the SKB length before we tack it onto the receive
	 * queue.  Once it is added it no longer belongs to us and
	 * may be freed by other threads of control pulling packets
	 * from the queue.
	 */
	skb_len = skb->len;

	/* We escape from the RCU-protected region here; make sure we
	 * don't leak a non-refcounted dst.
	 */
	skb_dst_force(skb);

	spin_lock_irqsave(&list->lock, flags);
	skb->dropcount = atomic_read(&sk->sk_drops);
	__skb_queue_tail(list, skb);
	spin_unlock_irqrestore(&list->lock, flags);

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk, skb_len);
	return 0;
}
EXPORT_SYMBOL(sock_queue_rcv_skb);

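/*
 * Filter an skb and hand it to the protocol's backlog receive handler,
 * taking the socket lock from BH context. If the socket is owned by a
 * user-space task, the skb is queued on the backlog instead (and
 * dropped when the backlog is full). Releases a reference on sk.
 */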
int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
{
	int rc = NET_RX_SUCCESS;

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	if (sk_rcvqueues_full(sk, skb)) {
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}
	if (nested)
		bh_lock_sock_nested(sk);
	else
		bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		/*
		 * trylock + unlock semantics:
		 */
		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

		rc = sk_backlog_rcv(sk, skb);

		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
	} else if (sk_add_backlog(sk, skb)) {
		bh_unlock_sock(sk);
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}

	bh_unlock_sock(sk);
out:
	sock_put(sk);
	return rc;
discard_and_relse:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(sk_receive_skb);

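/* Helper to clear the socket's cached transmit queue mapping. */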
void sk_reset_txq(struct sock *sk)
{
	sk_tx_queue_clear(sk);
}
EXPORT_SYMBOL(sk_reset_txq);

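/*
 * Return the socket's cached route, or NULL (after clearing the cache
 * and the cached tx queue mapping) when the entry has become obsolete
 * and fails its dst->ops->check() validity test against the cookie.
 */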
struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = __sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_tx_queue_clear(sk);
		rcu_assign_pointer(sk->sk_dst_cache, NULL);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(__sk_dst_check);

struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_dst_reset(sk);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(sk_dst_check);

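/*
 * Handle SO_BINDTODEVICE: bind the socket to the interface named in
 * the user buffer (requires CAP_NET_RAW). An empty name, or a zero
 * option length, unbinds the socket; the cached route is reset either
 * way.
 */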
static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];
	int index;

	/* Sorry... */
	ret = -EPERM;
	if (!capable(CAP_NET_RAW))
		goto out;

	ret = -EINVAL;
	if (optlen < 0)
		goto out;

	/* Bind this socket to a particular device like "eth0",
	 * as specified in the passed interface name. If the
	 * name is "" or the option length is zero the socket
	 * is not bound.
	 */
	if (optlen > IFNAMSIZ - 1)
		optlen = IFNAMSIZ - 1;
	memset(devname, 0, sizeof(devname));

	ret = -EFAULT;
	if (copy_from_user(devname, optval, optlen))
		goto out;

	index = 0;
	if (devname[0] != '\0') {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_name_rcu(net, devname);
		if (dev)
			index = dev->ifindex;
		rcu_read_unlock();
		ret = -ENODEV;
		if (!dev)
			goto out;
	}

	lock_sock(sk);
	sk->sk_bound_dev_if = index;
	sk_dst_reset(sk);
	release_sock(sk);

	ret = 0;

out:
#endif

	return ret;
}

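/* Set or clear a socket flag depending on a boolean option value. */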
static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
{
	if (valbool)
		sock_set_flag(sk, bit);
	else
		sock_reset_flag(sk, bit);
}

/*
 *	This is meant for all protocols to use and covers goings on
 *	at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	Options without arguments
	 */

	if (optname == SO_BINDTODEVICE)
		return sock_bindtodevice(sk, optval, optlen);

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	valbool = val ? 1 : 0;

	lock_sock(sk);

	switch (optname) {
	case SO_DEBUG:
		if (val && !capable(CAP_NET_ADMIN))
			ret = -EACCES;
		else
			sock_valbool_flag(sk, SOCK_DBG, valbool);
		break;
	case SO_REUSEADDR:
		sk->sk_reuse = valbool;
		break;
	case SO_TYPE:
	case SO_PROTOCOL:
	case SO_DOMAIN:
	case SO_ERROR:
		ret = -ENOPROTOOPT;
		break;
	case SO_DONTROUTE:
		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
		break;
	case SO_BROADCAST:
		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
		break;
	case SO_SNDBUF:
		/* Don't return an error on this; BSD doesn't, and if
		   you think about it this is right. Otherwise apps
		   have to play 'guess the biggest size' games.
		   RCVBUF/SNDBUF are treated in BSD as hints */

		if (val > sysctl_wmem_max)
			val = sysctl_wmem_max;
set_sndbuf:
		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
		if ((val * 2) < SOCK_MIN_SNDBUF)
			sk->sk_sndbuf = SOCK_MIN_SNDBUF;
		else
			sk->sk_sndbuf = val * 2;

		/*
		 *	Wake up sending tasks if we
		 *	upped the value.
		 */
		sk->sk_write_space(sk);
		break;

	case SO_SNDBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_sndbuf;

	case SO_RCVBUF:
		/* Don't return an error on this; BSD doesn't, and if
		   you think about it this is right. Otherwise apps
		   have to play 'guess the biggest size' games.
		   RCVBUF/SNDBUF are treated in BSD as hints */

		if (val > sysctl_rmem_max)
			val = sysctl_rmem_max;
set_rcvbuf:
		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
		/*
		 * We double it on the way in to account for
		 * "struct sk_buff" etc. overhead.   Applications
		 * assume that the SO_RCVBUF setting they make will
		 * allow that much actual data to be received on that
		 * socket.
		 *
		 * Applications are unaware that "struct sk_buff" and
		 * other overheads allocate from the receive buffer
		 * during socket buffer allocation.
		 *
		 * And after considering the possible alternatives,
		 * returning the value we actually used in getsockopt
		 * is the most desirable behavior.
		 */
		if ((val * 2) < SOCK_MIN_RCVBUF)
			sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
		else
			sk->sk_rcvbuf = val * 2;
		break;

	case SO_RCVBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_rcvbuf;

	case SO_KEEPALIVE:
#ifdef CONFIG_INET
		if (sk->sk_protocol == IPPROTO_TCP)
			tcp_set_keepalive(sk, valbool);
#endif
		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
		break;

	case SO_OOBINLINE:
		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
		break;

	case SO_NO_CHECK:
		sk->sk_no_check = valbool;
		break;

	case SO_PRIORITY:
		if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
			sk->sk_priority = val;
		else
			ret = -EPERM;
		break;

	case SO_LINGER:
		if (optlen < sizeof(ling)) {
			ret = -EINVAL;	/* 1003.1g */
			break;
		}
		if (copy_from_user(&ling, optval, sizeof(ling))) {
			ret = -EFAULT;
			break;
		}
		if (!ling.l_onoff)
			sock_reset_flag(sk, SOCK_LINGER);
		else {
#if (BITS_PER_LONG == 32)
			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
			else
#endif
				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
			sock_set_flag(sk, SOCK_LINGER);
		}
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("setsockopt");
		break;

	case SO_PASSCRED:
		if (valbool)
			set_bit(SOCK_PASSCRED, &sock->flags);
		else
			clear_bit(SOCK_PASSCRED, &sock->flags);
		break;

	case SO_TIMESTAMP:
	case SO_TIMESTAMPNS:
		if (valbool)  {
			if (optname == SO_TIMESTAMP)
				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
			else
				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
			sock_set_flag(sk, SOCK_RCVTSTAMP);
			sock_enable_timestamp(sk, SOCK_TIMESTAMP);
		} else {
			sock_reset_flag(sk, SOCK_RCVTSTAMP);
			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
		}
		break;

	case SO_TIMESTAMPING:
		if (val & ~SOF_TIMESTAMPING_MASK) {
			ret = -EINVAL;
			break;
		}
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
				  val & SOF_TIMESTAMPING_TX_HARDWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE,
				  val & SOF_TIMESTAMPING_TX_SOFTWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE,
				  val & SOF_TIMESTAMPING_RX_HARDWARE);
		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
			sock_enable_timestamp(sk,
					      SOCK_TIMESTAMPING_RX_SOFTWARE);
		else
			sock_disable_timestamp(sk,
					       SOCK_TIMESTAMPING_RX_SOFTWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
				  val & SOF_TIMESTAMPING_SOFTWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
				  val & SOF_TIMESTAMPING_SYS_HARDWARE);
		sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
				  val & SOF_TIMESTAMPING_RAW_HARDWARE);
		break;

	case SO_RCVLOWAT:
		if (val < 0)
			val = INT_MAX;
		sk->sk_rcvlowat = val ? : 1;
		break;

	case SO_RCVTIMEO:
		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
		break;

	case SO_SNDTIMEO:
		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
		break;

	case SO_ATTACH_FILTER:
		ret = -EINVAL;
		if (optlen == sizeof(struct sock_fprog)) {
			struct sock_fprog fprog;

			ret = -EFAULT;
			if (copy_from_user(&fprog, optval, sizeof(fprog)))
				break;

			ret = sk_attach_filter(&fprog, sk);
		}
		break;

	case SO_DETACH_FILTER:
		ret = sk_detach_filter(sk);
		break;

	case SO_PASSSEC:
		if (valbool)
			set_bit(SOCK_PASSSEC, &sock->flags);
		else
			clear_bit(SOCK_PASSSEC, &sock->flags);
		break;
	case SO_MARK:
		if (!capable(CAP_NET_ADMIN))
			ret = -EPERM;
		else
			sk->sk_mark = val;
		break;

	case SO_RXQ_OVFL:
		if (valbool)
			sock_set_flag(sk, SOCK_RXQ_OVFL);
		else
			sock_reset_flag(sk, SOCK_RXQ_OVFL);
		break;

		/* We implement the SO_SNDLOWAT etc to
		   not be settable (1003.1g 5.3) */
	default:
		ret = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);
	return ret;
}
EXPORT_SYMBOL(sock_setsockopt);

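/*
 * Fill in a struct ucred from a pid and credentials, translating the
 * pid into the caller's pid namespace and the effective uid/gid into
 * the current user namespace. With a NULL cred, uid and gid are -1.
 */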
void cred_to_ucred(struct pid *pid, const struct cred *cred,
		   struct ucred *ucred)
{
	ucred->pid = pid_vnr(pid);
	ucred->uid = ucred->gid = -1;
	if (cred) {
		struct user_namespace *current_ns = current_user_ns();

		ucred->uid = user_ns_map_uid(current_ns, cred, cred->euid);
		ucred->gid = user_ns_map_gid(current_ns, cred, cred->egid);
	}
}
EXPORT_SYMBOL_GPL(cred_to_ucred);

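/*
 * Generic counterpart of sock_setsockopt(): report socket-level option
 * values. Replies are marshalled through a union of the possible
 * result types; the returned length is clamped to the smaller of the
 * caller's buffer and the option's natural size.
 */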
int sock_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	union {
		int val;
		struct linger ling;
		struct timeval tm;
	} v;

	int lv = sizeof(int);
	int len;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	memset(&v, 0, sizeof(v));

	switch (optname) {
	case SO_DEBUG:
		v.val = sock_flag(sk, SOCK_DBG);
		break;

	case SO_DONTROUTE:
		v.val = sock_flag(sk, SOCK_LOCALROUTE);
		break;

	case SO_BROADCAST:
		v.val = !!sock_flag(sk, SOCK_BROADCAST);
		break;

	case SO_SNDBUF:
		v.val = sk->sk_sndbuf;
		break;

	case SO_RCVBUF:
		v.val = sk->sk_rcvbuf;
		break;

	case SO_REUSEADDR:
		v.val = sk->sk_reuse;
		break;

	case SO_KEEPALIVE:
		v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
		break;

	case SO_TYPE:
		v.val = sk->sk_type;
		break;

	case SO_PROTOCOL:
		v.val = sk->sk_protocol;
		break;

	case SO_DOMAIN:
		v.val = sk->sk_family;
		break;

	case SO_ERROR:
		v.val = -sock_error(sk);
		if (v.val == 0)
			v.val = xchg(&sk->sk_err_soft, 0);
		break;

	case SO_OOBINLINE:
		v.val = !!sock_flag(sk, SOCK_URGINLINE);
		break;

	case SO_NO_CHECK:
		v.val = sk->sk_no_check;
		break;

	case SO_PRIORITY:
		v.val = sk->sk_priority;
		break;

	case SO_LINGER:
		lv		= sizeof(v.ling);
		v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
		v.ling.l_linger	= sk->sk_lingertime / HZ;
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("getsockopt");
		break;

	case SO_TIMESTAMP:
		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
				!sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_TIMESTAMPNS:
		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_TIMESTAMPING:
		v.val = 0;
		if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
			v.val |= SOF_TIMESTAMPING_TX_HARDWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
			v.val |= SOF_TIMESTAMPING_TX_SOFTWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE))
			v.val |= SOF_TIMESTAMPING_RX_HARDWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE))
			v.val |= SOF_TIMESTAMPING_RX_SOFTWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE))
			v.val |= SOF_TIMESTAMPING_SOFTWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))
			v.val |= SOF_TIMESTAMPING_SYS_HARDWARE;
		if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))
			v.val |= SOF_TIMESTAMPING_RAW_HARDWARE;
		break;

	case SO_RCVTIMEO:
		lv = sizeof(struct timeval);
		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_SNDTIMEO:
		lv = sizeof(struct timeval);
		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_RCVLOWAT:
		v.val = sk->sk_rcvlowat;
		break;

	case SO_SNDLOWAT:
		v.val = 1;
		break;

	case SO_PASSCRED:
		v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
		break;

	case SO_PEERCRED:
	{
		struct ucred peercred;
		if (len > sizeof(peercred))
			len = sizeof(peercred);
		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
		if (copy_to_user(optval, &peercred, len))
			return -EFAULT;
		goto lenout;
	}

	case SO_PEERNAME:
	{
		char address[128];

		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
			return -ENOTCONN;
		if (lv < len)
			return -EINVAL;
		if (copy_to_user(optval, address, len))
			return -EFAULT;
		goto lenout;
	}

	/* Dubious BSD thing... Probably nobody even uses it, but
	 * the UNIX standard wants it for whatever reason... -DaveM
	 */
	case SO_ACCEPTCONN:
		v.val = sk->sk_state == TCP_LISTEN;
		break;

	case SO_PASSSEC:
		v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
		break;

	case SO_PEERSEC:
		return security_socket_getpeersec_stream(sock, optval, optlen, len);

	case SO_MARK:
		v.val = sk->sk_mark;
		break;

	case SO_RXQ_OVFL:
		v.val = !!sock_flag(sk, SOCK_RXQ_OVFL);
		break;

	default:
		return -ENOPROTOOPT;
	}

	if (len > lv)
		len = lv;
	if (copy_to_user(optval, &v, len))
		return -EFAULT;
lenout:
	if (put_user(len, optlen))
		return -EFAULT;
	return 0;
}

| Ingo Molnar | a5b5bb9 | 2006-07-03 00:25:35 -0700 | [diff] [blame] | 978 | /* | 
 | 979 |  * Initialize an sk_lock. | 
 | 980 |  * | 
 | 981 |  * (We also register the sk_lock with the lock validator.) | 
 | 982 |  */ | 
| Dave Jones | b6f99a2 | 2007-03-22 12:27:49 -0700 | [diff] [blame] | 983 | static inline void sock_lock_init(struct sock *sk) | 
| Ingo Molnar | a5b5bb9 | 2006-07-03 00:25:35 -0700 | [diff] [blame] | 984 | { | 
| Peter Zijlstra | ed07536 | 2006-12-06 20:35:24 -0800 | [diff] [blame] | 985 | 	sock_lock_init_class_and_name(sk, | 
 | 986 | 			af_family_slock_key_strings[sk->sk_family], | 
 | 987 | 			af_family_slock_keys + sk->sk_family, | 
 | 988 | 			af_family_key_strings[sk->sk_family], | 
 | 989 | 			af_family_keys + sk->sk_family); | 
| Ingo Molnar | a5b5bb9 | 2006-07-03 00:25:35 -0700 | [diff] [blame] | 990 | } | 
 | 991 |  | 
| Eric Dumazet | 4dc6dc7 | 2009-07-15 23:13:10 +0000 | [diff] [blame] | 992 | /* | 
 | 993 |  * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet, | 
 | 994 |  * even temporarily, because of RCU lookups. sk_node should also be left as is. | 
 | 995 |  */ | 
| Pavel Emelyanov | f1a6c4d | 2007-11-01 00:29:45 -0700 | [diff] [blame] | 996 | static void sock_copy(struct sock *nsk, const struct sock *osk) | 
 | 997 | { | 
 | 998 | #ifdef CONFIG_SECURITY_NETWORK | 
 | 999 | 	void *sptr = nsk->sk_security; | 
 | 1000 | #endif | 
| Eric Dumazet | 4dc6dc7 | 2009-07-15 23:13:10 +0000 | [diff] [blame] | 1001 | 	BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) != | 
| Krishna Kumar | e022f0b | 2009-10-19 23:46:20 +0000 | [diff] [blame] | 1002 | 		     sizeof(osk->sk_node) + sizeof(osk->sk_refcnt) + | 
 | 1003 | 		     sizeof(osk->sk_tx_queue_mapping)); | 
| Eric Dumazet | 4dc6dc7 | 2009-07-15 23:13:10 +0000 | [diff] [blame] | 1004 | 	memcpy(&nsk->sk_copy_start, &osk->sk_copy_start, | 
 | 1005 | 	       osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start)); | 
| Pavel Emelyanov | f1a6c4d | 2007-11-01 00:29:45 -0700 | [diff] [blame] | 1006 | #ifdef CONFIG_SECURITY_NETWORK | 
 | 1007 | 	nsk->sk_security = sptr; | 
 | 1008 | 	security_sk_clone(osk, nsk); | 
 | 1009 | #endif | 
 | 1010 | } | 
 | 1011 |  | 
| Pavel Emelyanov | 2e4afe7 | 2007-11-01 00:36:26 -0700 | [diff] [blame] | 1012 | static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, | 
 | 1013 | 		int family) | 
| Pavel Emelyanov | c308c1b | 2007-11-01 00:33:50 -0700 | [diff] [blame] | 1014 | { | 
 | 1015 | 	struct sock *sk; | 
 | 1016 | 	struct kmem_cache *slab; | 
 | 1017 |  | 
 | 1018 | 	slab = prot->slab; | 
| Eric Dumazet | e912b11 | 2009-07-08 19:36:05 +0000 | [diff] [blame] | 1019 | 	if (slab != NULL) { | 
 | 1020 | 		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO); | 
 | 1021 | 		if (!sk) | 
 | 1022 | 			return sk; | 
 | 1023 | 		if (priority & __GFP_ZERO) { | 
 | 1024 | 			/* | 
 | 1025 | 			 * caches using SLAB_DESTROY_BY_RCU should leave | 
 | 1026 | 			 * sk_node.next unmodified. Special care is taken | 
 | 1027 | 			 * when initializing the object to zero. | 
 | 1028 | 			 */ | 
 | 1029 | 			if (offsetof(struct sock, sk_node.next) != 0) | 
 | 1030 | 				memset(sk, 0, offsetof(struct sock, sk_node.next)); | 
 | 1031 | 			memset(&sk->sk_node.pprev, 0, | 
 | 1032 | 			       prot->obj_size - offsetof(struct sock, | 
 | 1033 | 							 sk_node.pprev)); | 
 | 1034 | 		} | 
 | 1035 | 	} else | 
 | 1036 | 		sk = kmalloc(prot->obj_size, priority); | 
 | 1038 |  | 
| Pavel Emelyanov | 2e4afe7 | 2007-11-01 00:36:26 -0700 | [diff] [blame] | 1039 | 	if (sk != NULL) { | 
| Vegard Nossum | a98b65a | 2009-02-26 14:46:57 +0100 | [diff] [blame] | 1040 | 		kmemcheck_annotate_bitfield(sk, flags); | 
 | 1041 |  | 
| Pavel Emelyanov | 2e4afe7 | 2007-11-01 00:36:26 -0700 | [diff] [blame] | 1042 | 		if (security_sk_alloc(sk, family, priority)) | 
 | 1043 | 			goto out_free; | 
 | 1044 |  | 
 | 1045 | 		if (!try_module_get(prot->owner)) | 
 | 1046 | 			goto out_free_sec; | 
| Krishna Kumar | e022f0b | 2009-10-19 23:46:20 +0000 | [diff] [blame] | 1047 | 		sk_tx_queue_clear(sk); | 
| Pavel Emelyanov | 2e4afe7 | 2007-11-01 00:36:26 -0700 | [diff] [blame] | 1048 | 	} | 
 | 1049 |  | 
| Pavel Emelyanov | c308c1b | 2007-11-01 00:33:50 -0700 | [diff] [blame] | 1050 | 	return sk; | 
| Pavel Emelyanov | 2e4afe7 | 2007-11-01 00:36:26 -0700 | [diff] [blame] | 1051 |  | 
 | 1052 | out_free_sec: | 
 | 1053 | 	security_sk_free(sk); | 
 | 1054 | out_free: | 
 | 1055 | 	if (slab != NULL) | 
 | 1056 | 		kmem_cache_free(slab, sk); | 
 | 1057 | 	else | 
 | 1058 | 		kfree(sk); | 
 | 1059 | 	return NULL; | 
| Pavel Emelyanov | c308c1b | 2007-11-01 00:33:50 -0700 | [diff] [blame] | 1060 | } | 
 | 1061 |  | 
 | 1062 | static void sk_prot_free(struct proto *prot, struct sock *sk) | 
 | 1063 | { | 
 | 1064 | 	struct kmem_cache *slab; | 
| Pavel Emelyanov | 2e4afe7 | 2007-11-01 00:36:26 -0700 | [diff] [blame] | 1065 | 	struct module *owner; | 
| Pavel Emelyanov | c308c1b | 2007-11-01 00:33:50 -0700 | [diff] [blame] | 1066 |  | 
| Pavel Emelyanov | 2e4afe7 | 2007-11-01 00:36:26 -0700 | [diff] [blame] | 1067 | 	owner = prot->owner; | 
| Pavel Emelyanov | c308c1b | 2007-11-01 00:33:50 -0700 | [diff] [blame] | 1068 | 	slab = prot->slab; | 
| Pavel Emelyanov | 2e4afe7 | 2007-11-01 00:36:26 -0700 | [diff] [blame] | 1069 |  | 
 | 1070 | 	security_sk_free(sk); | 
| Pavel Emelyanov | c308c1b | 2007-11-01 00:33:50 -0700 | [diff] [blame] | 1071 | 	if (slab != NULL) | 
 | 1072 | 		kmem_cache_free(slab, sk); | 
 | 1073 | 	else | 
 | 1074 | 		kfree(sk); | 
| Pavel Emelyanov | 2e4afe7 | 2007-11-01 00:36:26 -0700 | [diff] [blame] | 1075 | 	module_put(owner); | 
| Pavel Emelyanov | c308c1b | 2007-11-01 00:33:50 -0700 | [diff] [blame] | 1076 | } | 
 | 1077 |  | 
| Herbert Xu | f845172 | 2010-05-24 00:12:34 -0700 | [diff] [blame] | 1078 | #ifdef CONFIG_CGROUPS | 
 | 1079 | void sock_update_classid(struct sock *sk) | 
 | 1080 | { | 
 | 1081 | 	u32 classid = task_cls_classid(current); | 
 | 1082 |  | 
 | 1083 | 	if (classid && classid != sk->sk_classid) | 
 | 1084 | 		sk->sk_classid = classid; | 
 | 1085 | } | 
| Herbert Xu | 8286274 | 2010-05-24 00:14:10 -0700 | [diff] [blame] | 1086 | EXPORT_SYMBOL(sock_update_classid); | 
| Herbert Xu | f845172 | 2010-05-24 00:12:34 -0700 | [diff] [blame] | 1087 | #endif | 
 | 1088 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1089 | /** | 
 | 1090 |  *	sk_alloc - All socket objects are allocated here | 
| Randy Dunlap | c4ea43c | 2007-10-12 21:17:49 -0700 | [diff] [blame] | 1091 |  *	@net: the applicable net namespace | 
| Pavel Pisa | 4dc3b16 | 2005-05-01 08:59:25 -0700 | [diff] [blame] | 1092 |  *	@family: protocol family | 
 | 1093 |  *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) | 
 | 1094 |  *	@prot: struct proto associated with this new sock instance | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1095 |  */ | 
| Eric W. Biederman | 1b8d7ae | 2007-10-08 23:24:22 -0700 | [diff] [blame] | 1096 | struct sock *sk_alloc(struct net *net, int family, gfp_t priority, | 
| Pavel Emelyanov | 6257ff2 | 2007-11-01 00:39:31 -0700 | [diff] [blame] | 1097 | 		      struct proto *prot) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1098 | { | 
| Pavel Emelyanov | c308c1b | 2007-11-01 00:33:50 -0700 | [diff] [blame] | 1099 | 	struct sock *sk; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1100 |  | 
| Pavel Emelyanov | 154adbc | 2007-11-01 00:38:43 -0700 | [diff] [blame] | 1101 | 	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1102 | 	if (sk) { | 
| Pavel Emelyanov | 154adbc | 2007-11-01 00:38:43 -0700 | [diff] [blame] | 1103 | 		sk->sk_family = family; | 
 | 1104 | 		/* | 
 | 1105 | 		 * See comment in struct sock definition to understand | 
 | 1106 | 		 * why we need sk_prot_creator -acme | 
 | 1107 | 		 */ | 
 | 1108 | 		sk->sk_prot = sk->sk_prot_creator = prot; | 
 | 1109 | 		sock_lock_init(sk); | 
| YOSHIFUJI Hideaki | 3b1e0a6 | 2008-03-26 02:26:21 +0900 | [diff] [blame] | 1110 | 		sock_net_set(sk, get_net(net)); | 
| Jarek Poplawski | d66ee05 | 2009-08-30 23:15:36 +0000 | [diff] [blame] | 1111 | 		atomic_set(&sk->sk_wmem_alloc, 1); | 
| Herbert Xu | f845172 | 2010-05-24 00:12:34 -0700 | [diff] [blame] | 1112 |  | 
 | 1113 | 		sock_update_classid(sk); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1114 | 	} | 
| Frank Filz | a79af59 | 2005-09-27 15:23:38 -0700 | [diff] [blame] | 1115 |  | 
| Pavel Emelyanov | 2e4afe7 | 2007-11-01 00:36:26 -0700 | [diff] [blame] | 1116 | 	return sk; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1117 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1118 | EXPORT_SYMBOL(sk_alloc); | 
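/*
 * Illustrative sketch (not part of this file): how a protocol might pair
 * sk_alloc() with sk_free().  "example_proto" stands in for the caller's
 * struct proto; the PF_INET family here is just a placeholder.
 */
static struct sock *example_sock_create(struct net *net,
					struct proto *example_proto)
{
	struct sock *sk = sk_alloc(net, PF_INET, GFP_KERNEL, example_proto);

	if (!sk)
		return NULL;
	sock_init_data(NULL, sk);	/* install the default callbacks below */
	return sk;			/* released later with sk_free(sk) */
}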
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1119 |  | 
| Eric Dumazet | 2b85a34 | 2009-06-11 02:55:43 -0700 | [diff] [blame] | 1120 | static void __sk_free(struct sock *sk) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1121 | { | 
 | 1122 | 	struct sk_filter *filter; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1123 |  | 
 | 1124 | 	if (sk->sk_destruct) | 
 | 1125 | 		sk->sk_destruct(sk); | 
 | 1126 |  | 
| Paul E. McKenney | a898def | 2010-02-22 17:04:49 -0800 | [diff] [blame] | 1127 | 	filter = rcu_dereference_check(sk->sk_filter, | 
 | 1128 | 				       atomic_read(&sk->sk_wmem_alloc) == 0); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1129 | 	if (filter) { | 
| Pavel Emelyanov | 309dd5f | 2007-10-17 21:21:51 -0700 | [diff] [blame] | 1130 | 		sk_filter_uncharge(sk, filter); | 
| Dmitry Mishin | fda9ef5 | 2006-08-31 15:28:39 -0700 | [diff] [blame] | 1131 | 		rcu_assign_pointer(sk->sk_filter, NULL); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1132 | 	} | 
 | 1133 |  | 
| Patrick Ohly | 20d4947 | 2009-02-12 05:03:38 +0000 | [diff] [blame] | 1134 | 	sock_disable_timestamp(sk, SOCK_TIMESTAMP); | 
 | 1135 | 	sock_disable_timestamp(sk, SOCK_TIMESTAMPING_RX_SOFTWARE); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1136 |  | 
 | 1137 | 	if (atomic_read(&sk->sk_omem_alloc)) | 
 | 1138 | 		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", | 
| Harvey Harrison | 0dc4787 | 2008-03-05 20:47:47 -0800 | [diff] [blame] | 1139 | 		       __func__, atomic_read(&sk->sk_omem_alloc)); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1140 |  | 
| Eric W. Biederman | 109f6e3 | 2010-06-13 03:30:14 +0000 | [diff] [blame] | 1141 | 	if (sk->sk_peer_cred) | 
 | 1142 | 		put_cred(sk->sk_peer_cred); | 
 | 1143 | 	put_pid(sk->sk_peer_pid); | 
| YOSHIFUJI Hideaki | 3b1e0a6 | 2008-03-26 02:26:21 +0900 | [diff] [blame] | 1144 | 	put_net(sock_net(sk)); | 
| Pavel Emelyanov | c308c1b | 2007-11-01 00:33:50 -0700 | [diff] [blame] | 1145 | 	sk_prot_free(sk->sk_prot_creator, sk); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1146 | } | 
| Eric Dumazet | 2b85a34 | 2009-06-11 02:55:43 -0700 | [diff] [blame] | 1147 |  | 
 | 1148 | void sk_free(struct sock *sk) | 
 | 1149 | { | 
 | 1150 | 	/* | 
 | 1151 | 	 * We subtract one from sk_wmem_alloc and can then tell whether | 
 | 1152 | 	 * packets are still queued in some tx queue. | 
 | 1153 | 	 * If the count is non-zero, sock_wfree() will call __sk_free(sk) later. | 
 | 1154 | 	 */ | 
 | 1155 | 	if (atomic_dec_and_test(&sk->sk_wmem_alloc)) | 
 | 1156 | 		__sk_free(sk); | 
 | 1157 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1158 | EXPORT_SYMBOL(sk_free); | 
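/*
 * Illustrative note (not part of this file): sk_wmem_alloc doubles as the
 * "packets in flight" count.  sk_alloc() starts it at 1, each skb queued
 * via skb_set_owner_w() adds its truesize, and sk_free() drops only the
 * initial unit.  Whichever of sk_free() or the last sock_wfree() brings
 * the count to zero runs __sk_free(), so a socket with skbs still in a
 * qdisc safely outlives its last sock_put().
 */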
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1159 |  | 
| Denis V. Lunev | edf0208 | 2008-02-29 11:18:32 -0800 | [diff] [blame] | 1160 | /* | 
 | 1161 |  * The last sock_put should drop the reference to sk->sk_net. It has already | 
 | 1162 |  * been dropped in sk_change_net. Taking a reference to the stopping namespace | 
 | 1163 |  * is not an option. | 
 | 1164 |  * Take a reference to the socket to remove it from the hash _alive_, and after | 
 | 1165 |  * that destroy it in the context of init_net. | 
 | 1166 |  */ | 
 | 1167 | void sk_release_kernel(struct sock *sk) | 
 | 1168 | { | 
 | 1169 | 	if (sk == NULL || sk->sk_socket == NULL) | 
 | 1170 | 		return; | 
 | 1171 |  | 
 | 1172 | 	sock_hold(sk); | 
 | 1173 | 	sock_release(sk->sk_socket); | 
| Denis V. Lunev | 65a18ec | 2008-04-16 01:59:46 -0700 | [diff] [blame] | 1174 | 	release_net(sock_net(sk)); | 
| YOSHIFUJI Hideaki | 3b1e0a6 | 2008-03-26 02:26:21 +0900 | [diff] [blame] | 1175 | 	sock_net_set(sk, get_net(&init_net)); | 
| Denis V. Lunev | edf0208 | 2008-02-29 11:18:32 -0800 | [diff] [blame] | 1176 | 	sock_put(sk); | 
 | 1177 | } | 
| David S. Miller | 45af175 | 2008-02-29 11:33:19 -0800 | [diff] [blame] | 1178 | EXPORT_SYMBOL(sk_release_kernel); | 
| Denis V. Lunev | edf0208 | 2008-02-29 11:18:32 -0800 | [diff] [blame] | 1179 |  | 
| Al Viro | dd0fc66 | 2005-10-07 07:46:04 +0100 | [diff] [blame] | 1180 | struct sock *sk_clone(const struct sock *sk, const gfp_t priority) | 
| Arnaldo Carvalho de Melo | 87d11ce | 2005-08-09 20:10:12 -0700 | [diff] [blame] | 1181 | { | 
| Pavel Emelyanov | 8fd1d17 | 2007-11-01 00:37:32 -0700 | [diff] [blame] | 1182 | 	struct sock *newsk; | 
| Arnaldo Carvalho de Melo | 87d11ce | 2005-08-09 20:10:12 -0700 | [diff] [blame] | 1183 |  | 
| Pavel Emelyanov | 8fd1d17 | 2007-11-01 00:37:32 -0700 | [diff] [blame] | 1184 | 	newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family); | 
| Arnaldo Carvalho de Melo | 87d11ce | 2005-08-09 20:10:12 -0700 | [diff] [blame] | 1185 | 	if (newsk != NULL) { | 
 | 1186 | 		struct sk_filter *filter; | 
 | 1187 |  | 
| Venkat Yekkirala | 892c141 | 2006-08-04 23:08:56 -0700 | [diff] [blame] | 1188 | 		sock_copy(newsk, sk); | 
| Arnaldo Carvalho de Melo | 87d11ce | 2005-08-09 20:10:12 -0700 | [diff] [blame] | 1189 |  | 
 | 1190 | 		/* SANITY */ | 
| YOSHIFUJI Hideaki | 3b1e0a6 | 2008-03-26 02:26:21 +0900 | [diff] [blame] | 1191 | 		get_net(sock_net(newsk)); | 
| Arnaldo Carvalho de Melo | 87d11ce | 2005-08-09 20:10:12 -0700 | [diff] [blame] | 1192 | 		sk_node_init(&newsk->sk_node); | 
 | 1193 | 		sock_lock_init(newsk); | 
 | 1194 | 		bh_lock_sock(newsk); | 
| Eric Dumazet | fa438cc | 2007-03-04 16:05:44 -0800 | [diff] [blame] | 1195 | 		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL; | 
| Zhu Yi | 8eae939 | 2010-03-04 18:01:40 +0000 | [diff] [blame] | 1196 | 		newsk->sk_backlog.len = 0; | 
| Arnaldo Carvalho de Melo | 87d11ce | 2005-08-09 20:10:12 -0700 | [diff] [blame] | 1197 |  | 
 | 1198 | 		atomic_set(&newsk->sk_rmem_alloc, 0); | 
| Eric Dumazet | 2b85a34 | 2009-06-11 02:55:43 -0700 | [diff] [blame] | 1199 | 		/* | 
 | 1200 | 		 * sk_wmem_alloc set to one (see sk_free() and sock_wfree()) | 
 | 1201 | 		 */ | 
 | 1202 | 		atomic_set(&newsk->sk_wmem_alloc, 1); | 
| Arnaldo Carvalho de Melo | 87d11ce | 2005-08-09 20:10:12 -0700 | [diff] [blame] | 1203 | 		atomic_set(&newsk->sk_omem_alloc, 0); | 
 | 1204 | 		skb_queue_head_init(&newsk->sk_receive_queue); | 
 | 1205 | 		skb_queue_head_init(&newsk->sk_write_queue); | 
| Chris Leech | 97fc2f0 | 2006-05-23 17:55:33 -0700 | [diff] [blame] | 1206 | #ifdef CONFIG_NET_DMA | 
 | 1207 | 		skb_queue_head_init(&newsk->sk_async_wait_queue); | 
 | 1208 | #endif | 
| Arnaldo Carvalho de Melo | 87d11ce | 2005-08-09 20:10:12 -0700 | [diff] [blame] | 1209 |  | 
| Eric Dumazet | b6c6712 | 2010-04-08 23:03:29 +0000 | [diff] [blame] | 1210 | 		spin_lock_init(&newsk->sk_dst_lock); | 
| Arnaldo Carvalho de Melo | 87d11ce | 2005-08-09 20:10:12 -0700 | [diff] [blame] | 1211 | 		rwlock_init(&newsk->sk_callback_lock); | 
| Peter Zijlstra | 443aef0 | 2007-07-19 01:49:00 -0700 | [diff] [blame] | 1212 | 		lockdep_set_class_and_name(&newsk->sk_callback_lock, | 
 | 1213 | 				af_callback_keys + newsk->sk_family, | 
 | 1214 | 				af_family_clock_key_strings[newsk->sk_family]); | 
| Arnaldo Carvalho de Melo | 87d11ce | 2005-08-09 20:10:12 -0700 | [diff] [blame] | 1215 |  | 
 | 1216 | 		newsk->sk_dst_cache	= NULL; | 
 | 1217 | 		newsk->sk_wmem_queued	= 0; | 
 | 1218 | 		newsk->sk_forward_alloc = 0; | 
 | 1219 | 		newsk->sk_send_head	= NULL; | 
| Arnaldo Carvalho de Melo | 87d11ce | 2005-08-09 20:10:12 -0700 | [diff] [blame] | 1220 | 		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; | 
 | 1221 |  | 
 | 1222 | 		sock_reset_flag(newsk, SOCK_DONE); | 
 | 1223 | 		skb_queue_head_init(&newsk->sk_error_queue); | 
 | 1224 |  | 
 | 1225 | 		filter = newsk->sk_filter; | 
 | 1226 | 		if (filter != NULL) | 
 | 1227 | 			sk_filter_charge(newsk, filter); | 
 | 1228 |  | 
 | 1229 | 		if (unlikely(xfrm_sk_clone_policy(newsk))) { | 
 | 1230 | 			/* It is still a raw copy of the parent, so invalidate | 
 | 1231 | 			 * the destructor and do a plain sk_free() */ | 
 | 1232 | 			newsk->sk_destruct = NULL; | 
 | 1233 | 			sk_free(newsk); | 
 | 1234 | 			newsk = NULL; | 
 | 1235 | 			goto out; | 
 | 1236 | 		} | 
 | 1237 |  | 
 | 1238 | 		newsk->sk_err	   = 0; | 
 | 1239 | 		newsk->sk_priority = 0; | 
| Eric Dumazet | 4dc6dc7 | 2009-07-15 23:13:10 +0000 | [diff] [blame] | 1240 | 		/* | 
 | 1241 | 		 * Before updating sk_refcnt, we must commit prior changes to memory | 
 | 1242 | 		 * (see Documentation/RCU/rculist_nulls.txt for details) | 
 | 1243 | 		 */ | 
 | 1244 | 		smp_wmb(); | 
| Arnaldo Carvalho de Melo | 87d11ce | 2005-08-09 20:10:12 -0700 | [diff] [blame] | 1245 | 		atomic_set(&newsk->sk_refcnt, 2); | 
 | 1246 |  | 
 | 1247 | 		/* | 
 | 1248 | 		 * Increment the counter in the same struct proto as the master | 
 | 1249 | 		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that | 
 | 1250 | 		 * is the same as sk->sk_prot->socks, as this field was copied | 
 | 1251 | 		 * with memcpy). | 
 | 1252 | 		 * | 
 | 1253 | 		 * This _changes_ the previous behaviour, where | 
 | 1254 | 		 * tcp_create_openreq_child always incremented the | 
 | 1255 | 		 * equivalent of tcp_prot->socks (inet_sock_nr), so this has | 
 | 1256 | 		 * to be taken into account in all callers. -acme | 
 | 1257 | 		 */ | 
 | 1258 | 		sk_refcnt_debug_inc(newsk); | 
| David S. Miller | 972692e | 2008-06-17 22:41:38 -0700 | [diff] [blame] | 1259 | 		sk_set_socket(newsk, NULL); | 
| Eric Dumazet | 4381548 | 2010-04-29 11:01:49 +0000 | [diff] [blame] | 1260 | 		newsk->sk_wq = NULL; | 
| Arnaldo Carvalho de Melo | 87d11ce | 2005-08-09 20:10:12 -0700 | [diff] [blame] | 1261 |  | 
 | 1262 | 		if (newsk->sk_prot->sockets_allocated) | 
| Eric Dumazet | 1748376 | 2008-11-25 21:16:35 -0800 | [diff] [blame] | 1263 | 			percpu_counter_inc(newsk->sk_prot->sockets_allocated); | 
| Octavian Purdila | 704da560 | 2010-01-08 00:00:09 -0800 | [diff] [blame] | 1264 |  | 
 | 1265 | 		if (sock_flag(newsk, SOCK_TIMESTAMP) || | 
 | 1266 | 		    sock_flag(newsk, SOCK_TIMESTAMPING_RX_SOFTWARE)) | 
 | 1267 | 			net_enable_timestamp(); | 
| Arnaldo Carvalho de Melo | 87d11ce | 2005-08-09 20:10:12 -0700 | [diff] [blame] | 1268 | 	} | 
 | 1269 | out: | 
 | 1270 | 	return newsk; | 
 | 1271 | } | 
| Arnaldo Carvalho de Melo | 87d11ce | 2005-08-09 20:10:12 -0700 | [diff] [blame] | 1272 | EXPORT_SYMBOL_GPL(sk_clone); | 
 | 1273 |  | 
| Andi Kleen | 9958089 | 2007-04-20 17:12:43 -0700 | [diff] [blame] | 1274 | void sk_setup_caps(struct sock *sk, struct dst_entry *dst) | 
 | 1275 | { | 
 | 1276 | 	__sk_dst_set(sk, dst); | 
 | 1277 | 	sk->sk_route_caps = dst->dev->features; | 
 | 1278 | 	if (sk->sk_route_caps & NETIF_F_GSO) | 
| Herbert Xu | 4fcd6b9 | 2007-05-31 22:15:50 -0700 | [diff] [blame] | 1279 | 		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; | 
| Eric Dumazet | a465419 | 2010-05-16 00:36:33 -0700 | [diff] [blame] | 1280 | 	sk->sk_route_caps &= ~sk->sk_route_nocaps; | 
| Andi Kleen | 9958089 | 2007-04-20 17:12:43 -0700 | [diff] [blame] | 1281 | 	if (sk_can_gso(sk)) { | 
| Peter P Waskiewicz Jr | 82cc1a7 | 2008-03-21 03:43:19 -0700 | [diff] [blame] | 1282 | 		if (dst->header_len) { | 
| Andi Kleen | 9958089 | 2007-04-20 17:12:43 -0700 | [diff] [blame] | 1283 | 			sk->sk_route_caps &= ~NETIF_F_GSO_MASK; | 
| Peter P Waskiewicz Jr | 82cc1a7 | 2008-03-21 03:43:19 -0700 | [diff] [blame] | 1284 | 		} else { | 
| Andi Kleen | 9958089 | 2007-04-20 17:12:43 -0700 | [diff] [blame] | 1285 | 			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; | 
| Peter P Waskiewicz Jr | 82cc1a7 | 2008-03-21 03:43:19 -0700 | [diff] [blame] | 1286 | 			sk->sk_gso_max_size = dst->dev->gso_max_size; | 
 | 1287 | 		} | 
| Andi Kleen | 9958089 | 2007-04-20 17:12:43 -0700 | [diff] [blame] | 1288 | 	} | 
 | 1289 | } | 
 | 1290 | EXPORT_SYMBOL_GPL(sk_setup_caps); | 
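/*
 * Illustrative sketch (not part of this file): how a transport might act
 * on the route capabilities computed above when preparing an skb.  The
 * function name is hypothetical; the flags are standard netdev features.
 */
static void example_checksum_choice(struct sock *sk, struct sk_buff *skb)
{
	if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
		skb->ip_summed = CHECKSUM_PARTIAL;	/* defer to the device */
	else
		skb->ip_summed = CHECKSUM_NONE;		/* checksum in software */
}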
 | 1291 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1292 | void __init sk_init(void) | 
 | 1293 | { | 
| Jan Beulich | 4481374 | 2009-09-21 17:03:05 -0700 | [diff] [blame] | 1294 | 	if (totalram_pages <= 4096) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1295 | 		sysctl_wmem_max = 32767; | 
 | 1296 | 		sysctl_rmem_max = 32767; | 
 | 1297 | 		sysctl_wmem_default = 32767; | 
 | 1298 | 		sysctl_rmem_default = 32767; | 
| Jan Beulich | 4481374 | 2009-09-21 17:03:05 -0700 | [diff] [blame] | 1299 | 	} else if (totalram_pages >= 131072) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1300 | 		sysctl_wmem_max = 131071; | 
 | 1301 | 		sysctl_rmem_max = 131071; | 
 | 1302 | 	} | 
 | 1303 | } | 
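/*
 * Worked numbers for the thresholds above, assuming 4 KB pages: at
 * 4096 pages (16 MB of RAM) or less, both the defaults and the maxima
 * shrink to 32 KB - 1 bytes; at 131072 pages (512 MB) or more, the
 * maxima grow to 128 KB - 1.  Machines in between keep the compile-time
 * defaults.
 */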
 | 1304 |  | 
 | 1305 | /* | 
 | 1306 |  *	Simple resource managers for sockets. | 
 | 1307 |  */ | 
 | 1308 |  | 
 | 1309 |  | 
| YOSHIFUJI Hideaki | 4ec93ed | 2007-02-09 23:24:36 +0900 | [diff] [blame] | 1310 | /* | 
 | 1311 |  * Write buffer destructor automatically called from kfree_skb. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1312 |  */ | 
 | 1313 | void sock_wfree(struct sk_buff *skb) | 
 | 1314 | { | 
 | 1315 | 	struct sock *sk = skb->sk; | 
| Eric Dumazet | d99927f | 2009-09-24 10:49:24 +0000 | [diff] [blame] | 1316 | 	unsigned int len = skb->truesize; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1317 |  | 
| Eric Dumazet | d99927f | 2009-09-24 10:49:24 +0000 | [diff] [blame] | 1318 | 	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) { | 
 | 1319 | 		/* | 
 | 1320 | 		 * Keep a reference on sk_wmem_alloc; it will be released | 
 | 1321 | 		 * after the sk_write_space() call. | 
 | 1322 | 		 */ | 
 | 1323 | 		atomic_sub(len - 1, &sk->sk_wmem_alloc); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1324 | 		sk->sk_write_space(sk); | 
| Eric Dumazet | d99927f | 2009-09-24 10:49:24 +0000 | [diff] [blame] | 1325 | 		len = 1; | 
 | 1326 | 	} | 
| Eric Dumazet | 2b85a34 | 2009-06-11 02:55:43 -0700 | [diff] [blame] | 1327 | 	/* | 
| Eric Dumazet | d99927f | 2009-09-24 10:49:24 +0000 | [diff] [blame] | 1328 | 	 * If sk_wmem_alloc reaches 0, we must finish what sk_free() | 
 | 1329 | 	 * could not do because of in-flight packets. | 
| Eric Dumazet | 2b85a34 | 2009-06-11 02:55:43 -0700 | [diff] [blame] | 1330 | 	 */ | 
| Eric Dumazet | d99927f | 2009-09-24 10:49:24 +0000 | [diff] [blame] | 1331 | 	if (atomic_sub_and_test(len, &sk->sk_wmem_alloc)) | 
| Eric Dumazet | 2b85a34 | 2009-06-11 02:55:43 -0700 | [diff] [blame] | 1332 | 		__sk_free(sk); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1333 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1334 | EXPORT_SYMBOL(sock_wfree); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1335 |  | 
| YOSHIFUJI Hideaki | 4ec93ed | 2007-02-09 23:24:36 +0900 | [diff] [blame] | 1336 | /* | 
 | 1337 |  * Read buffer destructor automatically called from kfree_skb. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1338 |  */ | 
 | 1339 | void sock_rfree(struct sk_buff *skb) | 
 | 1340 | { | 
 | 1341 | 	struct sock *sk = skb->sk; | 
 | 1342 |  | 
 | 1343 | 	atomic_sub(skb->truesize, &sk->sk_rmem_alloc); | 
| Hideo Aoki | 3ab224b | 2007-12-31 00:11:19 -0800 | [diff] [blame] | 1344 | 	sk_mem_uncharge(skb->sk, skb->truesize); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1345 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1346 | EXPORT_SYMBOL(sock_rfree); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1347 |  | 
 | 1348 |  | 
 | 1349 | int sock_i_uid(struct sock *sk) | 
 | 1350 | { | 
 | 1351 | 	int uid; | 
 | 1352 |  | 
 | 1353 | 	read_lock(&sk->sk_callback_lock); | 
 | 1354 | 	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0; | 
 | 1355 | 	read_unlock(&sk->sk_callback_lock); | 
 | 1356 | 	return uid; | 
 | 1357 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1358 | EXPORT_SYMBOL(sock_i_uid); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1359 |  | 
 | 1360 | unsigned long sock_i_ino(struct sock *sk) | 
 | 1361 | { | 
 | 1362 | 	unsigned long ino; | 
 | 1363 |  | 
 | 1364 | 	read_lock(&sk->sk_callback_lock); | 
 | 1365 | 	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; | 
 | 1366 | 	read_unlock(&sk->sk_callback_lock); | 
 | 1367 | 	return ino; | 
 | 1368 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1369 | EXPORT_SYMBOL(sock_i_ino); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1370 |  | 
 | 1371 | /* | 
 | 1372 |  * Allocate a skb from the socket's send buffer. | 
 | 1373 |  */ | 
| Victor Fusco | 86a76ca | 2005-07-08 14:57:47 -0700 | [diff] [blame] | 1374 | struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, | 
| Al Viro | dd0fc66 | 2005-10-07 07:46:04 +0100 | [diff] [blame] | 1375 | 			     gfp_t priority) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1376 | { | 
 | 1377 | 	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1378 | 		struct sk_buff *skb = alloc_skb(size, priority); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1379 | 		if (skb) { | 
 | 1380 | 			skb_set_owner_w(skb, sk); | 
 | 1381 | 			return skb; | 
 | 1382 | 		} | 
 | 1383 | 	} | 
 | 1384 | 	return NULL; | 
 | 1385 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1386 | EXPORT_SYMBOL(sock_wmalloc); | 
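/*
 * Illustrative sketch (not part of this file): allocating a small control
 * packet charged against the socket's send buffer.  The 256-byte size is
 * an arbitrary example.
 */
static struct sk_buff *example_ctl_skb(struct sock *sk)
{
	/* force == 0: fail once sk_wmem_alloc already exceeds sk_sndbuf.
	 * On success, skb_set_owner_w() has made sock_wfree() the skb's
	 * destructor, so a later kfree_skb() uncharges the socket. */
	return sock_wmalloc(sk, 256, 0, GFP_ATOMIC);
}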
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1387 |  | 
 | 1388 | /* | 
 | 1389 |  * Allocate a skb from the socket's receive buffer. | 
| YOSHIFUJI Hideaki | 4ec93ed | 2007-02-09 23:24:36 +0900 | [diff] [blame] | 1390 |  */ | 
| Victor Fusco | 86a76ca | 2005-07-08 14:57:47 -0700 | [diff] [blame] | 1391 | struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, | 
| Al Viro | dd0fc66 | 2005-10-07 07:46:04 +0100 | [diff] [blame] | 1392 | 			     gfp_t priority) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1393 | { | 
 | 1394 | 	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { | 
 | 1395 | 		struct sk_buff *skb = alloc_skb(size, priority); | 
 | 1396 | 		if (skb) { | 
 | 1397 | 			skb_set_owner_r(skb, sk); | 
 | 1398 | 			return skb; | 
 | 1399 | 		} | 
 | 1400 | 	} | 
 | 1401 | 	return NULL; | 
 | 1402 | } | 
 | 1403 |  | 
| YOSHIFUJI Hideaki | 4ec93ed | 2007-02-09 23:24:36 +0900 | [diff] [blame] | 1404 | /* | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1405 |  * Allocate a memory block from the socket's option memory buffer. | 
| YOSHIFUJI Hideaki | 4ec93ed | 2007-02-09 23:24:36 +0900 | [diff] [blame] | 1406 |  */ | 
| Al Viro | dd0fc66 | 2005-10-07 07:46:04 +0100 | [diff] [blame] | 1407 | void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1408 | { | 
 | 1409 | 	if ((unsigned)size <= sysctl_optmem_max && | 
 | 1410 | 	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { | 
 | 1411 | 		void *mem; | 
 | 1412 | 		/* First do the add, to avoid the race if kmalloc | 
| YOSHIFUJI Hideaki | 4ec93ed | 2007-02-09 23:24:36 +0900 | [diff] [blame] | 1413 | 		 * might sleep. | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1414 | 		 */ | 
 | 1415 | 		atomic_add(size, &sk->sk_omem_alloc); | 
 | 1416 | 		mem = kmalloc(size, priority); | 
 | 1417 | 		if (mem) | 
 | 1418 | 			return mem; | 
 | 1419 | 		atomic_sub(size, &sk->sk_omem_alloc); | 
 | 1420 | 	} | 
 | 1421 | 	return NULL; | 
 | 1422 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1423 | EXPORT_SYMBOL(sock_kmalloc); | 
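/*
 * Illustrative sketch (not part of this file): option memory must be
 * released with sock_kfree_s() using the same size that was allocated,
 * because sk_omem_alloc is adjusted by the caller-supplied size.
 */
static int example_option_scratch(struct sock *sk)
{
	void *buf = sock_kmalloc(sk, 128, GFP_KERNEL);

	if (!buf)
		return -ENOBUFS;
	/* ... fill in and use the scratch buffer ... */
	sock_kfree_s(sk, buf, 128);	/* size must match the allocation */
	return 0;
}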
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1424 |  | 
 | 1425 | /* | 
 | 1426 |  * Free an option memory block. | 
 | 1427 |  */ | 
 | 1428 | void sock_kfree_s(struct sock *sk, void *mem, int size) | 
 | 1429 | { | 
 | 1430 | 	kfree(mem); | 
 | 1431 | 	atomic_sub(size, &sk->sk_omem_alloc); | 
 | 1432 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1433 | EXPORT_SYMBOL(sock_kfree_s); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1434 |  | 
 | 1435 | /* It is almost wait_for_tcp_memory minus release_sock/lock_sock. | 
 | 1436 |    I think these locks should be removed for datagram sockets. | 
 | 1437 |  */ | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1438 | static long sock_wait_for_wmem(struct sock *sk, long timeo) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1439 | { | 
 | 1440 | 	DEFINE_WAIT(wait); | 
 | 1441 |  | 
 | 1442 | 	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); | 
 | 1443 | 	for (;;) { | 
 | 1444 | 		if (!timeo) | 
 | 1445 | 			break; | 
 | 1446 | 		if (signal_pending(current)) | 
 | 1447 | 			break; | 
 | 1448 | 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | 
| Eric Dumazet | aa39514 | 2010-04-20 13:03:51 +0000 | [diff] [blame] | 1449 | 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1450 | 		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) | 
 | 1451 | 			break; | 
 | 1452 | 		if (sk->sk_shutdown & SEND_SHUTDOWN) | 
 | 1453 | 			break; | 
 | 1454 | 		if (sk->sk_err) | 
 | 1455 | 			break; | 
 | 1456 | 		timeo = schedule_timeout(timeo); | 
 | 1457 | 	} | 
| Eric Dumazet | aa39514 | 2010-04-20 13:03:51 +0000 | [diff] [blame] | 1458 | 	finish_wait(sk_sleep(sk), &wait); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1459 | 	return timeo; | 
 | 1460 | } | 
 | 1461 |  | 
 | 1462 |  | 
 | 1463 | /* | 
 | 1464 |  *	Generic send/receive buffer handlers | 
 | 1465 |  */ | 
 | 1466 |  | 
| Herbert Xu | 4cc7f68 | 2009-02-04 16:55:54 -0800 | [diff] [blame] | 1467 | struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, | 
 | 1468 | 				     unsigned long data_len, int noblock, | 
 | 1469 | 				     int *errcode) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1470 | { | 
 | 1471 | 	struct sk_buff *skb; | 
| Al Viro | 7d877f3 | 2005-10-21 03:20:43 -0400 | [diff] [blame] | 1472 | 	gfp_t gfp_mask; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1473 | 	long timeo; | 
 | 1474 | 	int err; | 
 | 1475 |  | 
 | 1476 | 	gfp_mask = sk->sk_allocation; | 
 | 1477 | 	if (gfp_mask & __GFP_WAIT) | 
 | 1478 | 		gfp_mask |= __GFP_REPEAT; | 
 | 1479 |  | 
 | 1480 | 	timeo = sock_sndtimeo(sk, noblock); | 
 | 1481 | 	while (1) { | 
 | 1482 | 		err = sock_error(sk); | 
 | 1483 | 		if (err != 0) | 
 | 1484 | 			goto failure; | 
 | 1485 |  | 
 | 1486 | 		err = -EPIPE; | 
 | 1487 | 		if (sk->sk_shutdown & SEND_SHUTDOWN) | 
 | 1488 | 			goto failure; | 
 | 1489 |  | 
 | 1490 | 		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { | 
| Larry Woodman | db38c179 | 2006-11-03 16:05:45 -0800 | [diff] [blame] | 1491 | 			skb = alloc_skb(header_len, gfp_mask); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1492 | 			if (skb) { | 
 | 1493 | 				int npages; | 
 | 1494 | 				int i; | 
 | 1495 |  | 
 | 1496 | 				/* No pages, we're done... */ | 
 | 1497 | 				if (!data_len) | 
 | 1498 | 					break; | 
 | 1499 |  | 
 | 1500 | 				npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; | 
 | 1501 | 				skb->truesize += data_len; | 
 | 1502 | 				skb_shinfo(skb)->nr_frags = npages; | 
 | 1503 | 				for (i = 0; i < npages; i++) { | 
 | 1504 | 					struct page *page; | 
 | 1505 | 					skb_frag_t *frag; | 
 | 1506 |  | 
 | 1507 | 					page = alloc_pages(sk->sk_allocation, 0); | 
 | 1508 | 					if (!page) { | 
 | 1509 | 						err = -ENOBUFS; | 
 | 1510 | 						skb_shinfo(skb)->nr_frags = i; | 
 | 1511 | 						kfree_skb(skb); | 
 | 1512 | 						goto failure; | 
 | 1513 | 					} | 
 | 1514 |  | 
 | 1515 | 					frag = &skb_shinfo(skb)->frags[i]; | 
 | 1516 | 					frag->page = page; | 
 | 1517 | 					frag->page_offset = 0; | 
 | 1518 | 					frag->size = (data_len >= PAGE_SIZE ? | 
 | 1519 | 						      PAGE_SIZE : | 
 | 1520 | 						      data_len); | 
 | 1521 | 					data_len -= PAGE_SIZE; | 
 | 1522 | 				} | 
 | 1523 |  | 
 | 1524 | 				/* Full success... */ | 
 | 1525 | 				break; | 
 | 1526 | 			} | 
 | 1527 | 			err = -ENOBUFS; | 
 | 1528 | 			goto failure; | 
 | 1529 | 		} | 
 | 1530 | 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); | 
 | 1531 | 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | 
 | 1532 | 		err = -EAGAIN; | 
 | 1533 | 		if (!timeo) | 
 | 1534 | 			goto failure; | 
 | 1535 | 		if (signal_pending(current)) | 
 | 1536 | 			goto interrupted; | 
 | 1537 | 		timeo = sock_wait_for_wmem(sk, timeo); | 
 | 1538 | 	} | 
 | 1539 |  | 
 | 1540 | 	skb_set_owner_w(skb, sk); | 
 | 1541 | 	return skb; | 
 | 1542 |  | 
 | 1543 | interrupted: | 
 | 1544 | 	err = sock_intr_errno(timeo); | 
 | 1545 | failure: | 
 | 1546 | 	*errcode = err; | 
 | 1547 | 	return NULL; | 
 | 1548 | } | 
| Herbert Xu | 4cc7f68 | 2009-02-04 16:55:54 -0800 | [diff] [blame] | 1549 | EXPORT_SYMBOL(sock_alloc_send_pskb); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1550 |  | 
| YOSHIFUJI Hideaki | 4ec93ed | 2007-02-09 23:24:36 +0900 | [diff] [blame] | 1551 | struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1552 | 				    int noblock, int *errcode) | 
 | 1553 | { | 
 | 1554 | 	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode); | 
 | 1555 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1556 | EXPORT_SYMBOL(sock_alloc_send_skb); | 
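/*
 * Illustrative sketch (not part of this file): the typical sendmsg-side
 * call, which blocks (subject to sk_sndtimeo) until send-buffer space is
 * available, or fails with the reason reported through errcode.
 */
static struct sk_buff *example_xmit_alloc(struct sock *sk, unsigned long len,
					  int noblock, int *err)
{
	struct sk_buff *skb = sock_alloc_send_skb(sk, len, noblock, err);

	/* on failure *err holds -EAGAIN, -EPIPE, a pending sk_err, or the
	 * signal-dependent value from sock_intr_errno() */
	return skb;
}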
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1557 |  | 
 | 1558 | static void __lock_sock(struct sock *sk) | 
 | 1559 | { | 
 | 1560 | 	DEFINE_WAIT(wait); | 
 | 1561 |  | 
| Stephen Hemminger | e71a478 | 2007-04-10 20:10:33 -0700 | [diff] [blame] | 1562 | 	for (;;) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1563 | 		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait, | 
 | 1564 | 					TASK_UNINTERRUPTIBLE); | 
 | 1565 | 		spin_unlock_bh(&sk->sk_lock.slock); | 
 | 1566 | 		schedule(); | 
 | 1567 | 		spin_lock_bh(&sk->sk_lock.slock); | 
| Stephen Hemminger | e71a478 | 2007-04-10 20:10:33 -0700 | [diff] [blame] | 1568 | 		if (!sock_owned_by_user(sk)) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1569 | 			break; | 
 | 1570 | 	} | 
 | 1571 | 	finish_wait(&sk->sk_lock.wq, &wait); | 
 | 1572 | } | 
 | 1573 |  | 
 | 1574 | static void __release_sock(struct sock *sk) | 
 | 1575 | { | 
 | 1576 | 	struct sk_buff *skb = sk->sk_backlog.head; | 
 | 1577 |  | 
 | 1578 | 	do { | 
 | 1579 | 		sk->sk_backlog.head = sk->sk_backlog.tail = NULL; | 
 | 1580 | 		bh_unlock_sock(sk); | 
 | 1581 |  | 
 | 1582 | 		do { | 
 | 1583 | 			struct sk_buff *next = skb->next; | 
 | 1584 |  | 
| Eric Dumazet | 7fee226 | 2010-05-11 23:19:48 +0000 | [diff] [blame] | 1585 | 			WARN_ON_ONCE(skb_dst_is_noref(skb)); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1586 | 			skb->next = NULL; | 
| Peter Zijlstra | c57943a | 2008-10-07 14:18:42 -0700 | [diff] [blame] | 1587 | 			sk_backlog_rcv(sk, skb); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1588 |  | 
 | 1589 | 			/* | 
 | 1590 | 			 * We are in process context here with softirqs | 
 | 1591 | 			 * disabled, use cond_resched_softirq() to preempt. | 
 | 1592 | 			 * This is safe to do because we've taken the backlog | 
 | 1593 | 			 * queue private: | 
 | 1594 | 			 */ | 
 | 1595 | 			cond_resched_softirq(); | 
 | 1596 |  | 
 | 1597 | 			skb = next; | 
 | 1598 | 		} while (skb != NULL); | 
 | 1599 |  | 
 | 1600 | 		bh_lock_sock(sk); | 
| Stephen Hemminger | e71a478 | 2007-04-10 20:10:33 -0700 | [diff] [blame] | 1601 | 	} while ((skb = sk->sk_backlog.head) != NULL); | 
| Zhu Yi | 8eae939 | 2010-03-04 18:01:40 +0000 | [diff] [blame] | 1602 |  | 
 | 1603 | 	/* | 
 | 1604 | 	 * Doing the zeroing here guarantees we cannot loop forever | 
 | 1605 | 	 * while a wild producer attempts to flood us. | 
 | 1606 | 	 */ | 
 | 1607 | 	sk->sk_backlog.len = 0; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1608 | } | 
 | 1609 |  | 
 | 1610 | /** | 
 | 1611 |  * sk_wait_data - wait for data to arrive at sk_receive_queue | 
| Pavel Pisa | 4dc3b16 | 2005-05-01 08:59:25 -0700 | [diff] [blame] | 1612 |  * @sk:    sock to wait on | 
 | 1613 |  * @timeo: for how long | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1614 |  * | 
 | 1615 |  * Now the socket state, including sk->sk_err, is changed only under the lock, | 
 | 1616 |  * hence we may omit checks after joining the wait queue. | 
 | 1617 |  * We check the receive queue before schedule() only as an optimization; | 
 | 1618 |  * it is very likely that release_sock() added new data. | 
 | 1619 |  */ | 
 | 1620 | int sk_wait_data(struct sock *sk, long *timeo) | 
 | 1621 | { | 
 | 1622 | 	int rc; | 
 | 1623 | 	DEFINE_WAIT(wait); | 
 | 1624 |  | 
| Eric Dumazet | aa39514 | 2010-04-20 13:03:51 +0000 | [diff] [blame] | 1625 | 	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1626 | 	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); | 
 | 1627 | 	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); | 
 | 1628 | 	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); | 
| Eric Dumazet | aa39514 | 2010-04-20 13:03:51 +0000 | [diff] [blame] | 1629 | 	finish_wait(sk_sleep(sk), &wait); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1630 | 	return rc; | 
 | 1631 | } | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1632 | EXPORT_SYMBOL(sk_wait_data); | 
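/*
 * Illustrative sketch (not part of this file): a recvmsg() implementation
 * calling sk_wait_data() under lock_sock() until a packet shows up.
 */
static int example_wait_for_packet(struct sock *sk, int noblock)
{
	long timeo = sock_rcvtimeo(sk, noblock);

	while (skb_queue_empty(&sk->sk_receive_queue)) {
		if (!timeo)
			return -EAGAIN;
		if (signal_pending(current))
			return sock_intr_errno(timeo);
		sk_wait_data(sk, &timeo);	/* drops and retakes the socket lock */
	}
	return 0;
}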
 | 1633 |  | 
| Hideo Aoki | 3ab224b | 2007-12-31 00:11:19 -0800 | [diff] [blame] | 1634 | /** | 
 | 1635 |  *	__sk_mem_schedule - increase sk_forward_alloc and memory_allocated | 
 | 1636 |  *	@sk: socket | 
 | 1637 |  *	@size: memory size to allocate | 
 | 1638 |  *	@kind: allocation type | 
 | 1639 |  * | 
 | 1640 |  *	If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means | 
 | 1641 |  *	rmem allocation. This function assumes that protocols which have | 
 | 1642 |  *	memory_pressure use sk_wmem_queued as write buffer accounting. | 
 | 1643 |  */ | 
 | 1644 | int __sk_mem_schedule(struct sock *sk, int size, int kind) | 
 | 1645 | { | 
 | 1646 | 	struct proto *prot = sk->sk_prot; | 
 | 1647 | 	int amt = sk_mem_pages(size); | 
 | 1648 | 	int allocated; | 
 | 1649 |  | 
 | 1650 | 	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; | 
 | 1651 | 	allocated = atomic_add_return(amt, prot->memory_allocated); | 
 | 1652 |  | 
 | 1653 | 	/* Under limit. */ | 
 | 1654 | 	if (allocated <= prot->sysctl_mem[0]) { | 
 | 1655 | 		if (prot->memory_pressure && *prot->memory_pressure) | 
 | 1656 | 			*prot->memory_pressure = 0; | 
 | 1657 | 		return 1; | 
 | 1658 | 	} | 
 | 1659 |  | 
 | 1660 | 	/* Under pressure. */ | 
 | 1661 | 	if (allocated > prot->sysctl_mem[1]) | 
 | 1662 | 		if (prot->enter_memory_pressure) | 
| Pavel Emelyanov | 5c52ba1 | 2008-07-16 20:28:10 -0700 | [diff] [blame] | 1663 | 			prot->enter_memory_pressure(sk); | 
| Hideo Aoki | 3ab224b | 2007-12-31 00:11:19 -0800 | [diff] [blame] | 1664 |  | 
 | 1665 | 	/* Over hard limit. */ | 
 | 1666 | 	if (allocated > prot->sysctl_mem[2]) | 
 | 1667 | 		goto suppress_allocation; | 
 | 1668 |  | 
 | 1669 | 	/* guarantee minimum buffer size under pressure */ | 
 | 1670 | 	if (kind == SK_MEM_RECV) { | 
 | 1671 | 		if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0]) | 
 | 1672 | 			return 1; | 
 | 1673 | 	} else { /* SK_MEM_SEND */ | 
 | 1674 | 		if (sk->sk_type == SOCK_STREAM) { | 
 | 1675 | 			if (sk->sk_wmem_queued < prot->sysctl_wmem[0]) | 
 | 1676 | 				return 1; | 
 | 1677 | 		} else if (atomic_read(&sk->sk_wmem_alloc) < | 
 | 1678 | 			   prot->sysctl_wmem[0]) | 
 | 1679 | 				return 1; | 
 | 1680 | 	} | 
 | 1681 |  | 
 | 1682 | 	if (prot->memory_pressure) { | 
| Eric Dumazet | 1748376 | 2008-11-25 21:16:35 -0800 | [diff] [blame] | 1683 | 		int alloc; | 
 | 1684 |  | 
 | 1685 | 		if (!*prot->memory_pressure) | 
 | 1686 | 			return 1; | 
 | 1687 | 		alloc = percpu_counter_read_positive(prot->sockets_allocated); | 
 | 1688 | 		if (prot->sysctl_mem[2] > alloc * | 
| Hideo Aoki | 3ab224b | 2007-12-31 00:11:19 -0800 | [diff] [blame] | 1689 | 		    sk_mem_pages(sk->sk_wmem_queued + | 
 | 1690 | 				 atomic_read(&sk->sk_rmem_alloc) + | 
 | 1691 | 				 sk->sk_forward_alloc)) | 
 | 1692 | 			return 1; | 
 | 1693 | 	} | 
 | 1694 |  | 
 | 1695 | suppress_allocation: | 
 | 1696 |  | 
 | 1697 | 	if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) { | 
 | 1698 | 		sk_stream_moderate_sndbuf(sk); | 
 | 1699 |  | 
 | 1700 | 		/* Fail only if socket is _under_ its sndbuf. | 
 | 1701 | 		 * In this case we cannot block, so we have to fail. | 
 | 1702 | 		 */ | 
 | 1703 | 		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) | 
 | 1704 | 			return 1; | 
 | 1705 | 	} | 
 | 1706 |  | 
 | 1707 | 	/* Alas. Undo changes. */ | 
 | 1708 | 	sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM; | 
 | 1709 | 	atomic_sub(amt, prot->memory_allocated); | 
 | 1710 | 	return 0; | 
 | 1711 | } | 
| Hideo Aoki | 3ab224b | 2007-12-31 00:11:19 -0800 | [diff] [blame] | 1712 | EXPORT_SYMBOL(__sk_mem_schedule); | 
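/*
 * Illustrative sketch (not part of this file): the receive path normally
 * reaches __sk_mem_schedule() through the sk_rmem_schedule() wrapper,
 * which only calls here when sk_forward_alloc cannot already cover the skb.
 */
static int example_account_rcv(struct sock *sk, struct sk_buff *skb)
{
	if (!sk_rmem_schedule(sk, skb->truesize))
		return -ENOBUFS;	/* over the hard limit */
	skb_set_owner_r(skb, sk);	/* charges sk_rmem_alloc, uncharges forward_alloc */
	return 0;
}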
 | 1713 |  | 
 | 1714 | /** | 
 | 1715 |  *	__sk_mem_reclaim - reclaim memory_allocated | 
 | 1716 |  *	@sk: socket | 
 | 1717 |  */ | 
 | 1718 | void __sk_mem_reclaim(struct sock *sk) | 
 | 1719 | { | 
 | 1720 | 	struct proto *prot = sk->sk_prot; | 
 | 1721 |  | 
| Eric Dumazet | 680a5a5 | 2007-12-31 15:00:50 -0800 | [diff] [blame] | 1722 | 	atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT, | 
| Hideo Aoki | 3ab224b | 2007-12-31 00:11:19 -0800 | [diff] [blame] | 1723 | 		   prot->memory_allocated); | 
 | 1724 | 	sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1; | 
 | 1725 |  | 
 | 1726 | 	if (prot->memory_pressure && *prot->memory_pressure && | 
 | 1727 | 	    (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0])) | 
 | 1728 | 		*prot->memory_pressure = 0; | 
 | 1729 | } | 
| Hideo Aoki | 3ab224b | 2007-12-31 00:11:19 -0800 | [diff] [blame] | 1730 | EXPORT_SYMBOL(__sk_mem_reclaim); | 
 | 1731 |  | 
 | 1732 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1733 | /* | 
 | 1734 |  * Set of default routines for initialising struct proto_ops when | 
 | 1735 |  * the protocol does not support a particular function. In certain | 
 | 1736 |  * cases where it makes no sense for a protocol to have a "do nothing" | 
 | 1737 |  * function, some default processing is provided. | 
 | 1738 |  */ | 
 | 1739 |  | 
 | 1740 | int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len) | 
 | 1741 | { | 
 | 1742 | 	return -EOPNOTSUPP; | 
 | 1743 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1744 | EXPORT_SYMBOL(sock_no_bind); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1745 |  | 
| YOSHIFUJI Hideaki | 4ec93ed | 2007-02-09 23:24:36 +0900 | [diff] [blame] | 1746 | int sock_no_connect(struct socket *sock, struct sockaddr *saddr, | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1747 | 		    int len, int flags) | 
 | 1748 | { | 
 | 1749 | 	return -EOPNOTSUPP; | 
 | 1750 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1751 | EXPORT_SYMBOL(sock_no_connect); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1752 |  | 
 | 1753 | int sock_no_socketpair(struct socket *sock1, struct socket *sock2) | 
 | 1754 | { | 
 | 1755 | 	return -EOPNOTSUPP; | 
 | 1756 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1757 | EXPORT_SYMBOL(sock_no_socketpair); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1758 |  | 
 | 1759 | int sock_no_accept(struct socket *sock, struct socket *newsock, int flags) | 
 | 1760 | { | 
 | 1761 | 	return -EOPNOTSUPP; | 
 | 1762 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1763 | EXPORT_SYMBOL(sock_no_accept); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1764 |  | 
| YOSHIFUJI Hideaki | 4ec93ed | 2007-02-09 23:24:36 +0900 | [diff] [blame] | 1765 | int sock_no_getname(struct socket *sock, struct sockaddr *saddr, | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1766 | 		    int *len, int peer) | 
 | 1767 | { | 
 | 1768 | 	return -EOPNOTSUPP; | 
 | 1769 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1770 | EXPORT_SYMBOL(sock_no_getname); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1771 |  | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1772 | unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1773 | { | 
 | 1774 | 	return 0; | 
 | 1775 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1776 | EXPORT_SYMBOL(sock_no_poll); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1777 |  | 
 | 1778 | int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | 
 | 1779 | { | 
 | 1780 | 	return -EOPNOTSUPP; | 
 | 1781 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1782 | EXPORT_SYMBOL(sock_no_ioctl); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1783 |  | 
 | 1784 | int sock_no_listen(struct socket *sock, int backlog) | 
 | 1785 | { | 
 | 1786 | 	return -EOPNOTSUPP; | 
 | 1787 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1788 | EXPORT_SYMBOL(sock_no_listen); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1789 |  | 
 | 1790 | int sock_no_shutdown(struct socket *sock, int how) | 
 | 1791 | { | 
 | 1792 | 	return -EOPNOTSUPP; | 
 | 1793 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1794 | EXPORT_SYMBOL(sock_no_shutdown); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1795 |  | 
 | 1796 | int sock_no_setsockopt(struct socket *sock, int level, int optname, | 
| David S. Miller | b705884 | 2009-09-30 16:12:20 -0700 | [diff] [blame] | 1797 | 		    char __user *optval, unsigned int optlen) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1798 | { | 
 | 1799 | 	return -EOPNOTSUPP; | 
 | 1800 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1801 | EXPORT_SYMBOL(sock_no_setsockopt); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1802 |  | 
 | 1803 | int sock_no_getsockopt(struct socket *sock, int level, int optname, | 
 | 1804 | 		    char __user *optval, int __user *optlen) | 
 | 1805 | { | 
 | 1806 | 	return -EOPNOTSUPP; | 
 | 1807 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1808 | EXPORT_SYMBOL(sock_no_getsockopt); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1809 |  | 
 | 1810 | int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, | 
 | 1811 | 		    size_t len) | 
 | 1812 | { | 
 | 1813 | 	return -EOPNOTSUPP; | 
 | 1814 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1815 | EXPORT_SYMBOL(sock_no_sendmsg); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1816 |  | 
 | 1817 | int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, | 
 | 1818 | 		    size_t len, int flags) | 
 | 1819 | { | 
 | 1820 | 	return -EOPNOTSUPP; | 
 | 1821 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1822 | EXPORT_SYMBOL(sock_no_recvmsg); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1823 |  | 
 | 1824 | int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma) | 
 | 1825 | { | 
 | 1826 | 	/* Mirror missing mmap method error code */ | 
 | 1827 | 	return -ENODEV; | 
 | 1828 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1829 | EXPORT_SYMBOL(sock_no_mmap); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1830 |  | 
 | 1831 | ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) | 
 | 1832 | { | 
 | 1833 | 	ssize_t res; | 
 | 1834 | 	struct msghdr msg = {.msg_flags = flags}; | 
 | 1835 | 	struct kvec iov; | 
 | 1836 | 	char *kaddr = kmap(page); | 
 | 1837 | 	iov.iov_base = kaddr + offset; | 
 | 1838 | 	iov.iov_len = size; | 
 | 1839 | 	res = kernel_sendmsg(sock, &msg, &iov, 1, size); | 
 | 1840 | 	kunmap(page); | 
 | 1841 | 	return res; | 
 | 1842 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1843 | EXPORT_SYMBOL(sock_no_sendpage); | 
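/*
 * Illustrative sketch (not part of this file): a hypothetical address
 * family can plug the stubs above into its proto_ops for every operation
 * it does not implement; only .release and the ops it really supports
 * need real functions.  The PF_INET family value is a placeholder.
 */
static const struct proto_ops example_minimal_ops = {
	.family		= PF_INET,		/* placeholder */
	.owner		= THIS_MODULE,
	.bind		= sock_no_bind,
	.connect	= sock_no_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= sock_no_accept,
	.getname	= sock_no_getname,
	.poll		= sock_no_poll,
	.ioctl		= sock_no_ioctl,
	.listen		= sock_no_listen,
	.shutdown	= sock_no_shutdown,
	.setsockopt	= sock_no_setsockopt,
	.getsockopt	= sock_no_getsockopt,
	.sendmsg	= sock_no_sendmsg,
	.recvmsg	= sock_no_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage,
};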
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1844 |  | 
 | 1845 | /* | 
 | 1846 |  *	Default Socket Callbacks | 
 | 1847 |  */ | 
 | 1848 |  | 
 | 1849 | static void sock_def_wakeup(struct sock *sk) | 
 | 1850 | { | 
| Eric Dumazet | 4381548 | 2010-04-29 11:01:49 +0000 | [diff] [blame] | 1851 | 	struct socket_wq *wq; | 
 | 1852 |  | 
 | 1853 | 	rcu_read_lock(); | 
 | 1854 | 	wq = rcu_dereference(sk->sk_wq); | 
 | 1855 | 	if (wq_has_sleeper(wq)) | 
 | 1856 | 		wake_up_interruptible_all(&wq->wait); | 
 | 1857 | 	rcu_read_unlock(); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1858 | } | 
 | 1859 |  | 
 | 1860 | static void sock_def_error_report(struct sock *sk) | 
 | 1861 | { | 
| Eric Dumazet | 4381548 | 2010-04-29 11:01:49 +0000 | [diff] [blame] | 1862 | 	struct socket_wq *wq; | 
 | 1863 |  | 
 | 1864 | 	rcu_read_lock(); | 
 | 1865 | 	wq = rcu_dereference(sk->sk_wq); | 
 | 1866 | 	if (wq_has_sleeper(wq)) | 
 | 1867 | 		wake_up_interruptible_poll(&wq->wait, POLLERR); | 
| Pavel Emelyanov | 8d8ad9d | 2007-11-26 20:10:50 +0800 | [diff] [blame] | 1868 | 	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); | 
| Eric Dumazet | 4381548 | 2010-04-29 11:01:49 +0000 | [diff] [blame] | 1869 | 	rcu_read_unlock(); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1870 | } | 
 | 1871 |  | 
 | 1872 | static void sock_def_readable(struct sock *sk, int len) | 
 | 1873 | { | 
| Eric Dumazet | 4381548 | 2010-04-29 11:01:49 +0000 | [diff] [blame] | 1874 | 	struct socket_wq *wq; | 
 | 1875 |  | 
 | 1876 | 	rcu_read_lock(); | 
 | 1877 | 	wq = rcu_dereference(sk->sk_wq); | 
 | 1878 | 	if (wq_has_sleeper(wq)) | 
 | 1879 | 		wake_up_interruptible_sync_poll(&wq->wait, POLLIN | | 
| Davide Libenzi | 37e5540 | 2009-03-31 15:24:21 -0700 | [diff] [blame] | 1880 | 						POLLRDNORM | POLLRDBAND); | 
| Pavel Emelyanov | 8d8ad9d | 2007-11-26 20:10:50 +0800 | [diff] [blame] | 1881 | 	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); | 
| Eric Dumazet | 4381548 | 2010-04-29 11:01:49 +0000 | [diff] [blame] | 1882 | 	rcu_read_unlock(); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1883 | } | 
 | 1884 |  | 
 | 1885 | static void sock_def_write_space(struct sock *sk) | 
 | 1886 | { | 
| Eric Dumazet | 4381548 | 2010-04-29 11:01:49 +0000 | [diff] [blame] | 1887 | 	struct socket_wq *wq; | 
 | 1888 |  | 
 | 1889 | 	rcu_read_lock(); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1890 |  | 
 | 1891 | 	/* Do not wake up a writer until he can make "significant" | 
 | 1892 | 	 * progress.  --DaveM | 
 | 1893 | 	 */ | 
| Stephen Hemminger | e71a478 | 2007-04-10 20:10:33 -0700 | [diff] [blame] | 1894 | 	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { | 
| Eric Dumazet | 4381548 | 2010-04-29 11:01:49 +0000 | [diff] [blame] | 1895 | 		wq = rcu_dereference(sk->sk_wq); | 
 | 1896 | 		if (wq_has_sleeper(wq)) | 
 | 1897 | 			wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | | 
| Davide Libenzi | 37e5540 | 2009-03-31 15:24:21 -0700 | [diff] [blame] | 1898 | 						POLLWRNORM | POLLWRBAND); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1899 |  | 
 | 1900 | 		/* Should agree with poll, otherwise some programs break */ | 
 | 1901 | 		if (sock_writeable(sk)) | 
| Pavel Emelyanov | 8d8ad9d | 2007-11-26 20:10:50 +0800 | [diff] [blame] | 1902 | 			sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1903 | 	} | 
 | 1904 |  | 
| Eric Dumazet | 4381548 | 2010-04-29 11:01:49 +0000 | [diff] [blame] | 1905 | 	rcu_read_unlock(); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1906 | } | 
 | 1907 |  | 
 | 1908 | static void sock_def_destruct(struct sock *sk) | 
 | 1909 | { | 
| Jesper Juhl | a51482b | 2005-11-08 09:41:34 -0800 | [diff] [blame] | 1910 | 	kfree(sk->sk_protinfo); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1911 | } | 
 | 1912 |  | 
 | 1913 | void sk_send_sigurg(struct sock *sk) | 
 | 1914 | { | 
 | 1915 | 	if (sk->sk_socket && sk->sk_socket->file) | 
 | 1916 | 		if (send_sigurg(&sk->sk_socket->file->f_owner)) | 
| Pavel Emelyanov | 8d8ad9d | 2007-11-26 20:10:50 +0800 | [diff] [blame] | 1917 | 			sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1918 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1919 | EXPORT_SYMBOL(sk_send_sigurg); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1920 |  | 
 | 1921 | void sk_reset_timer(struct sock *sk, struct timer_list* timer, | 
 | 1922 | 		    unsigned long expires) | 
 | 1923 | { | 
 | 1924 | 	if (!mod_timer(timer, expires)) | 
 | 1925 | 		sock_hold(sk); | 
 | 1926 | } | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1927 | EXPORT_SYMBOL(sk_reset_timer); | 
 | 1928 |  | 
 | 1929 | void sk_stop_timer(struct sock *sk, struct timer_list* timer) | 
 | 1930 | { | 
 | 1931 | 	if (timer_pending(timer) && del_timer(timer)) | 
 | 1932 | 		__sock_put(sk); | 
 | 1933 | } | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1934 | EXPORT_SYMBOL(sk_stop_timer); | 
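/*
 * Illustrative sketch (not part of this file): the reference discipline
 * behind the two helpers above.  A pending timer owns one reference on
 * the sock (taken by sk_reset_timer()), so the handler must drop it.
 */
static void example_timer_fired(unsigned long data)
{
	struct sock *sk = (struct sock *)data;

	/* ... protocol timeout work ... */
	sock_put(sk);	/* pairs with the sock_hold() in sk_reset_timer() */
}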
 | 1935 |  | 
 | 1936 | void sock_init_data(struct socket *sock, struct sock *sk) | 
 | 1937 | { | 
 | 1938 | 	skb_queue_head_init(&sk->sk_receive_queue); | 
 | 1939 | 	skb_queue_head_init(&sk->sk_write_queue); | 
 | 1940 | 	skb_queue_head_init(&sk->sk_error_queue); | 
| Chris Leech | 97fc2f0 | 2006-05-23 17:55:33 -0700 | [diff] [blame] | 1941 | #ifdef CONFIG_NET_DMA | 
 | 1942 | 	skb_queue_head_init(&sk->sk_async_wait_queue); | 
 | 1943 | #endif | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1944 |  | 
 | 1945 | 	sk->sk_send_head	=	NULL; | 
 | 1946 |  | 
 | 1947 | 	init_timer(&sk->sk_timer); | 
| YOSHIFUJI Hideaki | 4ec93ed | 2007-02-09 23:24:36 +0900 | [diff] [blame] | 1948 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1949 | 	sk->sk_allocation	=	GFP_KERNEL; | 
 | 1950 | 	sk->sk_rcvbuf		=	sysctl_rmem_default; | 
 | 1951 | 	sk->sk_sndbuf		=	sysctl_wmem_default; | 
 | 1952 | 	sk->sk_state		=	TCP_CLOSE; | 
| David S. Miller | 972692e | 2008-06-17 22:41:38 -0700 | [diff] [blame] | 1953 | 	sk_set_socket(sk, sock); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1954 |  | 
 | 1955 | 	sock_set_flag(sk, SOCK_ZAPPED); | 
 | 1956 |  | 
| Stephen Hemminger | e71a478 | 2007-04-10 20:10:33 -0700 | [diff] [blame] | 1957 | 	if (sock) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1958 | 		sk->sk_type	=	sock->type; | 
| Eric Dumazet | 4381548 | 2010-04-29 11:01:49 +0000 | [diff] [blame] | 1959 | 		sk->sk_wq	=	sock->wq; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1960 | 		sock->sk	=	sk; | 
 | 1961 | 	} else | 
| Eric Dumazet | 4381548 | 2010-04-29 11:01:49 +0000 | [diff] [blame] | 1962 | 		sk->sk_wq	=	NULL; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1963 |  | 
| Eric Dumazet | b6c6712 | 2010-04-08 23:03:29 +0000 | [diff] [blame] | 1964 | 	spin_lock_init(&sk->sk_dst_lock); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1965 | 	rwlock_init(&sk->sk_callback_lock); | 
| Peter Zijlstra | 443aef0 | 2007-07-19 01:49:00 -0700 | [diff] [blame] | 1966 | 	lockdep_set_class_and_name(&sk->sk_callback_lock, | 
 | 1967 | 			af_callback_keys + sk->sk_family, | 
 | 1968 | 			af_family_clock_key_strings[sk->sk_family]); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1969 |  | 
 | 1970 | 	sk->sk_state_change	=	sock_def_wakeup; | 
 | 1971 | 	sk->sk_data_ready	=	sock_def_readable; | 
 | 1972 | 	sk->sk_write_space	=	sock_def_write_space; | 
 | 1973 | 	sk->sk_error_report	=	sock_def_error_report; | 
 | 1974 | 	sk->sk_destruct		=	sock_def_destruct; | 
 | 1975 |  | 
 | 1976 | 	sk->sk_sndmsg_page	=	NULL; | 
 | 1977 | 	sk->sk_sndmsg_off	=	0; | 
 | 1978 |  | 
| Eric W. Biederman | 109f6e3 | 2010-06-13 03:30:14 +0000 | [diff] [blame] | 1979 | 	sk->sk_peer_pid 	=	NULL; | 
 | 1980 | 	sk->sk_peer_cred	=	NULL; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1981 | 	sk->sk_write_pending	=	0; | 
 | 1982 | 	sk->sk_rcvlowat		=	1; | 
 | 1983 | 	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT; | 
 | 1984 | 	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT; | 
 | 1985 |  | 
| Eric Dumazet | f37f0af | 2008-04-13 21:39:26 -0700 | [diff] [blame] | 1986 | 	sk->sk_stamp = ktime_set(-1L, 0); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1987 |  | 
| Eric Dumazet | 4dc6dc7 | 2009-07-15 23:13:10 +0000 | [diff] [blame] | 1988 | 	/* | 
 | 1989 | 	 * Before updating sk_refcnt, we must commit prior changes to memory | 
 | 1990 | 	 * (Documentation/RCU/rculist_nulls.txt for details) | 
 | 1991 | 	 */ | 
 | 1992 | 	smp_wmb(); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1993 | 	atomic_set(&sk->sk_refcnt, 1); | 
| Wang Chen | 33c732c | 2007-11-13 20:30:01 -0800 | [diff] [blame] | 1994 | 	atomic_set(&sk->sk_drops, 0); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1995 | } | 
| Eric Dumazet | 2a91525 | 2009-05-27 11:30:05 +0000 | [diff] [blame] | 1996 | EXPORT_SYMBOL(sock_init_data); | 
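/*
 * Protocol families call sock_init_data() right after allocating a
 * struct sock and before filling in protocol-specific state. A
 * hypothetical family's create routine would follow the usual pattern
 * (illustrative sketch):
 *
 *	sk = sk_alloc(net, PF_FOO, GFP_KERNEL, &foo_proto);
 *	if (!sk)
 *		return -ENOBUFS;
 *	sock_init_data(sock, sk);
 *	sk->sk_protocol = protocol;	// family-specific setup follows
 */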
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1997 |  | 
| Harvey Harrison | b5606c2 | 2008-02-13 15:03:16 -0800 | [diff] [blame] | 1998 | void lock_sock_nested(struct sock *sk, int subclass) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1999 | { | 
 | 2000 | 	might_sleep(); | 
| Ingo Molnar | a5b5bb9 | 2006-07-03 00:25:35 -0700 | [diff] [blame] | 2001 | 	spin_lock_bh(&sk->sk_lock.slock); | 
| John Heffner | d2e9117 | 2007-09-12 10:44:19 +0200 | [diff] [blame] | 2002 | 	if (sk->sk_lock.owned) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2003 | 		__lock_sock(sk); | 
| John Heffner | d2e9117 | 2007-09-12 10:44:19 +0200 | [diff] [blame] | 2004 | 	sk->sk_lock.owned = 1; | 
| Ingo Molnar | a5b5bb9 | 2006-07-03 00:25:35 -0700 | [diff] [blame] | 2005 | 	spin_unlock(&sk->sk_lock.slock); | 
 | 2006 | 	/* | 
 | 2007 | 	 * The sk_lock has mutex_lock() semantics here: | 
 | 2008 | 	 */ | 
| Peter Zijlstra | fcc70d5 | 2006-11-08 22:44:35 -0800 | [diff] [blame] | 2009 | 	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); | 
| Ingo Molnar | a5b5bb9 | 2006-07-03 00:25:35 -0700 | [diff] [blame] | 2010 | 	local_bh_enable(); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2011 | } | 
| Peter Zijlstra | fcc70d5 | 2006-11-08 22:44:35 -0800 | [diff] [blame] | 2012 | EXPORT_SYMBOL(lock_sock_nested); | 
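/*
 * Most callers take the lock through lock_sock(sk), which simply passes
 * subclass 0; the subclass only matters to lockdep when two socket locks
 * must be held at once. The wrapper in include/net/sock.h is roughly
 * (illustrative sketch):
 *
 *	static inline void lock_sock(struct sock *sk)
 *	{
 *		lock_sock_nested(sk, 0);
 *	}
 */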
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2013 |  | 
| Harvey Harrison | b5606c2 | 2008-02-13 15:03:16 -0800 | [diff] [blame] | 2014 | void release_sock(struct sock *sk) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2015 | { | 
| Ingo Molnar | a5b5bb9 | 2006-07-03 00:25:35 -0700 | [diff] [blame] | 2016 | 	/* | 
 | 2017 | 	 * The sk_lock has mutex_unlock() semantics: | 
 | 2018 | 	 */ | 
 | 2019 | 	mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); | 
 | 2020 |  | 
 | 2021 | 	spin_lock_bh(&sk->sk_lock.slock); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2022 | 	if (sk->sk_backlog.tail) | 
 | 2023 | 		__release_sock(sk); | 
| John Heffner | d2e9117 | 2007-09-12 10:44:19 +0200 | [diff] [blame] | 2024 | 	sk->sk_lock.owned = 0; | 
| Ingo Molnar | a5b5bb9 | 2006-07-03 00:25:35 -0700 | [diff] [blame] | 2025 | 	if (waitqueue_active(&sk->sk_lock.wq)) | 
 | 2026 | 		wake_up(&sk->sk_lock.wq); | 
 | 2027 | 	spin_unlock_bh(&sk->sk_lock.slock); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2028 | } | 
 | 2029 | EXPORT_SYMBOL(release_sock); | 
 | 2030 |  | 
| Eric Dumazet | 8a74ad6 | 2010-05-26 19:20:18 +0000 | [diff] [blame] | 2031 | /** | 
 | 2032 |  * lock_sock_fast - fast version of lock_sock | 
 | 2033 |  * @sk: socket | 
 | 2034 |  * | 
 | 2035 |  * This version should be used for very small sections, where the process won't block. | 
 | 2036 |  * Returns false if the fast path is taken: | 
 | 2037 |  *   sk_lock.slock locked, owned = 0, BH disabled | 
 | 2038 |  * Returns true if the slow path is taken: | 
 | 2039 |  *   sk_lock.slock unlocked, owned = 1, BH enabled | 
 | 2040 |  */ | 
 | 2041 | bool lock_sock_fast(struct sock *sk) | 
 | 2042 | { | 
 | 2043 | 	might_sleep(); | 
 | 2044 | 	spin_lock_bh(&sk->sk_lock.slock); | 
 | 2045 |  | 
 | 2046 | 	if (!sk->sk_lock.owned) | 
 | 2047 | 		/* | 
 | 2048 | 		 * Note : fast path returns with BH disabled and the slock held | 
 | 2049 | 		 */ | 
 | 2050 | 		return false; | 
 | 2051 |  | 
 | 2052 | 	__lock_sock(sk); | 
 | 2053 | 	sk->sk_lock.owned = 1; | 
 | 2054 | 	spin_unlock(&sk->sk_lock.slock); | 
 | 2055 | 	/* | 
 | 2056 | 	 * The sk_lock has mutex_lock() semantics here: | 
 | 2057 | 	 */ | 
 | 2058 | 	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); | 
 | 2059 | 	local_bh_enable(); | 
 | 2060 | 	return true; | 
 | 2061 | } | 
 | 2062 | EXPORT_SYMBOL(lock_sock_fast); | 
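/*
 * lock_sock_fast() pairs with unlock_sock_fast(), which releases the
 * spinlock on the fast path or falls back to release_sock() on the slow
 * one. A short, non-blocking critical section then looks like this
 * (illustrative sketch):
 *
 *	bool slow = lock_sock_fast(sk);
 *	...short section that must not sleep on the fast path...
 *	unlock_sock_fast(sk, slow);
 */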
 | 2063 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2064 | int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) | 
| YOSHIFUJI Hideaki | 4ec93ed | 2007-02-09 23:24:36 +0900 | [diff] [blame] | 2065 | { | 
| Eric Dumazet | b7aa0bf | 2007-04-19 16:16:32 -0700 | [diff] [blame] | 2066 | 	struct timeval tv; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2067 | 	if (!sock_flag(sk, SOCK_TIMESTAMP)) | 
| Patrick Ohly | 20d4947 | 2009-02-12 05:03:38 +0000 | [diff] [blame] | 2068 | 		sock_enable_timestamp(sk, SOCK_TIMESTAMP); | 
| Eric Dumazet | b7aa0bf | 2007-04-19 16:16:32 -0700 | [diff] [blame] | 2069 | 	tv = ktime_to_timeval(sk->sk_stamp); | 
 | 2070 | 	if (tv.tv_sec == -1) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2071 | 		return -ENOENT; | 
| Eric Dumazet | b7aa0bf | 2007-04-19 16:16:32 -0700 | [diff] [blame] | 2072 | 	if (tv.tv_sec == 0) { | 
 | 2073 | 		sk->sk_stamp = ktime_get_real(); | 
 | 2074 | 		tv = ktime_to_timeval(sk->sk_stamp); | 
 | 2075 | 	} | 
 | 2076 | 	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0; | 
| YOSHIFUJI Hideaki | 4ec93ed | 2007-02-09 23:24:36 +0900 | [diff] [blame] | 2077 | } | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2078 | EXPORT_SYMBOL(sock_get_timestamp); | 
 | 2079 |  | 
| Eric Dumazet | ae40eb1 | 2007-03-18 17:33:16 -0700 | [diff] [blame] | 2080 | int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp) | 
 | 2081 | { | 
 | 2082 | 	struct timespec ts; | 
 | 2083 | 	if (!sock_flag(sk, SOCK_TIMESTAMP)) | 
| Patrick Ohly | 20d4947 | 2009-02-12 05:03:38 +0000 | [diff] [blame] | 2084 | 		sock_enable_timestamp(sk, SOCK_TIMESTAMP); | 
| Eric Dumazet | ae40eb1 | 2007-03-18 17:33:16 -0700 | [diff] [blame] | 2085 | 	ts = ktime_to_timespec(sk->sk_stamp); | 
 | 2086 | 	if (ts.tv_sec == -1) | 
 | 2087 | 		return -ENOENT; | 
 | 2088 | 	if (ts.tv_sec == 0) { | 
 | 2089 | 		sk->sk_stamp = ktime_get_real(); | 
 | 2090 | 		ts = ktime_to_timespec(sk->sk_stamp); | 
 | 2091 | 	} | 
 | 2092 | 	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0; | 
 | 2093 | } | 
 | 2094 | EXPORT_SYMBOL(sock_get_timestampns); | 
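/*
 * These two helpers back the SIOCGSTAMP and SIOCGSTAMPNS socket ioctls.
 * Userspace typically asks for the receive time of the last packet like
 * this (illustrative userspace sketch):
 *
 *	struct timeval tv;
 *	if (ioctl(fd, SIOCGSTAMP, &tv) == 0)
 *		...tv holds the timestamp of the last packet read...
 */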
 | 2095 |  | 
| Patrick Ohly | 20d4947 | 2009-02-12 05:03:38 +0000 | [diff] [blame] | 2096 | void sock_enable_timestamp(struct sock *sk, int flag) | 
| YOSHIFUJI Hideaki | 4ec93ed | 2007-02-09 23:24:36 +0900 | [diff] [blame] | 2097 | { | 
| Patrick Ohly | 20d4947 | 2009-02-12 05:03:38 +0000 | [diff] [blame] | 2098 | 	if (!sock_flag(sk, flag)) { | 
 | 2099 | 		sock_set_flag(sk, flag); | 
 | 2100 | 		/* | 
 | 2101 | 		 * We just set one of the two flags that require net | 
 | 2102 | 		 * time stamping, but time stamping might have been on | 
 | 2103 | 		 * already because of the other one. | 
 | 2104 | 		 */ | 
 | 2105 | 		if (!sock_flag(sk, | 
 | 2106 | 				flag == SOCK_TIMESTAMP ? | 
 | 2107 | 				SOCK_TIMESTAMPING_RX_SOFTWARE : | 
 | 2108 | 				SOCK_TIMESTAMP)) | 
 | 2109 | 			net_enable_timestamp(); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2110 | 	} | 
 | 2111 | } | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2112 |  | 
 | 2113 | /* | 
 | 2114 |  *	Get a socket option on a socket. | 
 | 2115 |  * | 
 | 2116 |  *	FIX: POSIX 1003.1g is very ambiguous here. It states that | 
 | 2117 |  *	asynchronous errors should be reported by getsockopt. We assume | 
 | 2118 |  *	this means if you specify SO_ERROR (otherwise what's the point of it). | 
 | 2119 |  */ | 
 | 2120 | int sock_common_getsockopt(struct socket *sock, int level, int optname, | 
 | 2121 | 			   char __user *optval, int __user *optlen) | 
 | 2122 | { | 
 | 2123 | 	struct sock *sk = sock->sk; | 
 | 2124 |  | 
 | 2125 | 	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen); | 
 | 2126 | } | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2127 | EXPORT_SYMBOL(sock_common_getsockopt); | 
 | 2128 |  | 
| Dmitry Mishin | 3fdadf7 | 2006-03-20 22:45:21 -0800 | [diff] [blame] | 2129 | #ifdef CONFIG_COMPAT | 
| Arnaldo Carvalho de Melo | 543d9cf | 2006-03-20 22:48:35 -0800 | [diff] [blame] | 2130 | int compat_sock_common_getsockopt(struct socket *sock, int level, int optname, | 
 | 2131 | 				  char __user *optval, int __user *optlen) | 
| Dmitry Mishin | 3fdadf7 | 2006-03-20 22:45:21 -0800 | [diff] [blame] | 2132 | { | 
 | 2133 | 	struct sock *sk = sock->sk; | 
 | 2134 |  | 
| Johannes Berg | 1e51f95 | 2007-03-06 13:44:06 -0800 | [diff] [blame] | 2135 | 	if (sk->sk_prot->compat_getsockopt != NULL) | 
| Arnaldo Carvalho de Melo | 543d9cf | 2006-03-20 22:48:35 -0800 | [diff] [blame] | 2136 | 		return sk->sk_prot->compat_getsockopt(sk, level, optname, | 
 | 2137 | 						      optval, optlen); | 
| Dmitry Mishin | 3fdadf7 | 2006-03-20 22:45:21 -0800 | [diff] [blame] | 2138 | 	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen); | 
 | 2139 | } | 
 | 2140 | EXPORT_SYMBOL(compat_sock_common_getsockopt); | 
 | 2141 | #endif | 
 | 2142 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2143 | int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock, | 
 | 2144 | 			struct msghdr *msg, size_t size, int flags) | 
 | 2145 | { | 
 | 2146 | 	struct sock *sk = sock->sk; | 
 | 2147 | 	int addr_len = 0; | 
 | 2148 | 	int err; | 
 | 2149 |  | 
 | 2150 | 	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT, | 
 | 2151 | 				   flags & ~MSG_DONTWAIT, &addr_len); | 
 | 2152 | 	if (err >= 0) | 
 | 2153 | 		msg->msg_namelen = addr_len; | 
 | 2154 | 	return err; | 
 | 2155 | } | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2156 | EXPORT_SYMBOL(sock_common_recvmsg); | 
 | 2157 |  | 
 | 2158 | /* | 
 | 2159 |  *	Set socket options on a socket. | 
 | 2160 |  */ | 
 | 2161 | int sock_common_setsockopt(struct socket *sock, int level, int optname, | 
| David S. Miller | b705884 | 2009-09-30 16:12:20 -0700 | [diff] [blame] | 2162 | 			   char __user *optval, unsigned int optlen) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2163 | { | 
 | 2164 | 	struct sock *sk = sock->sk; | 
 | 2165 |  | 
 | 2166 | 	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen); | 
 | 2167 | } | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2168 | EXPORT_SYMBOL(sock_common_setsockopt); | 
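/*
 * Families that keep all option handling in struct proto can point their
 * proto_ops straight at the generic helpers above, as the inet stream and
 * dgram ops do for get/setsockopt. A hypothetical family (illustrative
 * sketch):
 *
 *	static const struct proto_ops foo_ops = {
 *		.family		= PF_FOO,
 *		.owner		= THIS_MODULE,
 *		.getsockopt	= sock_common_getsockopt,
 *		.setsockopt	= sock_common_setsockopt,
 *		.recvmsg	= sock_common_recvmsg,
 *		...
 *	};
 */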
 | 2169 |  | 
| Dmitry Mishin | 3fdadf7 | 2006-03-20 22:45:21 -0800 | [diff] [blame] | 2170 | #ifdef CONFIG_COMPAT | 
| Arnaldo Carvalho de Melo | 543d9cf | 2006-03-20 22:48:35 -0800 | [diff] [blame] | 2171 | int compat_sock_common_setsockopt(struct socket *sock, int level, int optname, | 
| David S. Miller | b705884 | 2009-09-30 16:12:20 -0700 | [diff] [blame] | 2172 | 				  char __user *optval, unsigned int optlen) | 
| Dmitry Mishin | 3fdadf7 | 2006-03-20 22:45:21 -0800 | [diff] [blame] | 2173 | { | 
 | 2174 | 	struct sock *sk = sock->sk; | 
 | 2175 |  | 
| Arnaldo Carvalho de Melo | 543d9cf | 2006-03-20 22:48:35 -0800 | [diff] [blame] | 2176 | 	if (sk->sk_prot->compat_setsockopt != NULL) | 
 | 2177 | 		return sk->sk_prot->compat_setsockopt(sk, level, optname, | 
 | 2178 | 						      optval, optlen); | 
| Dmitry Mishin | 3fdadf7 | 2006-03-20 22:45:21 -0800 | [diff] [blame] | 2179 | 	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen); | 
 | 2180 | } | 
 | 2181 | EXPORT_SYMBOL(compat_sock_common_setsockopt); | 
 | 2182 | #endif | 
 | 2183 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2184 | void sk_common_release(struct sock *sk) | 
 | 2185 | { | 
 | 2186 | 	if (sk->sk_prot->destroy) | 
 | 2187 | 		sk->sk_prot->destroy(sk); | 
 | 2188 |  | 
 | 2189 | 	/* | 
 | 2190 | 	 * Observation: when sk_common_release is called, processes have | 
 | 2191 | 	 * no access to the socket, but the network stack still does. | 
 | 2192 | 	 * Step one, detach it from networking: | 
 | 2193 | 	 * | 
 | 2194 | 	 * A. Remove from hash tables. | 
 | 2195 | 	 */ | 
 | 2196 |  | 
 | 2197 | 	sk->sk_prot->unhash(sk); | 
 | 2198 |  | 
 | 2199 | 	/* | 
 | 2200 | 	 * At this point the socket cannot receive new packets, but it is possible | 
 | 2201 | 	 * that some packets are in flight because some CPU ran the receiver and | 
 | 2202 | 	 * did a hash table lookup before we unhashed the socket. They will reach | 
 | 2203 | 	 * the receive queue and be purged by the socket destructor. | 
 | 2204 | 	 * | 
 | 2205 | 	 * Also, we still have packets pending on the receive queue and probably | 
 | 2206 | 	 * our own packets waiting in device queues. sock_destroy will drain the | 
 | 2207 | 	 * receive queue, but transmitted packets will delay socket destruction | 
 | 2208 | 	 * until the last reference is released. | 
 | 2209 | 	 */ | 
 | 2210 |  | 
 | 2211 | 	sock_orphan(sk); | 
 | 2212 |  | 
 | 2213 | 	xfrm_sk_free_policy(sk); | 
 | 2214 |  | 
| Arnaldo Carvalho de Melo | e684897 | 2005-08-09 19:45:38 -0700 | [diff] [blame] | 2215 | 	sk_refcnt_debug_release(sk); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2216 | 	sock_put(sk); | 
 | 2217 | } | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2218 | EXPORT_SYMBOL(sk_common_release); | 
 | 2219 |  | 
 | 2220 | static DEFINE_RWLOCK(proto_list_lock); | 
 | 2221 | static LIST_HEAD(proto_list); | 
 | 2222 |  | 
| Pavel Emelyanov | 13ff3d6 | 2008-03-28 16:38:17 -0700 | [diff] [blame] | 2223 | #ifdef CONFIG_PROC_FS | 
 | 2224 | #define PROTO_INUSE_NR	64	/* should be enough for now */ | 
| Pavel Emelyanov | 1338d46 | 2008-03-28 16:38:43 -0700 | [diff] [blame] | 2225 | struct prot_inuse { | 
 | 2226 | 	int val[PROTO_INUSE_NR]; | 
 | 2227 | }; | 
| Pavel Emelyanov | 13ff3d6 | 2008-03-28 16:38:17 -0700 | [diff] [blame] | 2228 |  | 
 | 2229 | static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR); | 
| Pavel Emelyanov | 70ee115 | 2008-03-31 19:42:16 -0700 | [diff] [blame] | 2230 |  | 
 | 2231 | #ifdef CONFIG_NET_NS | 
 | 2232 | void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) | 
 | 2233 | { | 
 | 2234 | 	int cpu = smp_processor_id(); | 
 | 2235 | 	per_cpu_ptr(net->core.inuse, cpu)->val[prot->inuse_idx] += val; | 
 | 2236 | } | 
 | 2237 | EXPORT_SYMBOL_GPL(sock_prot_inuse_add); | 
 | 2238 |  | 
 | 2239 | int sock_prot_inuse_get(struct net *net, struct proto *prot) | 
 | 2240 | { | 
 | 2241 | 	int cpu, idx = prot->inuse_idx; | 
 | 2242 | 	int res = 0; | 
 | 2243 |  | 
 | 2244 | 	for_each_possible_cpu(cpu) | 
 | 2245 | 		res += per_cpu_ptr(net->core.inuse, cpu)->val[idx]; | 
 | 2246 |  | 
 | 2247 | 	return res >= 0 ? res : 0; | 
 | 2248 | } | 
 | 2249 | EXPORT_SYMBOL_GPL(sock_prot_inuse_get); | 
 | 2250 |  | 
| Alexey Dobriyan | 2c8c1e7 | 2010-01-17 03:35:32 +0000 | [diff] [blame] | 2251 | static int __net_init sock_inuse_init_net(struct net *net) | 
| Pavel Emelyanov | 70ee115 | 2008-03-31 19:42:16 -0700 | [diff] [blame] | 2252 | { | 
 | 2253 | 	net->core.inuse = alloc_percpu(struct prot_inuse); | 
 | 2254 | 	return net->core.inuse ? 0 : -ENOMEM; | 
 | 2255 | } | 
 | 2256 |  | 
| Alexey Dobriyan | 2c8c1e7 | 2010-01-17 03:35:32 +0000 | [diff] [blame] | 2257 | static void __net_exit sock_inuse_exit_net(struct net *net) | 
| Pavel Emelyanov | 70ee115 | 2008-03-31 19:42:16 -0700 | [diff] [blame] | 2258 | { | 
 | 2259 | 	free_percpu(net->core.inuse); | 
 | 2260 | } | 
 | 2261 |  | 
 | 2262 | static struct pernet_operations net_inuse_ops = { | 
 | 2263 | 	.init = sock_inuse_init_net, | 
 | 2264 | 	.exit = sock_inuse_exit_net, | 
 | 2265 | }; | 
 | 2266 |  | 
 | 2267 | static __init int net_inuse_init(void) | 
 | 2268 | { | 
 | 2269 | 	if (register_pernet_subsys(&net_inuse_ops)) | 
 | 2270 | 		panic("Cannot initialize net inuse counters"); | 
 | 2271 |  | 
 | 2272 | 	return 0; | 
 | 2273 | } | 
 | 2274 |  | 
 | 2275 | core_initcall(net_inuse_init); | 
 | 2276 | #else | 
| Pavel Emelyanov | 1338d46 | 2008-03-28 16:38:43 -0700 | [diff] [blame] | 2277 | static DEFINE_PER_CPU(struct prot_inuse, prot_inuse); | 
 | 2278 |  | 
| Pavel Emelyanov | c29a0bc | 2008-03-31 19:41:46 -0700 | [diff] [blame] | 2279 | void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) | 
| Pavel Emelyanov | 1338d46 | 2008-03-28 16:38:43 -0700 | [diff] [blame] | 2280 | { | 
 | 2281 | 	__get_cpu_var(prot_inuse).val[prot->inuse_idx] += val; | 
 | 2282 | } | 
 | 2283 | EXPORT_SYMBOL_GPL(sock_prot_inuse_add); | 
 | 2284 |  | 
| Pavel Emelyanov | c29a0bc | 2008-03-31 19:41:46 -0700 | [diff] [blame] | 2285 | int sock_prot_inuse_get(struct net *net, struct proto *prot) | 
| Pavel Emelyanov | 1338d46 | 2008-03-28 16:38:43 -0700 | [diff] [blame] | 2286 | { | 
 | 2287 | 	int cpu, idx = prot->inuse_idx; | 
 | 2288 | 	int res = 0; | 
 | 2289 |  | 
 | 2290 | 	for_each_possible_cpu(cpu) | 
 | 2291 | 		res += per_cpu(prot_inuse, cpu).val[idx]; | 
 | 2292 |  | 
 | 2293 | 	return res >= 0 ? res : 0; | 
 | 2294 | } | 
 | 2295 | EXPORT_SYMBOL_GPL(sock_prot_inuse_get); | 
| Pavel Emelyanov | 70ee115 | 2008-03-31 19:42:16 -0700 | [diff] [blame] | 2296 | #endif | 
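/*
 * Protocols are expected to bump these per-cpu counters as sockets enter
 * and leave their lookup tables, typically from their hash/unhash methods
 * (illustrative sketch):
 *
 *	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);	// on hash
 *	...
 *	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);	// on unhash
 */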
| Pavel Emelyanov | 13ff3d6 | 2008-03-28 16:38:17 -0700 | [diff] [blame] | 2297 |  | 
 | 2298 | static void assign_proto_idx(struct proto *prot) | 
 | 2299 | { | 
 | 2300 | 	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR); | 
 | 2301 |  | 
 | 2302 | 	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) { | 
 | 2303 | 		printk(KERN_ERR "PROTO_INUSE_NR exhausted\n"); | 
 | 2304 | 		return; | 
 | 2305 | 	} | 
 | 2306 |  | 
 | 2307 | 	set_bit(prot->inuse_idx, proto_inuse_idx); | 
 | 2308 | } | 
 | 2309 |  | 
 | 2310 | static void release_proto_idx(struct proto *prot) | 
 | 2311 | { | 
 | 2312 | 	if (prot->inuse_idx != PROTO_INUSE_NR - 1) | 
 | 2313 | 		clear_bit(prot->inuse_idx, proto_inuse_idx); | 
 | 2314 | } | 
 | 2315 | #else | 
 | 2316 | static inline void assign_proto_idx(struct proto *prot) | 
 | 2317 | { | 
 | 2318 | } | 
 | 2319 |  | 
 | 2320 | static inline void release_proto_idx(struct proto *prot) | 
 | 2321 | { | 
 | 2322 | } | 
 | 2323 | #endif | 
 | 2324 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2325 | int proto_register(struct proto *prot, int alloc_slab) | 
 | 2326 | { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2327 | 	if (alloc_slab) { | 
 | 2328 | 		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, | 
| Eric Dumazet | 271b72c | 2008-10-29 02:11:14 -0700 | [diff] [blame] | 2329 | 					SLAB_HWCACHE_ALIGN | prot->slab_flags, | 
 | 2330 | 					NULL); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2331 |  | 
 | 2332 | 		if (prot->slab == NULL) { | 
 | 2333 | 			printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", | 
 | 2334 | 			       prot->name); | 
| Pavel Emelyanov | 60e7663 | 2008-03-28 16:39:10 -0700 | [diff] [blame] | 2335 | 			goto out; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2336 | 		} | 
| Arnaldo Carvalho de Melo | 2e6599c | 2005-06-18 22:46:52 -0700 | [diff] [blame] | 2337 |  | 
 | 2338 | 		if (prot->rsk_prot != NULL) { | 
| Alexey Dobriyan | faf2342 | 2010-02-17 09:34:12 +0000 | [diff] [blame] | 2339 | 			prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name); | 
| Catalin Marinas | 7e56b5d | 2008-11-21 16:45:22 -0800 | [diff] [blame] | 2340 | 			if (prot->rsk_prot->slab_name == NULL) | 
| Arnaldo Carvalho de Melo | 2e6599c | 2005-06-18 22:46:52 -0700 | [diff] [blame] | 2341 | 				goto out_free_sock_slab; | 
 | 2342 |  | 
| Catalin Marinas | 7e56b5d | 2008-11-21 16:45:22 -0800 | [diff] [blame] | 2343 | 			prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name, | 
| Arnaldo Carvalho de Melo | 2e6599c | 2005-06-18 22:46:52 -0700 | [diff] [blame] | 2344 | 								 prot->rsk_prot->obj_size, 0, | 
| Paul Mundt | 20c2df8 | 2007-07-20 10:11:58 +0900 | [diff] [blame] | 2345 | 								 SLAB_HWCACHE_ALIGN, NULL); | 
| Arnaldo Carvalho de Melo | 2e6599c | 2005-06-18 22:46:52 -0700 | [diff] [blame] | 2346 |  | 
 | 2347 | 			if (prot->rsk_prot->slab == NULL) { | 
 | 2348 | 				printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n", | 
 | 2349 | 				       prot->name); | 
 | 2350 | 				goto out_free_request_sock_slab_name; | 
 | 2351 | 			} | 
 | 2352 | 		} | 
| Arnaldo Carvalho de Melo | 8feaf0c | 2005-08-09 20:09:30 -0700 | [diff] [blame] | 2353 |  | 
| Arnaldo Carvalho de Melo | 6d6ee43 | 2005-12-13 23:25:19 -0800 | [diff] [blame] | 2354 | 		if (prot->twsk_prot != NULL) { | 
| Alexey Dobriyan | faf2342 | 2010-02-17 09:34:12 +0000 | [diff] [blame] | 2355 | 			prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name); | 
| Arnaldo Carvalho de Melo | 8feaf0c | 2005-08-09 20:09:30 -0700 | [diff] [blame] | 2356 |  | 
| Catalin Marinas | 7e56b5d | 2008-11-21 16:45:22 -0800 | [diff] [blame] | 2357 | 			if (prot->twsk_prot->twsk_slab_name == NULL) | 
| Arnaldo Carvalho de Melo | 8feaf0c | 2005-08-09 20:09:30 -0700 | [diff] [blame] | 2358 | 				goto out_free_request_sock_slab; | 
 | 2359 |  | 
| Arnaldo Carvalho de Melo | 6d6ee43 | 2005-12-13 23:25:19 -0800 | [diff] [blame] | 2360 | 			prot->twsk_prot->twsk_slab = | 
| Catalin Marinas | 7e56b5d | 2008-11-21 16:45:22 -0800 | [diff] [blame] | 2361 | 				kmem_cache_create(prot->twsk_prot->twsk_slab_name, | 
| Arnaldo Carvalho de Melo | 6d6ee43 | 2005-12-13 23:25:19 -0800 | [diff] [blame] | 2362 | 						  prot->twsk_prot->twsk_obj_size, | 
| Eric Dumazet | 3ab5aee | 2008-11-16 19:40:17 -0800 | [diff] [blame] | 2363 | 						  0, | 
 | 2364 | 						  SLAB_HWCACHE_ALIGN | | 
 | 2365 | 							prot->slab_flags, | 
| Paul Mundt | 20c2df8 | 2007-07-20 10:11:58 +0900 | [diff] [blame] | 2366 | 						  NULL); | 
| Arnaldo Carvalho de Melo | 6d6ee43 | 2005-12-13 23:25:19 -0800 | [diff] [blame] | 2367 | 			if (prot->twsk_prot->twsk_slab == NULL) | 
| Arnaldo Carvalho de Melo | 8feaf0c | 2005-08-09 20:09:30 -0700 | [diff] [blame] | 2368 | 				goto out_free_timewait_sock_slab_name; | 
 | 2369 | 		} | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2370 | 	} | 
 | 2371 |  | 
| Arnaldo Carvalho de Melo | 2a27805 | 2005-04-16 15:24:09 -0700 | [diff] [blame] | 2372 | 	write_lock(&proto_list_lock); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2373 | 	list_add(&prot->node, &proto_list); | 
| Pavel Emelyanov | 13ff3d6 | 2008-03-28 16:38:17 -0700 | [diff] [blame] | 2374 | 	assign_proto_idx(prot); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2375 | 	write_unlock(&proto_list_lock); | 
| Pavel Emelyanov | b733c00 | 2007-11-07 02:23:38 -0800 | [diff] [blame] | 2376 | 	return 0; | 
 | 2377 |  | 
| Arnaldo Carvalho de Melo | 8feaf0c | 2005-08-09 20:09:30 -0700 | [diff] [blame] | 2378 | out_free_timewait_sock_slab_name: | 
| Catalin Marinas | 7e56b5d | 2008-11-21 16:45:22 -0800 | [diff] [blame] | 2379 | 	kfree(prot->twsk_prot->twsk_slab_name); | 
| Arnaldo Carvalho de Melo | 8feaf0c | 2005-08-09 20:09:30 -0700 | [diff] [blame] | 2380 | out_free_request_sock_slab: | 
 | 2381 | 	if (prot->rsk_prot && prot->rsk_prot->slab) { | 
 | 2382 | 		kmem_cache_destroy(prot->rsk_prot->slab); | 
 | 2383 | 		prot->rsk_prot->slab = NULL; | 
 | 2384 | 	} | 
| Arnaldo Carvalho de Melo | 2e6599c | 2005-06-18 22:46:52 -0700 | [diff] [blame] | 2385 | out_free_request_sock_slab_name: | 
| Dan Carpenter | 72150e9 | 2010-03-06 01:04:45 +0000 | [diff] [blame] | 2386 | 	if (prot->rsk_prot) | 
 | 2387 | 		kfree(prot->rsk_prot->slab_name); | 
| Arnaldo Carvalho de Melo | 2e6599c | 2005-06-18 22:46:52 -0700 | [diff] [blame] | 2388 | out_free_sock_slab: | 
 | 2389 | 	kmem_cache_destroy(prot->slab); | 
 | 2390 | 	prot->slab = NULL; | 
| Pavel Emelyanov | b733c00 | 2007-11-07 02:23:38 -0800 | [diff] [blame] | 2391 | out: | 
 | 2392 | 	return -ENOBUFS; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2393 | } | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2394 | EXPORT_SYMBOL(proto_register); | 
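/*
 * A minimal registration for a hypothetical protocol (illustrative
 * sketch; passing alloc_slab=1 makes proto_register() create the slab
 * cache that sockets of this proto are allocated from):
 *
 *	static struct proto foo_proto = {
 *		.name		= "FOO",
 *		.owner		= THIS_MODULE,
 *		.obj_size	= sizeof(struct foo_sock),
 *	};
 *
 *	err = proto_register(&foo_proto, 1);
 *	...
 *	proto_unregister(&foo_proto);	// on module unload
 */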
 | 2395 |  | 
 | 2396 | void proto_unregister(struct proto *prot) | 
 | 2397 | { | 
 | 2398 | 	write_lock(&proto_list_lock); | 
| Pavel Emelyanov | 13ff3d6 | 2008-03-28 16:38:17 -0700 | [diff] [blame] | 2399 | 	release_proto_idx(prot); | 
| Patrick McHardy | 0a3f435 | 2005-09-06 19:47:50 -0700 | [diff] [blame] | 2400 | 	list_del(&prot->node); | 
 | 2401 | 	write_unlock(&proto_list_lock); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2402 |  | 
 | 2403 | 	if (prot->slab != NULL) { | 
 | 2404 | 		kmem_cache_destroy(prot->slab); | 
 | 2405 | 		prot->slab = NULL; | 
 | 2406 | 	} | 
 | 2407 |  | 
| Arnaldo Carvalho de Melo | 2e6599c | 2005-06-18 22:46:52 -0700 | [diff] [blame] | 2408 | 	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) { | 
| Arnaldo Carvalho de Melo | 2e6599c | 2005-06-18 22:46:52 -0700 | [diff] [blame] | 2409 | 		kmem_cache_destroy(prot->rsk_prot->slab); | 
| Catalin Marinas | 7e56b5d | 2008-11-21 16:45:22 -0800 | [diff] [blame] | 2410 | 		kfree(prot->rsk_prot->slab_name); | 
| Arnaldo Carvalho de Melo | 2e6599c | 2005-06-18 22:46:52 -0700 | [diff] [blame] | 2411 | 		prot->rsk_prot->slab = NULL; | 
 | 2412 | 	} | 
 | 2413 |  | 
| Arnaldo Carvalho de Melo | 6d6ee43 | 2005-12-13 23:25:19 -0800 | [diff] [blame] | 2414 | 	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) { | 
| Arnaldo Carvalho de Melo | 6d6ee43 | 2005-12-13 23:25:19 -0800 | [diff] [blame] | 2415 | 		kmem_cache_destroy(prot->twsk_prot->twsk_slab); | 
| Catalin Marinas | 7e56b5d | 2008-11-21 16:45:22 -0800 | [diff] [blame] | 2416 | 		kfree(prot->twsk_prot->twsk_slab_name); | 
| Arnaldo Carvalho de Melo | 6d6ee43 | 2005-12-13 23:25:19 -0800 | [diff] [blame] | 2417 | 		prot->twsk_prot->twsk_slab = NULL; | 
| Arnaldo Carvalho de Melo | 8feaf0c | 2005-08-09 20:09:30 -0700 | [diff] [blame] | 2418 | 	} | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2419 | } | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2420 | EXPORT_SYMBOL(proto_unregister); | 
 | 2421 |  | 
 | 2422 | #ifdef CONFIG_PROC_FS | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2423 | static void *proto_seq_start(struct seq_file *seq, loff_t *pos) | 
| Eric Dumazet | 9a429c4 | 2008-01-01 21:58:02 -0800 | [diff] [blame] | 2424 | 	__acquires(proto_list_lock) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2425 | { | 
 | 2426 | 	read_lock(&proto_list_lock); | 
| Pavel Emelianov | 60f0438 | 2007-07-09 13:15:14 -0700 | [diff] [blame] | 2427 | 	return seq_list_start_head(&proto_list, *pos); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2428 | } | 
 | 2429 |  | 
 | 2430 | static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 
 | 2431 | { | 
| Pavel Emelianov | 60f0438 | 2007-07-09 13:15:14 -0700 | [diff] [blame] | 2432 | 	return seq_list_next(v, &proto_list, pos); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2433 | } | 
 | 2434 |  | 
 | 2435 | static void proto_seq_stop(struct seq_file *seq, void *v) | 
| Eric Dumazet | 9a429c4 | 2008-01-01 21:58:02 -0800 | [diff] [blame] | 2436 | 	__releases(proto_list_lock) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2437 | { | 
 | 2438 | 	read_unlock(&proto_list_lock); | 
 | 2439 | } | 
 | 2440 |  | 
 | 2441 | static char proto_method_implemented(const void *method) | 
 | 2442 | { | 
 | 2443 | 	return method == NULL ? 'n' : 'y'; | 
 | 2444 | } | 
 | 2445 |  | 
 | 2446 | static void proto_seq_printf(struct seq_file *seq, struct proto *proto) | 
 | 2447 | { | 
 | 2448 | 	seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s " | 
 | 2449 | 			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", | 
 | 2450 | 		   proto->name, | 
 | 2451 | 		   proto->obj_size, | 
| Eric Dumazet | 14e943d | 2008-11-19 15:14:01 -0800 | [diff] [blame] | 2452 | 		   sock_prot_inuse_get(seq_file_net(seq), proto), | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2453 | 		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1, | 
 | 2454 | 		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI", | 
 | 2455 | 		   proto->max_header, | 
 | 2456 | 		   proto->slab == NULL ? "no" : "yes", | 
 | 2457 | 		   module_name(proto->owner), | 
 | 2458 | 		   proto_method_implemented(proto->close), | 
 | 2459 | 		   proto_method_implemented(proto->connect), | 
 | 2460 | 		   proto_method_implemented(proto->disconnect), | 
 | 2461 | 		   proto_method_implemented(proto->accept), | 
 | 2462 | 		   proto_method_implemented(proto->ioctl), | 
 | 2463 | 		   proto_method_implemented(proto->init), | 
 | 2464 | 		   proto_method_implemented(proto->destroy), | 
 | 2465 | 		   proto_method_implemented(proto->shutdown), | 
 | 2466 | 		   proto_method_implemented(proto->setsockopt), | 
 | 2467 | 		   proto_method_implemented(proto->getsockopt), | 
 | 2468 | 		   proto_method_implemented(proto->sendmsg), | 
 | 2469 | 		   proto_method_implemented(proto->recvmsg), | 
 | 2470 | 		   proto_method_implemented(proto->sendpage), | 
 | 2471 | 		   proto_method_implemented(proto->bind), | 
 | 2472 | 		   proto_method_implemented(proto->backlog_rcv), | 
 | 2473 | 		   proto_method_implemented(proto->hash), | 
 | 2474 | 		   proto_method_implemented(proto->unhash), | 
 | 2475 | 		   proto_method_implemented(proto->get_port), | 
 | 2476 | 		   proto_method_implemented(proto->enter_memory_pressure)); | 
 | 2477 | } | 
 | 2478 |  | 
 | 2479 | static int proto_seq_show(struct seq_file *seq, void *v) | 
 | 2480 | { | 
| Pavel Emelianov | 60f0438 | 2007-07-09 13:15:14 -0700 | [diff] [blame] | 2481 | 	if (v == &proto_list) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2482 | 		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s", | 
 | 2483 | 			   "protocol", | 
 | 2484 | 			   "size", | 
 | 2485 | 			   "sockets", | 
 | 2486 | 			   "memory", | 
 | 2487 | 			   "press", | 
 | 2488 | 			   "maxhdr", | 
 | 2489 | 			   "slab", | 
 | 2490 | 			   "module", | 
 | 2491 | 			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n"); | 
 | 2492 | 	else | 
| Pavel Emelianov | 60f0438 | 2007-07-09 13:15:14 -0700 | [diff] [blame] | 2493 | 		proto_seq_printf(seq, list_entry(v, struct proto, node)); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2494 | 	return 0; | 
 | 2495 | } | 
 | 2496 |  | 
| Stephen Hemminger | f690808 | 2007-03-12 14:34:29 -0700 | [diff] [blame] | 2497 | static const struct seq_operations proto_seq_ops = { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2498 | 	.start  = proto_seq_start, | 
 | 2499 | 	.next   = proto_seq_next, | 
 | 2500 | 	.stop   = proto_seq_stop, | 
 | 2501 | 	.show   = proto_seq_show, | 
 | 2502 | }; | 
 | 2503 |  | 
 | 2504 | static int proto_seq_open(struct inode *inode, struct file *file) | 
 | 2505 | { | 
| Eric Dumazet | 14e943d | 2008-11-19 15:14:01 -0800 | [diff] [blame] | 2506 | 	return seq_open_net(inode, file, &proto_seq_ops, | 
 | 2507 | 			    sizeof(struct seq_net_private)); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2508 | } | 
 | 2509 |  | 
| Arjan van de Ven | 9a32144 | 2007-02-12 00:55:35 -0800 | [diff] [blame] | 2510 | static const struct file_operations proto_seq_fops = { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2511 | 	.owner		= THIS_MODULE, | 
 | 2512 | 	.open		= proto_seq_open, | 
 | 2513 | 	.read		= seq_read, | 
 | 2514 | 	.llseek		= seq_lseek, | 
| Eric Dumazet | 14e943d | 2008-11-19 15:14:01 -0800 | [diff] [blame] | 2515 | 	.release	= seq_release_net, | 
 | 2516 | }; | 
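/*
 * The seq_file machinery above produces /proc/net/protocols: each
 * registered proto is one row, and the trailing one-letter columns report
 * whether a method is implemented ('y') or left NULL ('n'). Inspect it
 * from userspace with, e.g.:
 *
 *	$ cat /proc/net/protocols
 */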
 | 2517 |  | 
 | 2518 | static __net_init int proto_init_net(struct net *net) | 
 | 2519 | { | 
 | 2520 | 	if (!proc_net_fops_create(net, "protocols", S_IRUGO, &proto_seq_fops)) | 
 | 2521 | 		return -ENOMEM; | 
 | 2522 |  | 
 | 2523 | 	return 0; | 
 | 2524 | } | 
 | 2525 |  | 
 | 2526 | static __net_exit void proto_exit_net(struct net *net) | 
 | 2527 | { | 
 | 2528 | 	proc_net_remove(net, "protocols"); | 
 | 2529 | } | 
 | 2530 |  | 
 | 2531 |  | 
 | 2532 | static __net_initdata struct pernet_operations proto_net_ops = { | 
 | 2533 | 	.init = proto_init_net, | 
 | 2534 | 	.exit = proto_exit_net, | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2535 | }; | 
 | 2536 |  | 
 | 2537 | static int __init proto_init(void) | 
 | 2538 | { | 
| Eric Dumazet | 14e943d | 2008-11-19 15:14:01 -0800 | [diff] [blame] | 2539 | 	return register_pernet_subsys(&proto_net_ops); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2540 | } | 
 | 2541 |  | 
 | 2542 | subsys_initcall(proto_init); | 
 | 2543 |  | 
 | 2544 | #endif /* CONFIG_PROC_FS */ | 