Blame - net/socket.c - kernel/msm-5.4

blob: e1bd5d84d7bf11acce8a2c6b73d0f5ac41809d05 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* NET An implementation of the SOCKET network access protocol.
				3	*
				4	* Version: @(#)socket.c 1.1.93 18/02/95
				5	*
				6	* Authors: Orest Zborowski, <obz@Kodak.COM>
Jesper Juhl	02c30a8	2005-05-05 16:16:16 -0700	[diff] [blame]	7	* Ross Biro
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	8	* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
				9	*
				10	* Fixes:
				11	* Anonymous : NOTSOCK/BADF cleanup. Error fix in
				12	* shutdown()
				13	* Alan Cox : verify_area() fixes
				14	* Alan Cox : Removed DDI
				15	* Jonathan Kamens : SOCK_DGRAM reconnect bug
				16	* Alan Cox : Moved a load of checks to the very
				17	* top level.
				18	* Alan Cox : Move address structures to/from user
				19	* mode above the protocol layers.
				20	* Rob Janssen : Allow 0 length sends.
				21	* Alan Cox : Asynchronous I/O support (cribbed from the
				22	* tty drivers).
				23	* Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
				24	* Jeff Uphoff : Made max number of sockets command-line
				25	* configurable.
				26	* Matti Aarnio : Made the number of sockets dynamic,
				27	* to be allocated when needed, and mr.
				28	* Uphoff's max is used as max to be
				29	* allowed to allocate.
				30	* Linus : Argh. removed all the socket allocation
				31	* altogether: it's in the inode now.
				32	* Alan Cox : Made sock_alloc()/sock_release() public
				33	* for NetROM and future kernel nfsd type
				34	* stuff.
				35	* Alan Cox : sendmsg/recvmsg basics.
				36	* Tom Dyas : Export net symbols.
				37	* Marcin Dalecki : Fixed problems with CONFIG_NET="n".
				38	* Alan Cox : Added thread locking to sys_* calls
				39	* for sockets. May have errors at the
				40	* moment.
				41	* Kevin Buhr : Fixed the dumb errors in the above.
				42	* Andi Kleen : Some small cleanups, optimizations,
				43	* and fixed a copy_from_user() bug.
				44	* Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
				45	* Tigran Aivazian : Made listen(2) backlog sanity checks
				46	* protocol-independent
				47	*
				48	*
				49	* This program is free software; you can redistribute it and/or
				50	* modify it under the terms of the GNU General Public License
				51	* as published by the Free Software Foundation; either version
				52	* 2 of the License, or (at your option) any later version.
				53	*
				54	*
				55	* This module is effectively the top level interface to the BSD socket
				56	* paradigm.
				57	*
				58	* Based upon Swansea University Computer Society NET3.039
				59	*/
				60
				61	#include <linux/config.h>
				62	#include <linux/mm.h>
				63	#include <linux/smp_lock.h>
				64	#include <linux/socket.h>
				65	#include <linux/file.h>
				66	#include <linux/net.h>
				67	#include <linux/interrupt.h>
				68	#include <linux/netdevice.h>
				69	#include <linux/proc_fs.h>
				70	#include <linux/seq_file.h>
				71	#include <linux/wanrouter.h>
				72	#include <linux/if_bridge.h>
Arnaldo Carvalho de Melo	2038073	2005-08-16 02:18:02 -0300	[diff] [blame]	73	#include <linux/if_frad.h>
				74	#include <linux/if_vlan.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	75	#include <linux/init.h>
				76	#include <linux/poll.h>
				77	#include <linux/cache.h>
				78	#include <linux/module.h>
				79	#include <linux/highmem.h>
				80	#include <linux/divert.h>
				81	#include <linux/mount.h>
				82	#include <linux/security.h>
				83	#include <linux/syscalls.h>
				84	#include <linux/compat.h>
				85	#include <linux/kmod.h>
David Woodhouse	3ec3b2f	2005-05-17 12:08:48 +0100	[diff] [blame]	86	#include <linux/audit.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	87
				88	#ifdef CONFIG_NET_RADIO
				89	#include <linux/wireless.h> /* Note : will define WIRELESS_EXT */
				90	#endif /* CONFIG_NET_RADIO */
				91
				92	#include <asm/uaccess.h>
				93	#include <asm/unistd.h>
				94
				95	#include <net/compat.h>
				96
				97	#include <net/sock.h>
				98	#include <linux/netfilter.h>
				99
				100	static int sock_no_open(struct inode irrelevant, struct file dontcare);
				101	static ssize_t sock_aio_read(struct kiocb iocb, char __user buf,
				102	size_t size, loff_t pos);
				103	static ssize_t sock_aio_write(struct kiocb iocb, const char __user buf,
				104	size_t size, loff_t pos);
				105	static int sock_mmap(struct file file, struct vm_area_struct vma);
				106
				107	static int sock_close(struct inode inode, struct file file);
				108	static unsigned int sock_poll(struct file *file,
				109	struct poll_table_struct *wait);
				110	static long sock_ioctl(struct file *file,
				111	unsigned int cmd, unsigned long arg);
				112	static int sock_fasync(int fd, struct file *filp, int on);
				113	static ssize_t sock_readv(struct file file, const struct iovec vector,
				114	unsigned long count, loff_t *ppos);
				115	static ssize_t sock_writev(struct file file, const struct iovec vector,
				116	unsigned long count, loff_t *ppos);
				117	static ssize_t sock_sendpage(struct file file, struct page page,
				118	int offset, size_t size, loff_t *ppos, int more);
				119
				120
				121	/*
				122	* Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
				123	* in the operation structures but are done directly via the socketcall() multiplexor.
				124	*/
				125
				126	static struct file_operations socket_file_ops = {
				127	.owner = THIS_MODULE,
				128	.llseek = no_llseek,
				129	.aio_read = sock_aio_read,
				130	.aio_write = sock_aio_write,
				131	.poll = sock_poll,
				132	.unlocked_ioctl = sock_ioctl,
				133	.mmap = sock_mmap,
				134	.open = sock_no_open, /* special open code to disallow open via /proc */
				135	.release = sock_close,
				136	.fasync = sock_fasync,
				137	.readv = sock_readv,
				138	.writev = sock_writev,
				139	.sendpage = sock_sendpage
				140	};
				141
				142	/*
				143	* The protocol list. Each protocol is registered in here.
				144	*/
				145
				146	static struct net_proto_family *net_families[NPROTO];
				147
				148	#if defined(CONFIG_SMP) \|\| defined(CONFIG_PREEMPT)
				149	static atomic_t net_family_lockct = ATOMIC_INIT(0);
				150	static DEFINE_SPINLOCK(net_family_lock);
				151
				152	/* The strategy is: modifications net_family vector are short, do not
				153	sleep and veeery rare, but read access should be free of any exclusive
				154	locks.
				155	*/
				156
				157	static void net_family_write_lock(void)
				158	{
				159	spin_lock(&net_family_lock);
				160	while (atomic_read(&net_family_lockct) != 0) {
				161	spin_unlock(&net_family_lock);
				162
				163	yield();
				164
				165	spin_lock(&net_family_lock);
				166	}
				167	}
				168
				169	static __inline__ void net_family_write_unlock(void)
				170	{
				171	spin_unlock(&net_family_lock);
				172	}
				173
				174	static __inline__ void net_family_read_lock(void)
				175	{
				176	atomic_inc(&net_family_lockct);
				177	spin_unlock_wait(&net_family_lock);
				178	}
				179
				180	static __inline__ void net_family_read_unlock(void)
				181	{
				182	atomic_dec(&net_family_lockct);
				183	}
				184
				185	#else
				186	#define net_family_write_lock() do { } while(0)
				187	#define net_family_write_unlock() do { } while(0)
				188	#define net_family_read_lock() do { } while(0)
				189	#define net_family_read_unlock() do { } while(0)
				190	#endif
				191
				192
				193	/*
				194	* Statistics counters of the socket lists
				195	*/
				196
				197	static DEFINE_PER_CPU(int, sockets_in_use) = 0;
				198
				199	/*
				200	* Support routines. Move socket addresses back and forth across the kernel/user
				201	* divide and look after the messy bits.
				202	*/
				203
				204	#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
				205	16 for IP, 16 for IPX,
				206	24 for IPv6,
				207	about 80 for AX.25
				208	must be at least one bigger than
				209	the AF_UNIX size (see net/unix/af_unix.c
				210	:unix_mkname()).
				211	*/
				212
				213	/**
				214	* move_addr_to_kernel - copy a socket address into kernel space
				215	* @uaddr: Address in user space
				216	* @kaddr: Address in kernel space
				217	* @ulen: Length in user space
				218	*
				219	* The address is copied into kernel space. If the provided address is
				220	* too long an error code of -EINVAL is returned. If the copy gives
				221	* invalid addresses -EFAULT is returned. On a success 0 is returned.
				222	*/
				223
				224	int move_addr_to_kernel(void __user uaddr, int ulen, void kaddr)
				225	{
				226	if(ulen<0\|\|ulen>MAX_SOCK_ADDR)
				227	return -EINVAL;
				228	if(ulen==0)
				229	return 0;
				230	if(copy_from_user(kaddr,uaddr,ulen))
				231	return -EFAULT;
David Woodhouse	3ec3b2f	2005-05-17 12:08:48 +0100	[diff] [blame]	232	return audit_sockaddr(ulen, kaddr);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	233	}
				234
				235	/**
				236	* move_addr_to_user - copy an address to user space
				237	* @kaddr: kernel space address
				238	* @klen: length of address in kernel
				239	* @uaddr: user space address
				240	* @ulen: pointer to user length field
				241	*
				242	* The value pointed to by ulen on entry is the buffer length available.
				243	* This is overwritten with the buffer space used. -EINVAL is returned
				244	* if an overlong buffer is specified or a negative buffer size. -EFAULT
				245	* is returned if either the buffer or the length field are not
				246	* accessible.
				247	* After copying the data up to the limit the user specifies, the true
				248	* length of the data is written over the length limit the user
				249	* specified. Zero is returned for a success.
				250	*/
				251
				252	int move_addr_to_user(void kaddr, int klen, void __user uaddr, int __user *ulen)
				253	{
				254	int err;
				255	int len;
				256
				257	if((err=get_user(len, ulen)))
				258	return err;
				259	if(len>klen)
				260	len=klen;
				261	if(len<0 \|\| len> MAX_SOCK_ADDR)
				262	return -EINVAL;
				263	if(len)
				264	{
				265	if(copy_to_user(uaddr,kaddr,len))
				266	return -EFAULT;
				267	}
				268	/*
				269	* "fromlen shall refer to the value before truncation.."
				270	* 1003.1g
				271	*/
				272	return __put_user(klen, ulen);
				273	}
				274
				275	#define SOCKFS_MAGIC 0x534F434B
				276
Eric Dumazet	ba89966	2005-08-26 12:05:31 -0700	[diff] [blame]	277	static kmem_cache_t * sock_inode_cachep __read_mostly;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	278
				279	static struct inode sock_alloc_inode(struct super_block sb)
				280	{
				281	struct socket_alloc *ei;
				282	ei = (struct socket_alloc *)kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL);
				283	if (!ei)
				284	return NULL;
				285	init_waitqueue_head(&ei->socket.wait);
				286
				287	ei->socket.fasync_list = NULL;
				288	ei->socket.state = SS_UNCONNECTED;
				289	ei->socket.flags = 0;
				290	ei->socket.ops = NULL;
				291	ei->socket.sk = NULL;
				292	ei->socket.file = NULL;
				293	ei->socket.flags = 0;
				294
				295	return &ei->vfs_inode;
				296	}
				297
				298	static void sock_destroy_inode(struct inode *inode)
				299	{
				300	kmem_cache_free(sock_inode_cachep,
				301	container_of(inode, struct socket_alloc, vfs_inode));
				302	}
				303
				304	static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
				305	{
				306	struct socket_alloc ei = (struct socket_alloc ) foo;
				307
				308	if ((flags & (SLAB_CTOR_VERIFY\|SLAB_CTOR_CONSTRUCTOR)) ==
				309	SLAB_CTOR_CONSTRUCTOR)
				310	inode_init_once(&ei->vfs_inode);
				311	}
				312
				313	static int init_inodecache(void)
				314	{
				315	sock_inode_cachep = kmem_cache_create("sock_inode_cache",
				316	sizeof(struct socket_alloc),
				317	0, SLAB_HWCACHE_ALIGN\|SLAB_RECLAIM_ACCOUNT,
				318	init_once, NULL);
				319	if (sock_inode_cachep == NULL)
				320	return -ENOMEM;
				321	return 0;
				322	}
				323
				324	static struct super_operations sockfs_ops = {
				325	.alloc_inode = sock_alloc_inode,
				326	.destroy_inode =sock_destroy_inode,
				327	.statfs = simple_statfs,
				328	};
				329
				330	static struct super_block sockfs_get_sb(struct file_system_type fs_type,
				331	int flags, const char dev_name, void data)
				332	{
				333	return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC);
				334	}
				335
Eric Dumazet	ba89966	2005-08-26 12:05:31 -0700	[diff] [blame]	336	static struct vfsmount *sock_mnt __read_mostly;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	337
				338	static struct file_system_type sock_fs_type = {
				339	.name = "sockfs",
				340	.get_sb = sockfs_get_sb,
				341	.kill_sb = kill_anon_super,
				342	};
				343	static int sockfs_delete_dentry(struct dentry *dentry)
				344	{
				345	return 1;
				346	}
				347	static struct dentry_operations sockfs_dentry_operations = {
				348	.d_delete = sockfs_delete_dentry,
				349	};
				350
				351	/*
				352	* Obtains the first available file descriptor and sets it up for use.
				353	*
				354	* This function creates file structure and maps it to fd space
				355	* of current process. On success it returns file descriptor
				356	* and file struct implicitly stored in sock->file.
				357	* Note that another thread may close file descriptor before we return
				358	* from this function. We use the fact that now we do not refer
				359	* to socket after mapping. If one day we will need it, this
				360	* function will increment ref. count on file by 1.
				361	*
				362	* In any case returned fd MAY BE not valid!
				363	* This race condition is unavoidable
				364	* with shared fd spaces, we cannot solve it inside kernel,
				365	* but we take care of internal coherence yet.
				366	*/
				367
				368	int sock_map_fd(struct socket *sock)
				369	{
				370	int fd;
				371	struct qstr this;
				372	char name[32];
				373
				374	/*
				375	* Find a file descriptor suitable for return to the user.
				376	*/
				377
				378	fd = get_unused_fd();
				379	if (fd >= 0) {
				380	struct file *file = get_empty_filp();
				381
				382	if (!file) {
				383	put_unused_fd(fd);
				384	fd = -ENFILE;
				385	goto out;
				386	}
				387
Eric Dumazet	f31f5f0	2005-06-22 14:32:51 -0700	[diff] [blame]	388	this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	389	this.name = name;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	390	this.hash = SOCK_INODE(sock)->i_ino;
				391
				392	file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
				393	if (!file->f_dentry) {
				394	put_filp(file);
				395	put_unused_fd(fd);
				396	fd = -ENOMEM;
				397	goto out;
				398	}
				399	file->f_dentry->d_op = &sockfs_dentry_operations;
				400	d_add(file->f_dentry, SOCK_INODE(sock));
				401	file->f_vfsmnt = mntget(sock_mnt);
				402	file->f_mapping = file->f_dentry->d_inode->i_mapping;
				403
				404	sock->file = file;
				405	file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
				406	file->f_mode = FMODE_READ \| FMODE_WRITE;
				407	file->f_flags = O_RDWR;
				408	file->f_pos = 0;
Benjamin LaHaise	07dc3f0	2005-08-10 14:16:04 -0700	[diff] [blame]	409	file->private_data = sock;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	410	fd_install(fd, file);
				411	}
				412
				413	out:
				414	return fd;
				415	}
				416
				417	/**
				418	* sockfd_lookup - Go from a file number to its socket slot
				419	* @fd: file handle
				420	* @err: pointer to an error code return
				421	*
				422	* The file handle passed in is locked and the socket it is bound
				423	* too is returned. If an error occurs the err pointer is overwritten
				424	* with a negative errno code and NULL is returned. The function checks
				425	* for both invalid handles and passing a handle which is not a socket.
				426	*
				427	* On a success the socket object pointer is returned.
				428	*/
				429
				430	struct socket sockfd_lookup(int fd, int err)
				431	{
				432	struct file *file;
				433	struct inode *inode;
				434	struct socket *sock;
				435
				436	if (!(file = fget(fd)))
				437	{
				438	*err = -EBADF;
				439	return NULL;
				440	}
				441
Benjamin LaHaise	07dc3f0	2005-08-10 14:16:04 -0700	[diff] [blame]	442	if (file->f_op == &socket_file_ops)
				443	return file->private_data; /* set in sock_map_fd */
				444
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	445	inode = file->f_dentry->d_inode;
				446	if (!S_ISSOCK(inode->i_mode)) {
				447	*err = -ENOTSOCK;
				448	fput(file);
				449	return NULL;
				450	}
				451
				452	sock = SOCKET_I(inode);
				453	if (sock->file != file) {
				454	printk(KERN_ERR "socki_lookup: socket file changed!\n");
				455	sock->file = file;
				456	}
				457	return sock;
				458	}
				459
				460	/**
				461	* sock_alloc - allocate a socket
				462	*
				463	* Allocate a new inode and socket object. The two are bound together
				464	* and initialised. The socket is then returned. If we are out of inodes
				465	* NULL is returned.
				466	*/
				467
				468	static struct socket *sock_alloc(void)
				469	{
				470	struct inode * inode;
				471	struct socket * sock;
				472
				473	inode = new_inode(sock_mnt->mnt_sb);
				474	if (!inode)
				475	return NULL;
				476
				477	sock = SOCKET_I(inode);
				478
				479	inode->i_mode = S_IFSOCK\|S_IRWXUGO;
				480	inode->i_uid = current->fsuid;
				481	inode->i_gid = current->fsgid;
				482
				483	get_cpu_var(sockets_in_use)++;
				484	put_cpu_var(sockets_in_use);
				485	return sock;
				486	}
				487
				488	/*
				489	* In theory you can't get an open on this inode, but /proc provides
				490	* a back door. Remember to keep it shut otherwise you'll let the
				491	* creepy crawlies in.
				492	*/
				493
				494	static int sock_no_open(struct inode irrelevant, struct file dontcare)
				495	{
				496	return -ENXIO;
				497	}
				498
				499	struct file_operations bad_sock_fops = {
				500	.owner = THIS_MODULE,
				501	.open = sock_no_open,
				502	};
				503
				504	/**
				505	* sock_release - close a socket
				506	* @sock: socket to close
				507	*
				508	* The socket is released from the protocol stack if it has a release
				509	* callback, and the inode is then released if the socket is bound to
				510	* an inode not a file.
				511	*/
				512
				513	void sock_release(struct socket *sock)
				514	{
				515	if (sock->ops) {
				516	struct module *owner = sock->ops->owner;
				517
				518	sock->ops->release(sock);
				519	sock->ops = NULL;
				520	module_put(owner);
				521	}
				522
				523	if (sock->fasync_list)
				524	printk(KERN_ERR "sock_release: fasync list not empty!\n");
				525
				526	get_cpu_var(sockets_in_use)--;
				527	put_cpu_var(sockets_in_use);
				528	if (!sock->file) {
				529	iput(SOCK_INODE(sock));
				530	return;
				531	}
				532	sock->file=NULL;
				533	}
				534
				535	static inline int __sock_sendmsg(struct kiocb iocb, struct socket sock,
				536	struct msghdr *msg, size_t size)
				537	{
				538	struct sock_iocb *si = kiocb_to_siocb(iocb);
				539	int err;
				540
				541	si->sock = sock;
				542	si->scm = NULL;
				543	si->msg = msg;
				544	si->size = size;
				545
				546	err = security_socket_sendmsg(sock, msg, size);
				547	if (err)
				548	return err;
				549
				550	return sock->ops->sendmsg(iocb, sock, msg, size);
				551	}
				552
				553	int sock_sendmsg(struct socket sock, struct msghdr msg, size_t size)
				554	{
				555	struct kiocb iocb;
				556	struct sock_iocb siocb;
				557	int ret;
				558
				559	init_sync_kiocb(&iocb, NULL);
				560	iocb.private = &siocb;
				561	ret = __sock_sendmsg(&iocb, sock, msg, size);
				562	if (-EIOCBQUEUED == ret)
				563	ret = wait_on_sync_kiocb(&iocb);
				564	return ret;
				565	}
				566
				567	int kernel_sendmsg(struct socket sock, struct msghdr msg,
				568	struct kvec *vec, size_t num, size_t size)
				569	{
				570	mm_segment_t oldfs = get_fs();
				571	int result;
				572
				573	set_fs(KERNEL_DS);
				574	/*
				575	* the following is safe, since for compiler definitions of kvec and
				576	* iovec are identical, yielding the same in-core layout and alignment
				577	*/
				578	msg->msg_iov = (struct iovec *)vec,
				579	msg->msg_iovlen = num;
				580	result = sock_sendmsg(sock, msg, size);
				581	set_fs(oldfs);
				582	return result;
				583	}
				584
				585	static inline int __sock_recvmsg(struct kiocb iocb, struct socket sock,
				586	struct msghdr *msg, size_t size, int flags)
				587	{
				588	int err;
				589	struct sock_iocb *si = kiocb_to_siocb(iocb);
				590
				591	si->sock = sock;
				592	si->scm = NULL;
				593	si->msg = msg;
				594	si->size = size;
				595	si->flags = flags;
				596
				597	err = security_socket_recvmsg(sock, msg, size, flags);
				598	if (err)
				599	return err;
				600
				601	return sock->ops->recvmsg(iocb, sock, msg, size, flags);
				602	}
				603
				604	int sock_recvmsg(struct socket sock, struct msghdr msg,
				605	size_t size, int flags)
				606	{
				607	struct kiocb iocb;
				608	struct sock_iocb siocb;
				609	int ret;
				610
				611	init_sync_kiocb(&iocb, NULL);
				612	iocb.private = &siocb;
				613	ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
				614	if (-EIOCBQUEUED == ret)
				615	ret = wait_on_sync_kiocb(&iocb);
				616	return ret;
				617	}
				618
				619	int kernel_recvmsg(struct socket sock, struct msghdr msg,
				620	struct kvec *vec, size_t num,
				621	size_t size, int flags)
				622	{
				623	mm_segment_t oldfs = get_fs();
				624	int result;
				625
				626	set_fs(KERNEL_DS);
				627	/*
				628	* the following is safe, since for compiler definitions of kvec and
				629	* iovec are identical, yielding the same in-core layout and alignment
				630	*/
				631	msg->msg_iov = (struct iovec *)vec,
				632	msg->msg_iovlen = num;
				633	result = sock_recvmsg(sock, msg, size, flags);
				634	set_fs(oldfs);
				635	return result;
				636	}
				637
				638	static void sock_aio_dtor(struct kiocb *iocb)
				639	{
				640	kfree(iocb->private);
				641	}
				642
				643	/*
				644	* Read data from a socket. ubuf is a user mode pointer. We make sure the user
				645	* area ubuf...ubuf+size-1 is writable before asking the protocol.
				646	*/
				647
				648	static ssize_t sock_aio_read(struct kiocb iocb, char __user ubuf,
				649	size_t size, loff_t pos)
				650	{
				651	struct sock_iocb *x, siocb;
				652	struct socket *sock;
				653	int flags;
				654
				655	if (pos != 0)
				656	return -ESPIPE;
				657	if (size==0) /* Match SYS5 behaviour */
				658	return 0;
				659
				660	if (is_sync_kiocb(iocb))
				661	x = &siocb;
				662	else {
				663	x = kmalloc(sizeof(struct sock_iocb), GFP_KERNEL);
				664	if (!x)
				665	return -ENOMEM;
				666	iocb->ki_dtor = sock_aio_dtor;
				667	}
				668	iocb->private = x;
				669	x->kiocb = iocb;
Eric Dumazet	b69aee0	2005-09-06 14:42:45 -0700	[diff] [blame^]	670	sock = iocb->ki_filp->private_data;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	671
				672	x->async_msg.msg_name = NULL;
				673	x->async_msg.msg_namelen = 0;
				674	x->async_msg.msg_iov = &x->async_iov;
				675	x->async_msg.msg_iovlen = 1;
				676	x->async_msg.msg_control = NULL;
				677	x->async_msg.msg_controllen = 0;
				678	x->async_iov.iov_base = ubuf;
				679	x->async_iov.iov_len = size;
				680	flags = !(iocb->ki_filp->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
				681
				682	return __sock_recvmsg(iocb, sock, &x->async_msg, size, flags);
				683	}
				684
				685
				686	/*
				687	* Write data to a socket. We verify that the user area ubuf..ubuf+size-1
				688	* is readable by the user process.
				689	*/
				690
				691	static ssize_t sock_aio_write(struct kiocb iocb, const char __user ubuf,
				692	size_t size, loff_t pos)
				693	{
				694	struct sock_iocb *x, siocb;
				695	struct socket *sock;
				696
				697	if (pos != 0)
				698	return -ESPIPE;
				699	if(size==0) /* Match SYS5 behaviour */
				700	return 0;
				701
				702	if (is_sync_kiocb(iocb))
				703	x = &siocb;
				704	else {
				705	x = kmalloc(sizeof(struct sock_iocb), GFP_KERNEL);
				706	if (!x)
				707	return -ENOMEM;
				708	iocb->ki_dtor = sock_aio_dtor;
				709	}
				710	iocb->private = x;
				711	x->kiocb = iocb;
Eric Dumazet	b69aee0	2005-09-06 14:42:45 -0700	[diff] [blame^]	712	sock = iocb->ki_filp->private_data;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	713
				714	x->async_msg.msg_name = NULL;
				715	x->async_msg.msg_namelen = 0;
				716	x->async_msg.msg_iov = &x->async_iov;
				717	x->async_msg.msg_iovlen = 1;
				718	x->async_msg.msg_control = NULL;
				719	x->async_msg.msg_controllen = 0;
				720	x->async_msg.msg_flags = !(iocb->ki_filp->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
				721	if (sock->type == SOCK_SEQPACKET)
				722	x->async_msg.msg_flags \|= MSG_EOR;
				723	x->async_iov.iov_base = (void __user *)ubuf;
				724	x->async_iov.iov_len = size;
				725
				726	return __sock_sendmsg(iocb, sock, &x->async_msg, size);
				727	}
				728
Arnaldo Carvalho de Melo	2038073	2005-08-16 02:18:02 -0300	[diff] [blame]	729	static ssize_t sock_sendpage(struct file file, struct page page,
				730	int offset, size_t size, loff_t *ppos, int more)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	731	{
				732	struct socket *sock;
				733	int flags;
				734
Eric Dumazet	b69aee0	2005-09-06 14:42:45 -0700	[diff] [blame^]	735	sock = file->private_data;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	736
				737	flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
				738	if (more)
				739	flags \|= MSG_MORE;
				740
				741	return sock->ops->sendpage(sock, page, offset, size, flags);
				742	}
				743
Eric Dumazet	b69aee0	2005-09-06 14:42:45 -0700	[diff] [blame^]	744	static int sock_readv_writev(int type,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	745	struct file * file, const struct iovec * iov,
				746	long count, size_t size)
				747	{
				748	struct msghdr msg;
				749	struct socket *sock;
				750
Eric Dumazet	b69aee0	2005-09-06 14:42:45 -0700	[diff] [blame^]	751	sock = file->private_data;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	752
				753	msg.msg_name = NULL;
				754	msg.msg_namelen = 0;
				755	msg.msg_control = NULL;
				756	msg.msg_controllen = 0;
				757	msg.msg_iov = (struct iovec *) iov;
				758	msg.msg_iovlen = count;
				759	msg.msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
				760
				761	/* read() does a VERIFY_WRITE */
				762	if (type == VERIFY_WRITE)
				763	return sock_recvmsg(sock, &msg, size, msg.msg_flags);
				764
				765	if (sock->type == SOCK_SEQPACKET)
				766	msg.msg_flags \|= MSG_EOR;
				767
				768	return sock_sendmsg(sock, &msg, size);
				769	}
				770
				771	static ssize_t sock_readv(struct file file, const struct iovec vector,
				772	unsigned long count, loff_t *ppos)
				773	{
				774	size_t tot_len = 0;
				775	int i;
				776	for (i = 0 ; i < count ; i++)
				777	tot_len += vector[i].iov_len;
Eric Dumazet	b69aee0	2005-09-06 14:42:45 -0700	[diff] [blame^]	778	return sock_readv_writev(VERIFY_WRITE,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	779	file, vector, count, tot_len);
				780	}
				781
				782	static ssize_t sock_writev(struct file file, const struct iovec vector,
				783	unsigned long count, loff_t *ppos)
				784	{
				785	size_t tot_len = 0;
				786	int i;
				787	for (i = 0 ; i < count ; i++)
				788	tot_len += vector[i].iov_len;
Eric Dumazet	b69aee0	2005-09-06 14:42:45 -0700	[diff] [blame^]	789	return sock_readv_writev(VERIFY_READ,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	790	file, vector, count, tot_len);
				791	}
				792
				793
				794	/*
				795	* Atomic setting of ioctl hooks to avoid race
				796	* with module unload.
				797	*/
				798
				799	static DECLARE_MUTEX(br_ioctl_mutex);
				800	static int (br_ioctl_hook)(unsigned int cmd, void __user arg) = NULL;
				801
				802	void brioctl_set(int (hook)(unsigned int, void __user ))
				803	{
				804	down(&br_ioctl_mutex);
				805	br_ioctl_hook = hook;
				806	up(&br_ioctl_mutex);
				807	}
				808	EXPORT_SYMBOL(brioctl_set);
				809
				810	static DECLARE_MUTEX(vlan_ioctl_mutex);
				811	static int (vlan_ioctl_hook)(void __user arg);
				812
				813	void vlan_ioctl_set(int (hook)(void __user ))
				814	{
				815	down(&vlan_ioctl_mutex);
				816	vlan_ioctl_hook = hook;
				817	up(&vlan_ioctl_mutex);
				818	}
				819	EXPORT_SYMBOL(vlan_ioctl_set);
				820
				821	static DECLARE_MUTEX(dlci_ioctl_mutex);
				822	static int (dlci_ioctl_hook)(unsigned int, void __user );
				823
				824	void dlci_ioctl_set(int (hook)(unsigned int, void __user ))
				825	{
				826	down(&dlci_ioctl_mutex);
				827	dlci_ioctl_hook = hook;
				828	up(&dlci_ioctl_mutex);
				829	}
				830	EXPORT_SYMBOL(dlci_ioctl_set);
				831
				832	/*
				833	* With an ioctl, arg may well be a user mode pointer, but we don't know
				834	* what to do with it - that's up to the protocol still.
				835	*/
				836
				837	static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
				838	{
				839	struct socket *sock;
				840	void __user argp = (void __user )arg;
				841	int pid, err;
				842
Eric Dumazet	b69aee0	2005-09-06 14:42:45 -0700	[diff] [blame^]	843	sock = file->private_data;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	844	if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
				845	err = dev_ioctl(cmd, argp);
				846	} else
				847	#ifdef WIRELESS_EXT
				848	if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
				849	err = dev_ioctl(cmd, argp);
				850	} else
				851	#endif /* WIRELESS_EXT */
				852	switch (cmd) {
				853	case FIOSETOWN:
				854	case SIOCSPGRP:
				855	err = -EFAULT;
				856	if (get_user(pid, (int __user *)argp))
				857	break;
				858	err = f_setown(sock->file, pid, 1);
				859	break;
				860	case FIOGETOWN:
				861	case SIOCGPGRP:
				862	err = put_user(sock->file->f_owner.pid, (int __user *)argp);
				863	break;
				864	case SIOCGIFBR:
				865	case SIOCSIFBR:
				866	case SIOCBRADDBR:
				867	case SIOCBRDELBR:
				868	err = -ENOPKG;
				869	if (!br_ioctl_hook)
				870	request_module("bridge");
				871
				872	down(&br_ioctl_mutex);
				873	if (br_ioctl_hook)
				874	err = br_ioctl_hook(cmd, argp);
				875	up(&br_ioctl_mutex);
				876	break;
				877	case SIOCGIFVLAN:
				878	case SIOCSIFVLAN:
				879	err = -ENOPKG;
				880	if (!vlan_ioctl_hook)
				881	request_module("8021q");
				882
				883	down(&vlan_ioctl_mutex);
				884	if (vlan_ioctl_hook)
				885	err = vlan_ioctl_hook(argp);
				886	up(&vlan_ioctl_mutex);
				887	break;
				888	case SIOCGIFDIVERT:
				889	case SIOCSIFDIVERT:
				890	/* Convert this to call through a hook */
				891	err = divert_ioctl(cmd, argp);
				892	break;
				893	case SIOCADDDLCI:
				894	case SIOCDELDLCI:
				895	err = -ENOPKG;
				896	if (!dlci_ioctl_hook)
				897	request_module("dlci");
				898
				899	if (dlci_ioctl_hook) {
				900	down(&dlci_ioctl_mutex);
				901	err = dlci_ioctl_hook(cmd, argp);
				902	up(&dlci_ioctl_mutex);
				903	}
				904	break;
				905	default:
				906	err = sock->ops->ioctl(sock, cmd, arg);
				907	break;
				908	}
				909	return err;
				910	}
				911
				912	int sock_create_lite(int family, int type, int protocol, struct socket **res)
				913	{
				914	int err;
				915	struct socket *sock = NULL;
				916
				917	err = security_socket_create(family, type, protocol, 1);
				918	if (err)
				919	goto out;
				920
				921	sock = sock_alloc();
				922	if (!sock) {
				923	err = -ENOMEM;
				924	goto out;
				925	}
				926
				927	security_socket_post_create(sock, family, type, protocol, 1);
				928	sock->type = type;
				929	out:
				930	*res = sock;
				931	return err;
				932	}
				933
				934	/* No kernel lock held - perfect */
				935	static unsigned int sock_poll(struct file file, poll_table wait)
				936	{
				937	struct socket *sock;
				938
				939	/*
				940	* We can't return errors to poll, so it's either yes or no.
				941	*/
Eric Dumazet	b69aee0	2005-09-06 14:42:45 -0700	[diff] [blame^]	942	sock = file->private_data;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	943	return sock->ops->poll(file, sock, wait);
				944	}
				945
				946	static int sock_mmap(struct file * file, struct vm_area_struct * vma)
				947	{
Eric Dumazet	b69aee0	2005-09-06 14:42:45 -0700	[diff] [blame^]	948	struct socket *sock = file->private_data;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	949
				950	return sock->ops->mmap(file, sock, vma);
				951	}
				952
Arnaldo Carvalho de Melo	2038073	2005-08-16 02:18:02 -0300	[diff] [blame]	953	static int sock_close(struct inode inode, struct file filp)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	954	{
				955	/*
				956	* It was possible the inode is NULL we were
				957	* closing an unfinished socket.
				958	*/
				959
				960	if (!inode)
				961	{
				962	printk(KERN_DEBUG "sock_close: NULL inode\n");
				963	return 0;
				964	}
				965	sock_fasync(-1, filp, 0);
				966	sock_release(SOCKET_I(inode));
				967	return 0;
				968	}
				969
				970	/*
				971	* Update the socket async list
				972	*
				973	* Fasync_list locking strategy.
				974	*
				975	* 1. fasync_list is modified only under process context socket lock
				976	* i.e. under semaphore.
				977	* 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
				978	* or under socket lock.
				979	* 3. fasync_list can be used from softirq context, so that
				980	* modification under socket lock have to be enhanced with
				981	* write_lock_bh(&sk->sk_callback_lock).
				982	* --ANK (990710)
				983	*/
				984
				985	static int sock_fasync(int fd, struct file *filp, int on)
				986	{
				987	struct fasync_struct fa, fna=NULL, **prev;
				988	struct socket *sock;
				989	struct sock *sk;
				990
				991	if (on)
				992	{
				993	fna=(struct fasync_struct *)kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
				994	if(fna==NULL)
				995	return -ENOMEM;
				996	}
				997
Eric Dumazet	b69aee0	2005-09-06 14:42:45 -0700	[diff] [blame^]	998	sock = filp->private_data;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	999
				1000	if ((sk=sock->sk) == NULL) {
				1001	kfree(fna);
				1002	return -EINVAL;
				1003	}
				1004
				1005	lock_sock(sk);
				1006
				1007	prev=&(sock->fasync_list);
				1008
				1009	for (fa=prev; fa!=NULL; prev=&fa->fa_next,fa=prev)
				1010	if (fa->fa_file==filp)
				1011	break;
				1012
				1013	if(on)
				1014	{
				1015	if(fa!=NULL)
				1016	{
				1017	write_lock_bh(&sk->sk_callback_lock);
				1018	fa->fa_fd=fd;
				1019	write_unlock_bh(&sk->sk_callback_lock);
				1020
				1021	kfree(fna);
				1022	goto out;
				1023	}
				1024	fna->fa_file=filp;
				1025	fna->fa_fd=fd;
				1026	fna->magic=FASYNC_MAGIC;
				1027	fna->fa_next=sock->fasync_list;
				1028	write_lock_bh(&sk->sk_callback_lock);
				1029	sock->fasync_list=fna;
				1030	write_unlock_bh(&sk->sk_callback_lock);
				1031	}
				1032	else
				1033	{
				1034	if (fa!=NULL)
				1035	{
				1036	write_lock_bh(&sk->sk_callback_lock);
				1037	*prev=fa->fa_next;
				1038	write_unlock_bh(&sk->sk_callback_lock);
				1039	kfree(fa);
				1040	}
				1041	}
				1042
				1043	out:
				1044	release_sock(sock->sk);
				1045	return 0;
				1046	}
				1047
				1048	/* This function may be called only under socket lock or callback_lock */
				1049
				1050	int sock_wake_async(struct socket *sock, int how, int band)
				1051	{
				1052	if (!sock \|\| !sock->fasync_list)
				1053	return -1;
				1054	switch (how)
				1055	{
				1056	case 1:
				1057
				1058	if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
				1059	break;
				1060	goto call_kill;
				1061	case 2:
				1062	if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
				1063	break;
				1064	/* fall through */
				1065	case 0:
				1066	call_kill:
				1067	__kill_fasync(sock->fasync_list, SIGIO, band);
				1068	break;
				1069	case 3:
				1070	__kill_fasync(sock->fasync_list, SIGURG, band);
				1071	}
				1072	return 0;
				1073	}
				1074
				1075	static int __sock_create(int family, int type, int protocol, struct socket **res, int kern)
				1076	{
				1077	int err;
				1078	struct socket *sock;
				1079
				1080	/*
				1081	* Check protocol is in range
				1082	*/
				1083	if (family < 0 \|\| family >= NPROTO)
				1084	return -EAFNOSUPPORT;
				1085	if (type < 0 \|\| type >= SOCK_MAX)
				1086	return -EINVAL;
				1087
				1088	/* Compatibility.
				1089
				1090	This uglymoron is moved from INET layer to here to avoid
				1091	deadlock in module load.
				1092	*/
				1093	if (family == PF_INET && type == SOCK_PACKET) {
				1094	static int warned;
				1095	if (!warned) {
				1096	warned = 1;
				1097	printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", current->comm);
				1098	}
				1099	family = PF_PACKET;
				1100	}
				1101
				1102	err = security_socket_create(family, type, protocol, kern);
				1103	if (err)
				1104	return err;
				1105
				1106	#if defined(CONFIG_KMOD)
				1107	/* Attempt to load a protocol module if the find failed.
				1108	*
				1109	* 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
				1110	* requested real, full-featured networking support upon configuration.
				1111	* Otherwise module support will break!
				1112	*/
				1113	if (net_families[family]==NULL)
				1114	{
				1115	request_module("net-pf-%d",family);
				1116	}
				1117	#endif
				1118
				1119	net_family_read_lock();
				1120	if (net_families[family] == NULL) {
				1121	err = -EAFNOSUPPORT;
				1122	goto out;
				1123	}
				1124
				1125	/*
				1126	* Allocate the socket and allow the family to set things up. if
				1127	* the protocol is 0, the family is instructed to select an appropriate
				1128	* default.
				1129	*/
				1130
				1131	if (!(sock = sock_alloc())) {
				1132	printk(KERN_WARNING "socket: no more sockets\n");
				1133	err = -ENFILE; /* Not exactly a match, but its the
				1134	closest posix thing */
				1135	goto out;
				1136	}
				1137
				1138	sock->type = type;
				1139
				1140	/*
				1141	* We will call the ->create function, that possibly is in a loadable
				1142	* module, so we have to bump that loadable module refcnt first.
				1143	*/
				1144	err = -EAFNOSUPPORT;
				1145	if (!try_module_get(net_families[family]->owner))
				1146	goto out_release;
				1147
				1148	if ((err = net_families[family]->create(sock, protocol)) < 0)
				1149	goto out_module_put;
				1150	/*
				1151	* Now to bump the refcnt of the [loadable] module that owns this
				1152	* socket at sock_release time we decrement its refcnt.
				1153	*/
				1154	if (!try_module_get(sock->ops->owner)) {
				1155	sock->ops = NULL;
				1156	goto out_module_put;
				1157	}
				1158	/*
				1159	* Now that we're done with the ->create function, the [loadable]
				1160	* module can have its refcnt decremented
				1161	*/
				1162	module_put(net_families[family]->owner);
				1163	*res = sock;
				1164	security_socket_post_create(sock, family, type, protocol, kern);
				1165
				1166	out:
				1167	net_family_read_unlock();
				1168	return err;
				1169	out_module_put:
				1170	module_put(net_families[family]->owner);
				1171	out_release:
				1172	sock_release(sock);
				1173	goto out;
				1174	}
				1175
				1176	int sock_create(int family, int type, int protocol, struct socket **res)
				1177	{
				1178	return __sock_create(family, type, protocol, res, 0);
				1179	}
				1180
				1181	int sock_create_kern(int family, int type, int protocol, struct socket **res)
				1182	{
				1183	return __sock_create(family, type, protocol, res, 1);
				1184	}
				1185
				1186	asmlinkage long sys_socket(int family, int type, int protocol)
				1187	{
				1188	int retval;
				1189	struct socket *sock;
				1190
				1191	retval = sock_create(family, type, protocol, &sock);
				1192	if (retval < 0)
				1193	goto out;
				1194
				1195	retval = sock_map_fd(sock);
				1196	if (retval < 0)
				1197	goto out_release;
				1198
				1199	out:
				1200	/* It may be already another descriptor 8) Not kernel problem. */
				1201	return retval;
				1202
				1203	out_release:
				1204	sock_release(sock);
				1205	return retval;
				1206	}
				1207
				1208	/*
				1209	* Create a pair of connected sockets.
				1210	*/
				1211
				1212	asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *usockvec)
				1213	{
				1214	struct socket sock1, sock2;
				1215	int fd1, fd2, err;
				1216
				1217	/*
				1218	* Obtain the first socket and check if the underlying protocol
				1219	* supports the socketpair call.
				1220	*/
				1221
				1222	err = sock_create(family, type, protocol, &sock1);
				1223	if (err < 0)
				1224	goto out;
				1225
				1226	err = sock_create(family, type, protocol, &sock2);
				1227	if (err < 0)
				1228	goto out_release_1;
				1229
				1230	err = sock1->ops->socketpair(sock1, sock2);
				1231	if (err < 0)
				1232	goto out_release_both;
				1233
				1234	fd1 = fd2 = -1;
				1235
				1236	err = sock_map_fd(sock1);
				1237	if (err < 0)
				1238	goto out_release_both;
				1239	fd1 = err;
				1240
				1241	err = sock_map_fd(sock2);
				1242	if (err < 0)
				1243	goto out_close_1;
				1244	fd2 = err;
				1245
				1246	/* fd1 and fd2 may be already another descriptors.
				1247	* Not kernel problem.
				1248	*/
				1249
				1250	err = put_user(fd1, &usockvec[0]);
				1251	if (!err)
				1252	err = put_user(fd2, &usockvec[1]);
				1253	if (!err)
				1254	return 0;
				1255
				1256	sys_close(fd2);
				1257	sys_close(fd1);
				1258	return err;
				1259
				1260	out_close_1:
				1261	sock_release(sock2);
				1262	sys_close(fd1);
				1263	return err;
				1264
				1265	out_release_both:
				1266	sock_release(sock2);
				1267	out_release_1:
				1268	sock_release(sock1);
				1269	out:
				1270	return err;
				1271	}
				1272
				1273
				1274	/*
				1275	* Bind a name to a socket. Nothing much to do here since it's
				1276	* the protocol's responsibility to handle the local address.
				1277	*
				1278	* We move the socket address to kernel space before we call
				1279	* the protocol layer (having also checked the address is ok).
				1280	*/
				1281
				1282	asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
				1283	{
				1284	struct socket *sock;
				1285	char address[MAX_SOCK_ADDR];
				1286	int err;
				1287
				1288	if((sock = sockfd_lookup(fd,&err))!=NULL)
				1289	{
				1290	if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0) {
				1291	err = security_socket_bind(sock, (struct sockaddr *)address, addrlen);
				1292	if (err) {
				1293	sockfd_put(sock);
				1294	return err;
				1295	}
				1296	err = sock->ops->bind(sock, (struct sockaddr *)address, addrlen);
				1297	}
				1298	sockfd_put(sock);
				1299	}
				1300	return err;
				1301	}
				1302
				1303
				1304	/*
				1305	* Perform a listen. Basically, we allow the protocol to do anything
				1306	* necessary for a listen, and if that works, we mark the socket as
				1307	* ready for listening.
				1308	*/
				1309
				1310	int sysctl_somaxconn = SOMAXCONN;
				1311
				1312	asmlinkage long sys_listen(int fd, int backlog)
				1313	{
				1314	struct socket *sock;
				1315	int err;
				1316
				1317	if ((sock = sockfd_lookup(fd, &err)) != NULL) {
				1318	if ((unsigned) backlog > sysctl_somaxconn)
				1319	backlog = sysctl_somaxconn;
				1320
				1321	err = security_socket_listen(sock, backlog);
				1322	if (err) {
				1323	sockfd_put(sock);
				1324	return err;
				1325	}
				1326
				1327	err=sock->ops->listen(sock, backlog);
				1328	sockfd_put(sock);
				1329	}
				1330	return err;
				1331	}
				1332
				1333
				1334	/*
				1335	* For accept, we attempt to create a new socket, set up the link
				1336	* with the client, wake up the client, then return the new
				1337	* connected fd. We collect the address of the connector in kernel
				1338	* space and move it to user at the very end. This is unclean because
				1339	* we open the socket then return an error.
				1340	*
				1341	* 1003.1g adds the ability to recvmsg() to query connection pending
				1342	* status to recvmsg. We need to add that support in a way thats
				1343	* clean when we restucture accept also.
				1344	*/
				1345
				1346	asmlinkage long sys_accept(int fd, struct sockaddr __user upeer_sockaddr, int __user upeer_addrlen)
				1347	{
				1348	struct socket sock, newsock;
				1349	int err, len;
				1350	char address[MAX_SOCK_ADDR];
				1351
				1352	sock = sockfd_lookup(fd, &err);
				1353	if (!sock)
				1354	goto out;
				1355
				1356	err = -ENFILE;
				1357	if (!(newsock = sock_alloc()))
				1358	goto out_put;
				1359
				1360	newsock->type = sock->type;
				1361	newsock->ops = sock->ops;
				1362
				1363	err = security_socket_accept(sock, newsock);
				1364	if (err)
				1365	goto out_release;
				1366
				1367	/*
				1368	* We don't need try_module_get here, as the listening socket (sock)
				1369	* has the protocol module (sock->ops->owner) held.
				1370	*/
				1371	__module_get(newsock->ops->owner);
				1372
				1373	err = sock->ops->accept(sock, newsock, sock->file->f_flags);
				1374	if (err < 0)
				1375	goto out_release;
				1376
				1377	if (upeer_sockaddr) {
				1378	if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 2)<0) {
				1379	err = -ECONNABORTED;
				1380	goto out_release;
				1381	}
				1382	err = move_addr_to_user(address, len, upeer_sockaddr, upeer_addrlen);
				1383	if (err < 0)
				1384	goto out_release;
				1385	}
				1386
				1387	/* File flags are not inherited via accept() unlike another OSes. */
				1388
				1389	if ((err = sock_map_fd(newsock)) < 0)
				1390	goto out_release;
				1391
				1392	security_socket_post_accept(sock, newsock);
				1393
				1394	out_put:
				1395	sockfd_put(sock);
				1396	out:
				1397	return err;
				1398	out_release:
				1399	sock_release(newsock);
				1400	goto out_put;
				1401	}
				1402
				1403
				1404	/*
				1405	* Attempt to connect to a socket with the server address. The address
				1406	* is in user space so we verify it is OK and move it to kernel space.
				1407	*
				1408	* For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
				1409	* break bindings
				1410	*
				1411	* NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
				1412	* other SEQPACKET protocols that take time to connect() as it doesn't
				1413	* include the -EINPROGRESS status for such sockets.
				1414	*/
				1415
				1416	asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
				1417	{
				1418	struct socket *sock;
				1419	char address[MAX_SOCK_ADDR];
				1420	int err;
				1421
				1422	sock = sockfd_lookup(fd, &err);
				1423	if (!sock)
				1424	goto out;
				1425	err = move_addr_to_kernel(uservaddr, addrlen, address);
				1426	if (err < 0)
				1427	goto out_put;
				1428
				1429	err = security_socket_connect(sock, (struct sockaddr *)address, addrlen);
				1430	if (err)
				1431	goto out_put;
				1432
				1433	err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen,
				1434	sock->file->f_flags);
				1435	out_put:
				1436	sockfd_put(sock);
				1437	out:
				1438	return err;
				1439	}
				1440
				1441	/*
				1442	* Get the local address ('name') of a socket object. Move the obtained
				1443	* name to user space.
				1444	*/
				1445
				1446	asmlinkage long sys_getsockname(int fd, struct sockaddr __user usockaddr, int __user usockaddr_len)
				1447	{
				1448	struct socket *sock;
				1449	char address[MAX_SOCK_ADDR];
				1450	int len, err;
				1451
				1452	sock = sockfd_lookup(fd, &err);
				1453	if (!sock)
				1454	goto out;
				1455
				1456	err = security_socket_getsockname(sock);
				1457	if (err)
				1458	goto out_put;
				1459
				1460	err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
				1461	if (err)
				1462	goto out_put;
				1463	err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
				1464
				1465	out_put:
				1466	sockfd_put(sock);
				1467	out:
				1468	return err;
				1469	}
				1470
				1471	/*
				1472	* Get the remote address ('name') of a socket object. Move the obtained
				1473	* name to user space.
				1474	*/
				1475
				1476	asmlinkage long sys_getpeername(int fd, struct sockaddr __user usockaddr, int __user usockaddr_len)
				1477	{
				1478	struct socket *sock;
				1479	char address[MAX_SOCK_ADDR];
				1480	int len, err;
				1481
				1482	if ((sock = sockfd_lookup(fd, &err))!=NULL)
				1483	{
				1484	err = security_socket_getpeername(sock);
				1485	if (err) {
				1486	sockfd_put(sock);
				1487	return err;
				1488	}
				1489
				1490	err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 1);
				1491	if (!err)
				1492	err=move_addr_to_user(address,len, usockaddr, usockaddr_len);
				1493	sockfd_put(sock);
				1494	}
				1495	return err;
				1496	}
				1497
				1498	/*
				1499	* Send a datagram to a given address. We move the address into kernel
				1500	* space and check the user space data area is readable before invoking
				1501	* the protocol.
				1502	*/
				1503
				1504	asmlinkage long sys_sendto(int fd, void __user * buff, size_t len, unsigned flags,
				1505	struct sockaddr __user *addr, int addr_len)
				1506	{
				1507	struct socket *sock;
				1508	char address[MAX_SOCK_ADDR];
				1509	int err;
				1510	struct msghdr msg;
				1511	struct iovec iov;
				1512
				1513	sock = sockfd_lookup(fd, &err);
				1514	if (!sock)
				1515	goto out;
				1516	iov.iov_base=buff;
				1517	iov.iov_len=len;
				1518	msg.msg_name=NULL;
				1519	msg.msg_iov=&iov;
				1520	msg.msg_iovlen=1;
				1521	msg.msg_control=NULL;
				1522	msg.msg_controllen=0;
				1523	msg.msg_namelen=0;
				1524	if(addr)
				1525	{
				1526	err = move_addr_to_kernel(addr, addr_len, address);
				1527	if (err < 0)
				1528	goto out_put;
				1529	msg.msg_name=address;
				1530	msg.msg_namelen=addr_len;
				1531	}
				1532	if (sock->file->f_flags & O_NONBLOCK)
				1533	flags \|= MSG_DONTWAIT;
				1534	msg.msg_flags = flags;
				1535	err = sock_sendmsg(sock, &msg, len);
				1536
				1537	out_put:
				1538	sockfd_put(sock);
				1539	out:
				1540	return err;
				1541	}
				1542
				1543	/*
				1544	* Send a datagram down a socket.
				1545	*/
				1546
				1547	asmlinkage long sys_send(int fd, void __user * buff, size_t len, unsigned flags)
				1548	{
				1549	return sys_sendto(fd, buff, len, flags, NULL, 0);
				1550	}
				1551
				1552	/*
				1553	* Receive a frame from the socket and optionally record the address of the
				1554	* sender. We verify the buffers are writable and if needed move the
				1555	* sender address from kernel to user space.
				1556	*/
				1557
				1558	asmlinkage long sys_recvfrom(int fd, void __user * ubuf, size_t size, unsigned flags,
				1559	struct sockaddr __user addr, int __user addr_len)
				1560	{
				1561	struct socket *sock;
				1562	struct iovec iov;
				1563	struct msghdr msg;
				1564	char address[MAX_SOCK_ADDR];
				1565	int err,err2;
				1566
				1567	sock = sockfd_lookup(fd, &err);
				1568	if (!sock)
				1569	goto out;
				1570
				1571	msg.msg_control=NULL;
				1572	msg.msg_controllen=0;
				1573	msg.msg_iovlen=1;
				1574	msg.msg_iov=&iov;
				1575	iov.iov_len=size;
				1576	iov.iov_base=ubuf;
				1577	msg.msg_name=address;
				1578	msg.msg_namelen=MAX_SOCK_ADDR;
				1579	if (sock->file->f_flags & O_NONBLOCK)
				1580	flags \|= MSG_DONTWAIT;
				1581	err=sock_recvmsg(sock, &msg, size, flags);
				1582
				1583	if(err >= 0 && addr != NULL)
				1584	{
				1585	err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
				1586	if(err2<0)
				1587	err=err2;
				1588	}
				1589	sockfd_put(sock);
				1590	out:
				1591	return err;
				1592	}
				1593
				1594	/*
				1595	* Receive a datagram from a socket.
				1596	*/
				1597
				1598	asmlinkage long sys_recv(int fd, void __user * ubuf, size_t size, unsigned flags)
				1599	{
				1600	return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
				1601	}
				1602
				1603	/*
				1604	* Set a socket option. Because we don't know the option lengths we have
				1605	* to pass the user mode parameter for the protocols to sort out.
				1606	*/
				1607
				1608	asmlinkage long sys_setsockopt(int fd, int level, int optname, char __user *optval, int optlen)
				1609	{
				1610	int err;
				1611	struct socket *sock;
				1612
				1613	if (optlen < 0)
				1614	return -EINVAL;
				1615
				1616	if ((sock = sockfd_lookup(fd, &err))!=NULL)
				1617	{
				1618	err = security_socket_setsockopt(sock,level,optname);
				1619	if (err) {
				1620	sockfd_put(sock);
				1621	return err;
				1622	}
				1623
				1624	if (level == SOL_SOCKET)
				1625	err=sock_setsockopt(sock,level,optname,optval,optlen);
				1626	else
				1627	err=sock->ops->setsockopt(sock, level, optname, optval, optlen);
				1628	sockfd_put(sock);
				1629	}
				1630	return err;
				1631	}
				1632
				1633	/*
				1634	* Get a socket option. Because we don't know the option lengths we have
				1635	* to pass a user mode parameter for the protocols to sort out.
				1636	*/
				1637
				1638	asmlinkage long sys_getsockopt(int fd, int level, int optname, char __user optval, int __user optlen)
				1639	{
				1640	int err;
				1641	struct socket *sock;
				1642
				1643	if ((sock = sockfd_lookup(fd, &err))!=NULL)
				1644	{
				1645	err = security_socket_getsockopt(sock, level,
				1646	optname);
				1647	if (err) {
				1648	sockfd_put(sock);
				1649	return err;
				1650	}
				1651
				1652	if (level == SOL_SOCKET)
				1653	err=sock_getsockopt(sock,level,optname,optval,optlen);
				1654	else
				1655	err=sock->ops->getsockopt(sock, level, optname, optval, optlen);
				1656	sockfd_put(sock);
				1657	}
				1658	return err;
				1659	}
				1660
				1661
				1662	/*
				1663	* Shutdown a socket.
				1664	*/
				1665
				1666	asmlinkage long sys_shutdown(int fd, int how)
				1667	{
				1668	int err;
				1669	struct socket *sock;
				1670
				1671	if ((sock = sockfd_lookup(fd, &err))!=NULL)
				1672	{
				1673	err = security_socket_shutdown(sock, how);
				1674	if (err) {
				1675	sockfd_put(sock);
				1676	return err;
				1677	}
				1678
				1679	err=sock->ops->shutdown(sock, how);
				1680	sockfd_put(sock);
				1681	}
				1682	return err;
				1683	}
				1684
				1685	/* A couple of helpful macros for getting the address of the 32/64 bit
				1686	* fields which are the same type (int / unsigned) on our platforms.
				1687	*/
				1688	#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
				1689	#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
				1690	#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
				1691
				1692
				1693	/*
				1694	* BSD sendmsg interface
				1695	*/
				1696
				1697	asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
				1698	{
				1699	struct compat_msghdr __user msg_compat = (struct compat_msghdr __user )msg;
				1700	struct socket *sock;
				1701	char address[MAX_SOCK_ADDR];
				1702	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
				1703	unsigned char ctl[sizeof(struct cmsghdr) + 20]; /* 20 is size of ipv6_pktinfo */
				1704	unsigned char *ctl_buf = ctl;
				1705	struct msghdr msg_sys;
				1706	int err, ctl_len, iov_size, total_len;
				1707
				1708	err = -EFAULT;
				1709	if (MSG_CMSG_COMPAT & flags) {
				1710	if (get_compat_msghdr(&msg_sys, msg_compat))
				1711	return -EFAULT;
				1712	} else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
				1713	return -EFAULT;
				1714
				1715	sock = sockfd_lookup(fd, &err);
				1716	if (!sock)
				1717	goto out;
				1718
				1719	/* do not move before msg_sys is valid */
				1720	err = -EMSGSIZE;
				1721	if (msg_sys.msg_iovlen > UIO_MAXIOV)
				1722	goto out_put;
				1723
				1724	/* Check whether to allocate the iovec area*/
				1725	err = -ENOMEM;
				1726	iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
				1727	if (msg_sys.msg_iovlen > UIO_FASTIOV) {
				1728	iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
				1729	if (!iov)
				1730	goto out_put;
				1731	}
				1732
				1733	/* This will also move the address data into kernel space */
				1734	if (MSG_CMSG_COMPAT & flags) {
				1735	err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
				1736	} else
				1737	err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
				1738	if (err < 0)
				1739	goto out_freeiov;
				1740	total_len = err;
				1741
				1742	err = -ENOBUFS;
				1743
				1744	if (msg_sys.msg_controllen > INT_MAX)
				1745	goto out_freeiov;
				1746	ctl_len = msg_sys.msg_controllen;
				1747	if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
				1748	err = cmsghdr_from_user_compat_to_kern(&msg_sys, ctl, sizeof(ctl));
				1749	if (err)
				1750	goto out_freeiov;
				1751	ctl_buf = msg_sys.msg_control;
				1752	} else if (ctl_len) {
				1753	if (ctl_len > sizeof(ctl))
				1754	{
				1755	ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
				1756	if (ctl_buf == NULL)
				1757	goto out_freeiov;
				1758	}
				1759	err = -EFAULT;
				1760	/*
				1761	* Careful! Before this, msg_sys.msg_control contains a user pointer.
				1762	* Afterwards, it will be a kernel pointer. Thus the compiler-assisted
				1763	* checking falls down on this.
				1764	*/
				1765	if (copy_from_user(ctl_buf, (void __user *) msg_sys.msg_control, ctl_len))
				1766	goto out_freectl;
				1767	msg_sys.msg_control = ctl_buf;
				1768	}
				1769	msg_sys.msg_flags = flags;
				1770
				1771	if (sock->file->f_flags & O_NONBLOCK)
				1772	msg_sys.msg_flags \|= MSG_DONTWAIT;
				1773	err = sock_sendmsg(sock, &msg_sys, total_len);
				1774
				1775	out_freectl:
				1776	if (ctl_buf != ctl)
				1777	sock_kfree_s(sock->sk, ctl_buf, ctl_len);
				1778	out_freeiov:
				1779	if (iov != iovstack)
				1780	sock_kfree_s(sock->sk, iov, iov_size);
				1781	out_put:
				1782	sockfd_put(sock);
				1783	out:
				1784	return err;
				1785	}
				1786
				1787	/*
				1788	* BSD recvmsg interface
				1789	*/
				1790
				1791	asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flags)
				1792	{
				1793	struct compat_msghdr __user msg_compat = (struct compat_msghdr __user )msg;
				1794	struct socket *sock;
				1795	struct iovec iovstack[UIO_FASTIOV];
				1796	struct iovec *iov=iovstack;
				1797	struct msghdr msg_sys;
				1798	unsigned long cmsg_ptr;
				1799	int err, iov_size, total_len, len;
				1800
				1801	/* kernel mode address */
				1802	char addr[MAX_SOCK_ADDR];
				1803
				1804	/* user mode address pointers */
				1805	struct sockaddr __user *uaddr;
				1806	int __user *uaddr_len;
				1807
				1808	if (MSG_CMSG_COMPAT & flags) {
				1809	if (get_compat_msghdr(&msg_sys, msg_compat))
				1810	return -EFAULT;
				1811	} else
				1812	if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
				1813	return -EFAULT;
				1814
				1815	sock = sockfd_lookup(fd, &err);
				1816	if (!sock)
				1817	goto out;
				1818
				1819	err = -EMSGSIZE;
				1820	if (msg_sys.msg_iovlen > UIO_MAXIOV)
				1821	goto out_put;
				1822
				1823	/* Check whether to allocate the iovec area*/
				1824	err = -ENOMEM;
				1825	iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
				1826	if (msg_sys.msg_iovlen > UIO_FASTIOV) {
				1827	iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
				1828	if (!iov)
				1829	goto out_put;
				1830	}
				1831
				1832	/*
				1833	* Save the user-mode address (verify_iovec will change the
				1834	* kernel msghdr to use the kernel address space)
				1835	*/
				1836
				1837	uaddr = (void __user *) msg_sys.msg_name;
				1838	uaddr_len = COMPAT_NAMELEN(msg);
				1839	if (MSG_CMSG_COMPAT & flags) {
				1840	err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
				1841	} else
				1842	err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
				1843	if (err < 0)
				1844	goto out_freeiov;
				1845	total_len=err;
				1846
				1847	cmsg_ptr = (unsigned long)msg_sys.msg_control;
				1848	msg_sys.msg_flags = 0;
				1849	if (MSG_CMSG_COMPAT & flags)
				1850	msg_sys.msg_flags = MSG_CMSG_COMPAT;
				1851
				1852	if (sock->file->f_flags & O_NONBLOCK)
				1853	flags \|= MSG_DONTWAIT;
				1854	err = sock_recvmsg(sock, &msg_sys, total_len, flags);
				1855	if (err < 0)
				1856	goto out_freeiov;
				1857	len = err;
				1858
				1859	if (uaddr != NULL) {
				1860	err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len);
				1861	if (err < 0)
				1862	goto out_freeiov;
				1863	}
				1864	err = __put_user(msg_sys.msg_flags, COMPAT_FLAGS(msg));
				1865	if (err)
				1866	goto out_freeiov;
				1867	if (MSG_CMSG_COMPAT & flags)
				1868	err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr,
				1869	&msg_compat->msg_controllen);
				1870	else
				1871	err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr,
				1872	&msg->msg_controllen);
				1873	if (err)
				1874	goto out_freeiov;
				1875	err = len;
				1876
				1877	out_freeiov:
				1878	if (iov != iovstack)
				1879	sock_kfree_s(sock->sk, iov, iov_size);
				1880	out_put:
				1881	sockfd_put(sock);
				1882	out:
				1883	return err;
				1884	}
				1885
				1886	#ifdef __ARCH_WANT_SYS_SOCKETCALL
				1887
				1888	/* Argument list sizes for sys_socketcall */
				1889	#define AL(x) ((x) * sizeof(unsigned long))
				1890	static unsigned char nargs[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
				1891	AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
				1892	AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)};
				1893	#undef AL
				1894
				1895	/*
				1896	* System call vectors.
				1897	*
				1898	* Argument checking cleaned up. Saved 20% in size.
				1899	* This function doesn't need to set the kernel lock because
				1900	* it is set by the callees.
				1901	*/
				1902
				1903	asmlinkage long sys_socketcall(int call, unsigned long __user *args)
				1904	{
				1905	unsigned long a[6];
				1906	unsigned long a0,a1;
				1907	int err;
				1908
				1909	if(call<1\|\|call>SYS_RECVMSG)
				1910	return -EINVAL;
				1911
				1912	/* copy_from_user should be SMP safe. */
				1913	if (copy_from_user(a, args, nargs[call]))
				1914	return -EFAULT;
David Woodhouse	3ec3b2f	2005-05-17 12:08:48 +0100	[diff] [blame]	1915
David Woodhouse	4bcff1b	2005-06-02 12:13:21 +0100	[diff] [blame]	1916	err = audit_socketcall(nargs[call]/sizeof(unsigned long), a);
David Woodhouse	3ec3b2f	2005-05-17 12:08:48 +0100	[diff] [blame]	1917	if (err)
				1918	return err;
				1919
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1920	a0=a[0];
				1921	a1=a[1];
				1922
				1923	switch(call)
				1924	{
				1925	case SYS_SOCKET:
				1926	err = sys_socket(a0,a1,a[2]);
				1927	break;
				1928	case SYS_BIND:
				1929	err = sys_bind(a0,(struct sockaddr __user *)a1, a[2]);
				1930	break;
				1931	case SYS_CONNECT:
				1932	err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
				1933	break;
				1934	case SYS_LISTEN:
				1935	err = sys_listen(a0,a1);
				1936	break;
				1937	case SYS_ACCEPT:
				1938	err = sys_accept(a0,(struct sockaddr __user )a1, (int __user )a[2]);
				1939	break;
				1940	case SYS_GETSOCKNAME:
				1941	err = sys_getsockname(a0,(struct sockaddr __user )a1, (int __user )a[2]);
				1942	break;
				1943	case SYS_GETPEERNAME:
				1944	err = sys_getpeername(a0, (struct sockaddr __user )a1, (int __user )a[2]);
				1945	break;
				1946	case SYS_SOCKETPAIR:
				1947	err = sys_socketpair(a0,a1, a[2], (int __user *)a[3]);
				1948	break;
				1949	case SYS_SEND:
				1950	err = sys_send(a0, (void __user *)a1, a[2], a[3]);
				1951	break;
				1952	case SYS_SENDTO:
				1953	err = sys_sendto(a0,(void __user *)a1, a[2], a[3],
				1954	(struct sockaddr __user *)a[4], a[5]);
				1955	break;
				1956	case SYS_RECV:
				1957	err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
				1958	break;
				1959	case SYS_RECVFROM:
				1960	err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
				1961	(struct sockaddr __user )a[4], (int __user )a[5]);
				1962	break;
				1963	case SYS_SHUTDOWN:
				1964	err = sys_shutdown(a0,a1);
				1965	break;
				1966	case SYS_SETSOCKOPT:
				1967	err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
				1968	break;
				1969	case SYS_GETSOCKOPT:
				1970	err = sys_getsockopt(a0, a1, a[2], (char __user )a[3], (int __user )a[4]);
				1971	break;
				1972	case SYS_SENDMSG:
				1973	err = sys_sendmsg(a0, (struct msghdr __user *) a1, a[2]);
				1974	break;
				1975	case SYS_RECVMSG:
				1976	err = sys_recvmsg(a0, (struct msghdr __user *) a1, a[2]);
				1977	break;
				1978	default:
				1979	err = -EINVAL;
				1980	break;
				1981	}
				1982	return err;
				1983	}
				1984
				1985	#endif /* __ARCH_WANT_SYS_SOCKETCALL */
				1986
				1987	/*
				1988	* This function is called by a protocol handler that wants to
				1989	* advertise its address family, and have it linked into the
				1990	* SOCKET module.
				1991	*/
				1992
				1993	int sock_register(struct net_proto_family *ops)
				1994	{
				1995	int err;
				1996
				1997	if (ops->family >= NPROTO) {
				1998	printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
				1999	return -ENOBUFS;
				2000	}
				2001	net_family_write_lock();
				2002	err = -EEXIST;
				2003	if (net_families[ops->family] == NULL) {
				2004	net_families[ops->family]=ops;
				2005	err = 0;
				2006	}
				2007	net_family_write_unlock();
				2008	printk(KERN_INFO "NET: Registered protocol family %d\n",
				2009	ops->family);
				2010	return err;
				2011	}
				2012
				2013	/*
				2014	* This function is called by a protocol handler that wants to
				2015	* remove its address family, and have it unlinked from the
				2016	* SOCKET module.
				2017	*/
				2018
				2019	int sock_unregister(int family)
				2020	{
				2021	if (family < 0 \|\| family >= NPROTO)
				2022	return -1;
				2023
				2024	net_family_write_lock();
				2025	net_families[family]=NULL;
				2026	net_family_write_unlock();
				2027	printk(KERN_INFO "NET: Unregistered protocol family %d\n",
				2028	family);
				2029	return 0;
				2030	}
				2031
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2032	void __init sock_init(void)
				2033	{
				2034	/*
				2035	* Initialize sock SLAB cache.
				2036	*/
				2037
				2038	sk_init();
				2039
				2040	#ifdef SLAB_SKB
				2041	/*
				2042	* Initialize skbuff SLAB cache
				2043	*/
				2044	skb_init();
				2045	#endif
				2046
				2047	/*
				2048	* Initialize the protocols module.
				2049	*/
				2050
				2051	init_inodecache();
				2052	register_filesystem(&sock_fs_type);
				2053	sock_mnt = kern_mount(&sock_fs_type);
				2054	/* The real protocol initialization is performed when
				2055	* do_initcalls is run.
				2056	*/
				2057
				2058	#ifdef CONFIG_NETFILTER
				2059	netfilter_init();
				2060	#endif
				2061	}
				2062
				2063	#ifdef CONFIG_PROC_FS
				2064	void socket_seq_show(struct seq_file *seq)
				2065	{
				2066	int cpu;
				2067	int counter = 0;
				2068
				2069	for (cpu = 0; cpu < NR_CPUS; cpu++)
				2070	counter += per_cpu(sockets_in_use, cpu);
				2071
				2072	/* It can be negative, by the way. 8) */
				2073	if (counter < 0)
				2074	counter = 0;
				2075
				2076	seq_printf(seq, "sockets: used %d\n", counter);
				2077	}
				2078	#endif /* CONFIG_PROC_FS */
				2079
				2080	/* ABI emulation layers need these two */
				2081	EXPORT_SYMBOL(move_addr_to_kernel);
				2082	EXPORT_SYMBOL(move_addr_to_user);
				2083	EXPORT_SYMBOL(sock_create);
				2084	EXPORT_SYMBOL(sock_create_kern);
				2085	EXPORT_SYMBOL(sock_create_lite);
				2086	EXPORT_SYMBOL(sock_map_fd);
				2087	EXPORT_SYMBOL(sock_recvmsg);
				2088	EXPORT_SYMBOL(sock_register);
				2089	EXPORT_SYMBOL(sock_release);
				2090	EXPORT_SYMBOL(sock_sendmsg);
				2091	EXPORT_SYMBOL(sock_unregister);
				2092	EXPORT_SYMBOL(sock_wake_async);
				2093	EXPORT_SYMBOL(sockfd_lookup);
				2094	EXPORT_SYMBOL(kernel_sendmsg);
				2095	EXPORT_SYMBOL(kernel_recvmsg);