PageRenderTime 252ms CodeModel.GetById 158ms app.highlight 81ms RepoModel.GetById 0ms app.codeStats 1ms

/linux-2.6.21.x/net/socket.c

https://bitbucket.org/altlc/wive-rtnl-ralink-rt305x-routers-firmware-amod
C | 2310 lines | 1571 code | 338 blank | 401 comment | 235 complexity | 12fbaea5fca7b66219c4d7e1adbb5627 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1/*
   2 * NET		An implementation of the SOCKET network access protocol.
   3 *
   4 * Version:	@(#)socket.c	1.1.93	18/02/95
   5 *
   6 * Authors:	Orest Zborowski, <obz@Kodak.COM>
   7 *		Ross Biro
   8 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
   9 *
  10 * Fixes:
  11 *		Anonymous	:	NOTSOCK/BADF cleanup. Error fix in
  12 *					shutdown()
  13 *		Alan Cox	:	verify_area() fixes
  14 *		Alan Cox	:	Removed DDI
  15 *		Jonathan Kamens	:	SOCK_DGRAM reconnect bug
  16 *		Alan Cox	:	Moved a load of checks to the very
  17 *					top level.
  18 *		Alan Cox	:	Move address structures to/from user
  19 *					mode above the protocol layers.
  20 *		Rob Janssen	:	Allow 0 length sends.
  21 *		Alan Cox	:	Asynchronous I/O support (cribbed from the
  22 *					tty drivers).
  23 *		Niibe Yutaka	:	Asynchronous I/O for writes (4.4BSD style)
  24 *		Jeff Uphoff	:	Made max number of sockets command-line
  25 *					configurable.
  26 *		Matti Aarnio	:	Made the number of sockets dynamic,
  27 *					to be allocated when needed, and mr.
  28 *					Uphoff's max is used as max to be
  29 *					allowed to allocate.
  30 *		Linus		:	Argh. removed all the socket allocation
  31 *					altogether: it's in the inode now.
  32 *		Alan Cox	:	Made sock_alloc()/sock_release() public
  33 *					for NetROM and future kernel nfsd type
  34 *					stuff.
  35 *		Alan Cox	:	sendmsg/recvmsg basics.
  36 *		Tom Dyas	:	Export net symbols.
  37 *		Marcin Dalecki	:	Fixed problems with CONFIG_NET="n".
  38 *		Alan Cox	:	Added thread locking to sys_* calls
  39 *					for sockets. May have errors at the
  40 *					moment.
  41 *		Kevin Buhr	:	Fixed the dumb errors in the above.
  42 *		Andi Kleen	:	Some small cleanups, optimizations,
  43 *					and fixed a copy_from_user() bug.
  44 *		Tigran Aivazian	:	sys_send(args) calls sys_sendto(args, NULL, 0)
  45 *		Tigran Aivazian	:	Made listen(2) backlog sanity checks
  46 *					protocol-independent
  47 *
  48 *
  49 *		This program is free software; you can redistribute it and/or
  50 *		modify it under the terms of the GNU General Public License
  51 *		as published by the Free Software Foundation; either version
  52 *		2 of the License, or (at your option) any later version.
  53 *
  54 *
  55 *	This module is effectively the top level interface to the BSD socket
  56 *	paradigm.
  57 *
  58 *	Based upon Swansea University Computer Society NET3.039
  59 */
  60
  61#include <linux/mm.h>
  62#include <linux/socket.h>
  63#include <linux/file.h>
  64#include <linux/net.h>
  65#include <linux/interrupt.h>
  66#include <linux/rcupdate.h>
  67#include <linux/netdevice.h>
  68#include <linux/proc_fs.h>
  69#include <linux/seq_file.h>
  70#include <linux/mutex.h>
  71#include <linux/wanrouter.h>
  72#include <linux/if_bridge.h>
  73#include <linux/if_frad.h>
  74#include <linux/if_vlan.h>
  75#include <linux/init.h>
  76#include <linux/poll.h>
  77#include <linux/cache.h>
  78#include <linux/module.h>
  79#include <linux/highmem.h>
  80#include <linux/mount.h>
  81#include <linux/security.h>
  82#include <linux/syscalls.h>
  83#include <linux/compat.h>
  84#include <linux/kmod.h>
  85#include <linux/audit.h>
  86#include <linux/wireless.h>
  87
  88#include <asm/uaccess.h>
  89#include <asm/unistd.h>
  90
  91#include <net/compat.h>
  92
  93#include <net/sock.h>
  94#include <linux/netfilter.h>
  95
  96static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
  97static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
  98			 unsigned long nr_segs, loff_t pos);
  99static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
 100			  unsigned long nr_segs, loff_t pos);
 101static int sock_mmap(struct file *file, struct vm_area_struct *vma);
 102
 103static int sock_close(struct inode *inode, struct file *file);
 104static unsigned int sock_poll(struct file *file,
 105			      struct poll_table_struct *wait);
 106static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 107#ifdef CONFIG_COMPAT
 108static long compat_sock_ioctl(struct file *file,
 109			      unsigned int cmd, unsigned long arg);
 110#endif
 111static int sock_fasync(int fd, struct file *filp, int on);
 112static ssize_t sock_sendpage(struct file *file, struct page *page,
 113			     int offset, size_t size, loff_t *ppos, int more);
 114
 115/*
 116 *	Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
 117 *	in the operation structures but are done directly via the socketcall() multiplexor.
 118 */
 119
 120static const struct file_operations socket_file_ops = {
 121	.owner =	THIS_MODULE,
 122	.llseek =	no_llseek,
 123	.aio_read =	sock_aio_read,
 124	.aio_write =	sock_aio_write,
 125	.poll =		sock_poll,
 126	.unlocked_ioctl = sock_ioctl,
 127#ifdef CONFIG_COMPAT
 128	.compat_ioctl = compat_sock_ioctl,
 129#endif
 130	.mmap =		sock_mmap,
 131	.open =		sock_no_open,	/* special open code to disallow open via /proc */
 132	.release =	sock_close,
 133	.fasync =	sock_fasync,
 134	.sendpage =	sock_sendpage,
 135	.splice_write = generic_splice_sendpage,
 136};
 137
 138/*
 139 *	The protocol list. Each protocol is registered in here.
 140 */
 141
 142static DEFINE_SPINLOCK(net_family_lock);
 143static const struct net_proto_family *net_families[NPROTO] __read_mostly;
 144
 145/*
 146 *	Statistics counters of the socket lists
 147 */
 148
 149static DEFINE_PER_CPU(int, sockets_in_use) = 0;
 150
 151/*
 152 * Support routines.
 153 * Move socket addresses back and forth across the kernel/user
 154 * divide and look after the messy bits.
 155 */
 156
 157#define MAX_SOCK_ADDR	128		/* 108 for Unix domain -
 158					   16 for IP, 16 for IPX,
 159					   24 for IPv6,
 160					   about 80 for AX.25
 161					   must be at least one bigger than
 162					   the AF_UNIX size (see net/unix/af_unix.c
 163					   :unix_mkname()).
 164					 */
 165
 166/**
 167 *	move_addr_to_kernel	-	copy a socket address into kernel space
 168 *	@uaddr: Address in user space
 169 *	@kaddr: Address in kernel space
 170 *	@ulen: Length in user space
 171 *
 172 *	The address is copied into kernel space. If the provided address is
 173 *	too long an error code of -EINVAL is returned. If the copy gives
 174 *	invalid addresses -EFAULT is returned. On a success 0 is returned.
 175 */
 176
 177int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
 178{
 179	if (ulen < 0 || ulen > MAX_SOCK_ADDR)
 180		return -EINVAL;
 181	if (ulen == 0)
 182		return 0;
 183	if (copy_from_user(kaddr, uaddr, ulen))
 184		return -EFAULT;
 185	return audit_sockaddr(ulen, kaddr);
 186}
 187
 188/**
 189 *	move_addr_to_user	-	copy an address to user space
 190 *	@kaddr: kernel space address
 191 *	@klen: length of address in kernel
 192 *	@uaddr: user space address
 193 *	@ulen: pointer to user length field
 194 *
 195 *	The value pointed to by ulen on entry is the buffer length available.
 196 *	This is overwritten with the buffer space used. -EINVAL is returned
 197 *	if an overlong buffer is specified or a negative buffer size. -EFAULT
 198 *	is returned if either the buffer or the length field are not
 199 *	accessible.
 200 *	After copying the data up to the limit the user specifies, the true
 201 *	length of the data is written over the length limit the user
 202 *	specified. Zero is returned for a success.
 203 */
 204
 205int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
 206		      int __user *ulen)
 207{
 208	int err;
 209	int len;
 210
 211	err = get_user(len, ulen);
 212	if (err)
 213		return err;
 214	if (len > klen)
 215		len = klen;
 216	if (len < 0 || len > MAX_SOCK_ADDR)
 217		return -EINVAL;
 218	if (len) {
 219		if (audit_sockaddr(klen, kaddr))
 220			return -ENOMEM;
 221		if (copy_to_user(uaddr, kaddr, len))
 222			return -EFAULT;
 223	}
 224	/*
 225	 *      "fromlen shall refer to the value before truncation.."
 226	 *                      1003.1g
 227	 */
 228	return __put_user(klen, ulen);
 229}
 230
 231#define SOCKFS_MAGIC 0x534F434B
 232
 233static struct kmem_cache *sock_inode_cachep __read_mostly;
 234
 235static struct inode *sock_alloc_inode(struct super_block *sb)
 236{
 237	struct socket_alloc *ei;
 238
 239	ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
 240	if (!ei)
 241		return NULL;
 242	init_waitqueue_head(&ei->socket.wait);
 243
 244	ei->socket.fasync_list = NULL;
 245	ei->socket.state = SS_UNCONNECTED;
 246	ei->socket.flags = 0;
 247	ei->socket.ops = NULL;
 248	ei->socket.sk = NULL;
 249	ei->socket.file = NULL;
 250
 251	return &ei->vfs_inode;
 252}
 253
 254static void sock_destroy_inode(struct inode *inode)
 255{
 256	kmem_cache_free(sock_inode_cachep,
 257			container_of(inode, struct socket_alloc, vfs_inode));
 258}
 259
 260static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
 261{
 262	struct socket_alloc *ei = (struct socket_alloc *)foo;
 263
 264	if (flags & SLAB_CTOR_CONSTRUCTOR)
 265		inode_init_once(&ei->vfs_inode);
 266}
 267
 268static int init_inodecache(void)
 269{
 270	sock_inode_cachep = kmem_cache_create("sock_inode_cache",
 271					      sizeof(struct socket_alloc),
 272					      0,
 273					      (SLAB_HWCACHE_ALIGN |
 274					       SLAB_RECLAIM_ACCOUNT |
 275					       SLAB_MEM_SPREAD),
 276					      init_once,
 277					      NULL);
 278	if (sock_inode_cachep == NULL)
 279		return -ENOMEM;
 280	return 0;
 281}
 282
 283static struct super_operations sockfs_ops = {
 284	.alloc_inode =	sock_alloc_inode,
 285	.destroy_inode =sock_destroy_inode,
 286	.statfs =	simple_statfs,
 287};
 288
 289static int sockfs_get_sb(struct file_system_type *fs_type,
 290			 int flags, const char *dev_name, void *data,
 291			 struct vfsmount *mnt)
 292{
 293	return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
 294			     mnt);
 295}
 296
 297static struct vfsmount *sock_mnt __read_mostly;
 298
 299static struct file_system_type sock_fs_type = {
 300	.name =		"sockfs",
 301	.get_sb =	sockfs_get_sb,
 302	.kill_sb =	kill_anon_super,
 303};
 304
 305static int sockfs_delete_dentry(struct dentry *dentry)
 306{
 307	/*
 308	 * At creation time, we pretended this dentry was hashed
 309	 * (by clearing DCACHE_UNHASHED bit in d_flags)
 310	 * At delete time, we restore the truth : not hashed.
 311	 * (so that dput() can proceed correctly)
 312	 */
 313	dentry->d_flags |= DCACHE_UNHASHED;
 314	return 0;
 315}
 316static struct dentry_operations sockfs_dentry_operations = {
 317	.d_delete = sockfs_delete_dentry,
 318};
 319
 320/*
 321 *	Obtains the first available file descriptor and sets it up for use.
 322 *
 323 *	These functions create file structures and maps them to fd space
 324 *	of the current process. On success it returns file descriptor
 325 *	and file struct implicitly stored in sock->file.
 326 *	Note that another thread may close file descriptor before we return
 327 *	from this function. We use the fact that now we do not refer
 328 *	to socket after mapping. If one day we will need it, this
 329 *	function will increment ref. count on file by 1.
 330 *
 331 *	In any case returned fd MAY BE not valid!
 332 *	This race condition is unavoidable
 333 *	with shared fd spaces, we cannot solve it inside kernel,
 334 *	but we take care of internal coherence yet.
 335 */
 336
 337static int sock_alloc_fd(struct file **filep)
 338{
 339	int fd;
 340
 341	fd = get_unused_fd();
 342	if (likely(fd >= 0)) {
 343		struct file *file = get_empty_filp();
 344
 345		*filep = file;
 346		if (unlikely(!file)) {
 347			put_unused_fd(fd);
 348			return -ENFILE;
 349		}
 350	} else
 351		*filep = NULL;
 352	return fd;
 353}
 354
 355static int sock_attach_fd(struct socket *sock, struct file *file)
 356{
 357	struct qstr this;
 358	char name[32];
 359
 360	this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino);
 361	this.name = (unsigned char *)name;
 362	this.hash = 0;
 363
 364	file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
 365	if (unlikely(!file->f_path.dentry))
 366		return -ENOMEM;
 367
 368	file->f_path.dentry->d_op = &sockfs_dentry_operations;
 369	/*
 370	 * We dont want to push this dentry into global dentry hash table.
 371	 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
 372	 * This permits a working /proc/$pid/fd/XXX on sockets
 373	 */
 374	file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED;
 375	d_instantiate(file->f_path.dentry, SOCK_INODE(sock));
 376	file->f_path.mnt = mntget(sock_mnt);
 377	file->f_mapping = file->f_path.dentry->d_inode->i_mapping;
 378
 379	sock->file = file;
 380	file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
 381	file->f_mode = FMODE_READ | FMODE_WRITE;
 382	file->f_flags = O_RDWR;
 383	file->f_pos = 0;
 384	file->private_data = sock;
 385
 386	return 0;
 387}
 388
 389int sock_map_fd(struct socket *sock)
 390{
 391	struct file *newfile;
 392	int fd = sock_alloc_fd(&newfile);
 393
 394	if (likely(fd >= 0)) {
 395		int err = sock_attach_fd(sock, newfile);
 396
 397		if (unlikely(err < 0)) {
 398			put_filp(newfile);
 399			put_unused_fd(fd);
 400			return err;
 401		}
 402		fd_install(fd, newfile);
 403	}
 404	return fd;
 405}
 406
 407static struct socket *sock_from_file(struct file *file, int *err)
 408{
 409	if (file->f_op == &socket_file_ops)
 410		return file->private_data;	/* set in sock_map_fd */
 411
 412	*err = -ENOTSOCK;
 413	return NULL;
 414}
 415
 416/**
 417 *	sockfd_lookup	- 	Go from a file number to its socket slot
 418 *	@fd: file handle
 419 *	@err: pointer to an error code return
 420 *
 421 *	The file handle passed in is locked and the socket it is bound
 422 *	too is returned. If an error occurs the err pointer is overwritten
 423 *	with a negative errno code and NULL is returned. The function checks
 424 *	for both invalid handles and passing a handle which is not a socket.
 425 *
 426 *	On a success the socket object pointer is returned.
 427 */
 428
 429struct socket *sockfd_lookup(int fd, int *err)
 430{
 431	struct file *file;
 432	struct socket *sock;
 433
 434	file = fget(fd);
 435	if (!file) {
 436		*err = -EBADF;
 437		return NULL;
 438	}
 439
 440	sock = sock_from_file(file, err);
 441	if (!sock)
 442		fput(file);
 443	return sock;
 444}
 445
 446static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
 447{
 448	struct file *file;
 449	struct socket *sock;
 450
 451	*err = -EBADF;
 452	file = fget_light(fd, fput_needed);
 453	if (file) {
 454		sock = sock_from_file(file, err);
 455		if (sock)
 456			return sock;
 457		fput_light(file, *fput_needed);
 458	}
 459	return NULL;
 460}
 461
 462/**
 463 *	sock_alloc	-	allocate a socket
 464 *
 465 *	Allocate a new inode and socket object. The two are bound together
 466 *	and initialised. The socket is then returned. If we are out of inodes
 467 *	NULL is returned.
 468 */
 469
 470static struct socket *sock_alloc(void)
 471{
 472	struct inode *inode;
 473	struct socket *sock;
 474
 475	inode = new_inode(sock_mnt->mnt_sb);
 476	if (!inode)
 477		return NULL;
 478
 479	sock = SOCKET_I(inode);
 480
 481	inode->i_mode = S_IFSOCK | S_IRWXUGO;
 482	inode->i_uid = current->fsuid;
 483	inode->i_gid = current->fsgid;
 484
 485	get_cpu_var(sockets_in_use)++;
 486	put_cpu_var(sockets_in_use);
 487	return sock;
 488}
 489
 490/*
 491 *	In theory you can't get an open on this inode, but /proc provides
 492 *	a back door. Remember to keep it shut otherwise you'll let the
 493 *	creepy crawlies in.
 494 */
 495
 496static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
 497{
 498	return -ENXIO;
 499}
 500
 501const struct file_operations bad_sock_fops = {
 502	.owner = THIS_MODULE,
 503	.open = sock_no_open,
 504};
 505
 506/**
 507 *	sock_release	-	close a socket
 508 *	@sock: socket to close
 509 *
 510 *	The socket is released from the protocol stack if it has a release
 511 *	callback, and the inode is then released if the socket is bound to
 512 *	an inode not a file.
 513 */
 514
 515void sock_release(struct socket *sock)
 516{
 517	if (sock->ops) {
 518		struct module *owner = sock->ops->owner;
 519
 520		sock->ops->release(sock);
 521		sock->ops = NULL;
 522		module_put(owner);
 523	}
 524
 525	if (sock->fasync_list)
 526		printk(KERN_ERR "sock_release: fasync list not empty!\n");
 527
 528	get_cpu_var(sockets_in_use)--;
 529	put_cpu_var(sockets_in_use);
 530	if (!sock->file) {
 531		iput(SOCK_INODE(sock));
 532		return;
 533	}
 534	sock->file = NULL;
 535}
 536
 537static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 538				 struct msghdr *msg, size_t size)
 539{
 540	struct sock_iocb *si = kiocb_to_siocb(iocb);
 541	int err;
 542
 543	si->sock = sock;
 544	si->scm = NULL;
 545	si->msg = msg;
 546	si->size = size;
 547
 548	err = security_socket_sendmsg(sock, msg, size);
 549	if (err)
 550		return err;
 551
 552	err = sock->ops->sendmsg(iocb, sock, msg, size);
 553	return err;
 554}
 555
 556int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 557{
 558	struct kiocb iocb;
 559	struct sock_iocb siocb;
 560	int ret;
 561
 562	init_sync_kiocb(&iocb, NULL);
 563	iocb.private = &siocb;
 564	ret = __sock_sendmsg(&iocb, sock, msg, size);
 565	if (-EIOCBQUEUED == ret)
 566		ret = wait_on_sync_kiocb(&iocb);
 567	return ret;
 568}
 569
 570int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 571		   struct kvec *vec, size_t num, size_t size)
 572{
 573	mm_segment_t oldfs = get_fs();
 574	int result;
 575
 576	set_fs(KERNEL_DS);
 577	/*
 578	 * the following is safe, since for compiler definitions of kvec and
 579	 * iovec are identical, yielding the same in-core layout and alignment
 580	 */
 581	msg->msg_iov = (struct iovec *)vec;
 582	msg->msg_iovlen = num;
 583	result = sock_sendmsg(sock, msg, size);
 584	set_fs(oldfs);
 585	return result;
 586}
 587
 588static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 589				 struct msghdr *msg, size_t size, int flags)
 590{
 591	int err;
 592	struct sock_iocb *si = kiocb_to_siocb(iocb);
 593
 594	si->sock = sock;
 595	si->scm = NULL;
 596	si->msg = msg;
 597	si->size = size;
 598	si->flags = flags;
 599
 600	err = security_socket_recvmsg(sock, msg, size, flags);
 601	if (err)
 602		return err;
 603
 604	err = sock->ops->recvmsg(iocb, sock, msg, size, flags);
 605	return err;
 606}
 607
 608int sock_recvmsg(struct socket *sock, struct msghdr *msg,
 609		 size_t size, int flags)
 610{
 611	struct kiocb iocb;
 612	struct sock_iocb siocb;
 613	int ret;
 614
 615	init_sync_kiocb(&iocb, NULL);
 616	iocb.private = &siocb;
 617	ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
 618	if (-EIOCBQUEUED == ret)
 619		ret = wait_on_sync_kiocb(&iocb);
 620	return ret;
 621}
 622
 623int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
 624		   struct kvec *vec, size_t num, size_t size, int flags)
 625{
 626	mm_segment_t oldfs = get_fs();
 627	int result;
 628
 629	set_fs(KERNEL_DS);
 630	/*
 631	 * the following is safe, since for compiler definitions of kvec and
 632	 * iovec are identical, yielding the same in-core layout and alignment
 633	 */
 634	msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
 635	result = sock_recvmsg(sock, msg, size, flags);
 636	set_fs(oldfs);
 637	return result;
 638}
 639
 640static void sock_aio_dtor(struct kiocb *iocb)
 641{
 642	kfree(iocb->private);
 643}
 644
 645static ssize_t sock_sendpage(struct file *file, struct page *page,
 646			     int offset, size_t size, loff_t *ppos, int more)
 647{
 648	struct socket *sock;
 649	int flags;
 650
 651	sock = file->private_data;
 652
 653	flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
 654	if (more)
 655		flags |= MSG_MORE;
 656
 657	return kernel_sendpage(sock, page, offset, size, flags);
 658}
 659
 660static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
 661					 struct sock_iocb *siocb)
 662{
 663	if (!is_sync_kiocb(iocb)) {
 664		siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
 665		if (!siocb)
 666			return NULL;
 667		iocb->ki_dtor = sock_aio_dtor;
 668	}
 669
 670	siocb->kiocb = iocb;
 671	iocb->private = siocb;
 672	return siocb;
 673}
 674
 675static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
 676		struct file *file, const struct iovec *iov,
 677		unsigned long nr_segs)
 678{
 679	struct socket *sock = file->private_data;
 680	size_t size = 0;
 681	int i;
 682
 683	for (i = 0; i < nr_segs; i++)
 684		size += iov[i].iov_len;
 685
 686	msg->msg_name = NULL;
 687	msg->msg_namelen = 0;
 688	msg->msg_control = NULL;
 689	msg->msg_controllen = 0;
 690	msg->msg_iov = (struct iovec *)iov;
 691	msg->msg_iovlen = nr_segs;
 692	msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
 693
 694	return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
 695}
 696
 697static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
 698				unsigned long nr_segs, loff_t pos)
 699{
 700	struct sock_iocb siocb, *x;
 701
 702	if (pos != 0)
 703		return -ESPIPE;
 704
 705	if (iocb->ki_left == 0)	/* Match SYS5 behaviour */
 706		return 0;
 707
 708
 709	x = alloc_sock_iocb(iocb, &siocb);
 710	if (!x)
 711		return -ENOMEM;
 712	return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
 713}
 714
 715static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
 716			struct file *file, const struct iovec *iov,
 717			unsigned long nr_segs)
 718{
 719	struct socket *sock = file->private_data;
 720	size_t size = 0;
 721	int i;
 722
 723	for (i = 0; i < nr_segs; i++)
 724		size += iov[i].iov_len;
 725
 726	msg->msg_name = NULL;
 727	msg->msg_namelen = 0;
 728	msg->msg_control = NULL;
 729	msg->msg_controllen = 0;
 730	msg->msg_iov = (struct iovec *)iov;
 731	msg->msg_iovlen = nr_segs;
 732	msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
 733	if (sock->type == SOCK_SEQPACKET)
 734		msg->msg_flags |= MSG_EOR;
 735
 736	return __sock_sendmsg(iocb, sock, msg, size);
 737}
 738
 739static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
 740			  unsigned long nr_segs, loff_t pos)
 741{
 742	struct sock_iocb siocb, *x;
 743
 744	if (pos != 0)
 745		return -ESPIPE;
 746
 747	x = alloc_sock_iocb(iocb, &siocb);
 748	if (!x)
 749		return -ENOMEM;
 750
 751	return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
 752}
 753
 754/*
 755 * Atomic setting of ioctl hooks to avoid race
 756 * with module unload.
 757 */
 758
 759static DEFINE_MUTEX(br_ioctl_mutex);
 760static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
 761
 762void brioctl_set(int (*hook) (unsigned int, void __user *))
 763{
 764	mutex_lock(&br_ioctl_mutex);
 765	br_ioctl_hook = hook;
 766	mutex_unlock(&br_ioctl_mutex);
 767}
 768
 769EXPORT_SYMBOL(brioctl_set);
 770
 771static DEFINE_MUTEX(vlan_ioctl_mutex);
 772static int (*vlan_ioctl_hook) (void __user *arg);
 773
 774void vlan_ioctl_set(int (*hook) (void __user *))
 775{
 776	mutex_lock(&vlan_ioctl_mutex);
 777	vlan_ioctl_hook = hook;
 778	mutex_unlock(&vlan_ioctl_mutex);
 779}
 780
 781EXPORT_SYMBOL(vlan_ioctl_set);
 782
 783static DEFINE_MUTEX(dlci_ioctl_mutex);
 784static int (*dlci_ioctl_hook) (unsigned int, void __user *);
 785
 786void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
 787{
 788	mutex_lock(&dlci_ioctl_mutex);
 789	dlci_ioctl_hook = hook;
 790	mutex_unlock(&dlci_ioctl_mutex);
 791}
 792
 793EXPORT_SYMBOL(dlci_ioctl_set);
 794
 795/*
 796 *	With an ioctl, arg may well be a user mode pointer, but we don't know
 797 *	what to do with it - that's up to the protocol still.
 798 */
 799
 800static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 801{
 802	struct socket *sock;
 803	void __user *argp = (void __user *)arg;
 804	int pid, err;
 805
 806	sock = file->private_data;
 807
 808	if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
 809		err = dev_ioctl(cmd, argp);
 810	} else
 811#ifdef CONFIG_WIRELESS_EXT
 812	if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
 813		err = dev_ioctl(cmd, argp);
 814	} else
 815#endif				/* CONFIG_WIRELESS_EXT */
 816		switch (cmd) {
 817		case FIOSETOWN:
 818		case SIOCSPGRP:
 819			err = -EFAULT;
 820			if (get_user(pid, (int __user *)argp))
 821				break;
 822			err = f_setown(sock->file, pid, 1);
 823			break;
 824		case FIOGETOWN:
 825		case SIOCGPGRP:
 826			err = put_user(f_getown(sock->file),
 827				       (int __user *)argp);
 828			break;
 829		case SIOCGIFBR:
 830		case SIOCSIFBR:
 831		case SIOCBRADDBR:
 832		case SIOCBRDELBR:
 833			err = -ENOPKG;
 834			if (!br_ioctl_hook)
 835				request_module("bridge");
 836
 837			mutex_lock(&br_ioctl_mutex);
 838			if (br_ioctl_hook)
 839				err = br_ioctl_hook(cmd, argp);
 840			mutex_unlock(&br_ioctl_mutex);
 841			break;
 842		case SIOCGIFVLAN:
 843		case SIOCSIFVLAN:
 844			err = -ENOPKG;
 845			if (!vlan_ioctl_hook)
 846				request_module("8021q");
 847
 848			mutex_lock(&vlan_ioctl_mutex);
 849			if (vlan_ioctl_hook)
 850				err = vlan_ioctl_hook(argp);
 851			mutex_unlock(&vlan_ioctl_mutex);
 852			break;
 853		case SIOCADDDLCI:
 854		case SIOCDELDLCI:
 855			err = -ENOPKG;
 856			if (!dlci_ioctl_hook)
 857				request_module("dlci");
 858
 859			if (dlci_ioctl_hook) {
 860				mutex_lock(&dlci_ioctl_mutex);
 861				err = dlci_ioctl_hook(cmd, argp);
 862				mutex_unlock(&dlci_ioctl_mutex);
 863			}
 864			break;
 865		default:
 866			err = sock->ops->ioctl(sock, cmd, arg);
 867
 868			/*
 869			 * If this ioctl is unknown try to hand it down
 870			 * to the NIC driver.
 871			 */
 872			if (err == -ENOIOCTLCMD)
 873				err = dev_ioctl(cmd, argp);
 874			break;
 875		}
 876	return err;
 877}
 878
 879int sock_create_lite(int family, int type, int protocol, struct socket **res)
 880{
 881	int err;
 882	struct socket *sock = NULL;
 883
 884	err = security_socket_create(family, type, protocol, 1);
 885	if (err)
 886		goto out;
 887
 888	sock = sock_alloc();
 889	if (!sock) {
 890		err = -ENOMEM;
 891		goto out;
 892	}
 893
 894	sock->type = type;
 895	err = security_socket_post_create(sock, family, type, protocol, 1);
 896	if (err)
 897		goto out_release;
 898
 899out:
 900	*res = sock;
 901	return err;
 902out_release:
 903	sock_release(sock);
 904	sock = NULL;
 905	goto out;
 906}
 907
 908/* No kernel lock held - perfect */
 909static unsigned int sock_poll(struct file *file, poll_table *wait)
 910{
 911	struct socket *sock;
 912
 913	/*
 914	 *      We can't return errors to poll, so it's either yes or no.
 915	 */
 916	sock = file->private_data;
 917	return sock->ops->poll(file, sock, wait);
 918}
 919
 920static int sock_mmap(struct file *file, struct vm_area_struct *vma)
 921{
 922	struct socket *sock = file->private_data;
 923
 924	return sock->ops->mmap(file, sock, vma);
 925}
 926
 927static int sock_close(struct inode *inode, struct file *filp)
 928{
 929	/*
 930	 *      It was possible the inode is NULL we were
 931	 *      closing an unfinished socket.
 932	 */
 933
 934	if (!inode) {
 935		printk(KERN_DEBUG "sock_close: NULL inode\n");
 936		return 0;
 937	}
 938	sock_fasync(-1, filp, 0);
 939	sock_release(SOCKET_I(inode));
 940	return 0;
 941}
 942
 943/*
 944 *	Update the socket async list
 945 *
 946 *	Fasync_list locking strategy.
 947 *
 948 *	1. fasync_list is modified only under process context socket lock
 949 *	   i.e. under semaphore.
 950 *	2. fasync_list is used under read_lock(&sk->sk_callback_lock)
 951 *	   or under socket lock.
 952 *	3. fasync_list can be used from softirq context, so that
 953 *	   modification under socket lock have to be enhanced with
 954 *	   write_lock_bh(&sk->sk_callback_lock).
 955 *							--ANK (990710)
 956 */
 957
 958static int sock_fasync(int fd, struct file *filp, int on)
 959{
 960	struct fasync_struct *fa, *fna = NULL, **prev;
 961	struct socket *sock;
 962	struct sock *sk;
 963
 964	if (on) {
 965		fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
 966		if (fna == NULL)
 967			return -ENOMEM;
 968	}
 969
 970	sock = filp->private_data;
 971
 972	sk = sock->sk;
 973	if (sk == NULL) {
 974		kfree(fna);
 975		return -EINVAL;
 976	}
 977
 978	lock_sock(sk);
 979
 980	prev = &(sock->fasync_list);
 981
 982	for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
 983		if (fa->fa_file == filp)
 984			break;
 985
 986	if (on) {
 987		if (fa != NULL) {
 988			write_lock_bh(&sk->sk_callback_lock);
 989			fa->fa_fd = fd;
 990			write_unlock_bh(&sk->sk_callback_lock);
 991
 992			kfree(fna);
 993			goto out;
 994		}
 995		fna->fa_file = filp;
 996		fna->fa_fd = fd;
 997		fna->magic = FASYNC_MAGIC;
 998		fna->fa_next = sock->fasync_list;
 999		write_lock_bh(&sk->sk_callback_lock);
1000		sock->fasync_list = fna;
1001		sock_set_flag(sk, SOCK_FASYNC);
1002		write_unlock_bh(&sk->sk_callback_lock);
1003	} else {
1004		if (fa != NULL) {
1005			write_lock_bh(&sk->sk_callback_lock);
1006			*prev = fa->fa_next;
1007			if (!sock->fasync_list)
1008				sock_reset_flag(sk, SOCK_FASYNC);
1009			write_unlock_bh(&sk->sk_callback_lock);
1010			kfree(fa);
1011		}
1012	}
1013
1014out:
1015	release_sock(sock->sk);
1016	return 0;
1017}
1018
1019/* This function may be called only under socket lock or callback_lock */
1020
1021int sock_wake_async(struct socket *sock, int how, int band)
1022{
1023	if (!sock || !sock->fasync_list)
1024		return -1;
1025	switch (how) {
1026	case SOCK_WAKE_WAITD:
1027		if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1028			break;
1029		goto call_kill;
1030	case SOCK_WAKE_SPACE:
1031		if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1032			break;
1033		/* fall through */
1034	case SOCK_WAKE_IO:
1035call_kill:
1036		__kill_fasync(sock->fasync_list, SIGIO, band);
1037		break;
1038	case SOCK_WAKE_URG:
1039		__kill_fasync(sock->fasync_list, SIGURG, band);
1040	}
1041	return 0;
1042}
1043
1044static int __sock_create(int family, int type, int protocol,
1045			 struct socket **res, int kern)
1046{
1047	int err;
1048	struct socket *sock;
1049	const struct net_proto_family *pf;
1050
1051	/*
1052	 *      Check protocol is in range
1053	 */
1054	if (family < 0 || family >= NPROTO)
1055		return -EAFNOSUPPORT;
1056	if (type < 0 || type >= SOCK_MAX)
1057		return -EINVAL;
1058
1059	/* Compatibility.
1060
1061	   This uglymoron is moved from INET layer to here to avoid
1062	   deadlock in module load.
1063	 */
1064	if (family == PF_INET && type == SOCK_PACKET) {
1065		static int warned;
1066		if (!warned) {
1067			warned = 1;
1068			printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1069			       current->comm);
1070		}
1071		family = PF_PACKET;
1072	}
1073
1074	err = security_socket_create(family, type, protocol, kern);
1075	if (err)
1076		return err;
1077
1078	/*
1079	 *	Allocate the socket and allow the family to set things up. if
1080	 *	the protocol is 0, the family is instructed to select an appropriate
1081	 *	default.
1082	 */
1083	sock = sock_alloc();
1084	if (!sock) {
1085		if (net_ratelimit())
1086			printk(KERN_WARNING "socket: no more sockets\n");
1087		return -ENFILE;	/* Not exactly a match, but its the
1088				   closest posix thing */
1089	}
1090
1091	sock->type = type;
1092
1093#if defined(CONFIG_KMOD)
1094	/* Attempt to load a protocol module if the find failed.
1095	 *
1096	 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1097	 * requested real, full-featured networking support upon configuration.
1098	 * Otherwise module support will break!
1099	 */
1100	if (net_families[family] == NULL)
1101		request_module("net-pf-%d", family);
1102#endif
1103
1104	rcu_read_lock();
1105	pf = rcu_dereference(net_families[family]);
1106	err = -EAFNOSUPPORT;
1107	if (!pf)
1108		goto out_release;
1109
1110	/*
1111	 * We will call the ->create function, that possibly is in a loadable
1112	 * module, so we have to bump that loadable module refcnt first.
1113	 */
1114	if (!try_module_get(pf->owner))
1115		goto out_release;
1116
1117	/* Now protected by module ref count */
1118	rcu_read_unlock();
1119
1120	err = pf->create(sock, protocol);
1121	if (err < 0)
1122		goto out_module_put;
1123
1124	/*
1125	 * Now to bump the refcnt of the [loadable] module that owns this
1126	 * socket at sock_release time we decrement its refcnt.
1127	 */
1128	if (!try_module_get(sock->ops->owner))
1129		goto out_module_busy;
1130
1131	/*
1132	 * Now that we're done with the ->create function, the [loadable]
1133	 * module can have its refcnt decremented
1134	 */
1135	module_put(pf->owner);
1136	err = security_socket_post_create(sock, family, type, protocol, kern);
1137	if (err)
1138		goto out_release;
1139	*res = sock;
1140
1141	return 0;
1142
1143out_module_busy:
1144	err = -EAFNOSUPPORT;
1145out_module_put:
1146	sock->ops = NULL;
1147	module_put(pf->owner);
1148out_sock_release:
1149	sock_release(sock);
1150	return err;
1151
1152out_release:
1153	rcu_read_unlock();
1154	goto out_sock_release;
1155}
1156
/*
 *	Create a socket through __sock_create() with the kern flag clear.
 */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	const int kern = 0;

	return __sock_create(family, type, protocol, res, kern);
}
1161
/*
 *	Create a socket through __sock_create() with the kern flag set.
 */
int sock_create_kern(int family, int type, int protocol, struct socket **res)
{
	const int kern = 1;

	return __sock_create(family, type, protocol, res, kern);
}
1166
1167asmlinkage long sys_socket(int family, int type, int protocol)
1168{
1169	int retval;
1170	struct socket *sock;
1171
1172	retval = sock_create(family, type, protocol, &sock);
1173	if (retval < 0)
1174		goto out;
1175
1176	retval = sock_map_fd(sock);
1177	if (retval < 0)
1178		goto out_release;
1179
1180out:
1181	/* It may be already another descriptor 8) Not kernel problem. */
1182	return retval;
1183
1184out_release:
1185	sock_release(sock);
1186	return retval;
1187}
1188
/*
 *	Create a pair of connected sockets.
 *
 *	Two sockets of the same family/type/protocol are created, joined by
 *	the protocol's ->socketpair() operation, each given a file and a
 *	descriptor, and the two descriptors are written to usockvec[0] and
 *	usockvec[1].
 *
 *	Returns 0 on success.  On failure an error code is returned and
 *	everything acquired so far is given back; which helper is used for
 *	that depends on how far set-up had progressed (see the labels below).
 */

asmlinkage long sys_socketpair(int family, int type, int protocol,
			       int __user *usockvec)
{
	struct socket *sock1, *sock2;
	int fd1, fd2, err;
	struct file *newfile1, *newfile2;

	/*
	 * Obtain the first socket and check if the underlying protocol
	 * supports the socketpair call.
	 */

	err = sock_create(family, type, protocol, &sock1);
	if (err < 0)
		goto out;

	err = sock_create(family, type, protocol, &sock2);
	if (err < 0)
		goto out_release_1;

	err = sock1->ops->socketpair(sock1, sock2);
	if (err < 0)
		goto out_release_both;

	/* Reserve a descriptor and an (as yet unattached) file for each end. */
	fd1 = sock_alloc_fd(&newfile1);
	if (unlikely(fd1 < 0)) {
		err = fd1;
		goto out_release_both;
	}

	fd2 = sock_alloc_fd(&newfile2);
	if (unlikely(fd2 < 0)) {
		err = fd2;
		/* newfile1 is not attached to sock1 yet: undo it by hand. */
		put_filp(newfile1);
		put_unused_fd(fd1);
		goto out_release_both;
	}

	err = sock_attach_fd(sock1, newfile1);
	if (unlikely(err < 0)) {
		/* Neither file is attached: out_fd2 unwinds both ends. */
		goto out_fd2;
	}

	err = sock_attach_fd(sock2, newfile2);
	if (unlikely(err < 0)) {
		/*
		 * newfile1 is attached by now, so it is dropped with fput();
		 * sock1 is not released separately on this path.
		 * NOTE(review): presumably fput() on an attached file also
		 * releases its socket - confirm.
		 */
		fput(newfile1);
		goto out_fd1;
	}

	err = audit_fd_pair(fd1, fd2);
	if (err < 0) {
		/* Both files are attached: fput() each, then free the fds. */
		fput(newfile1);
		fput(newfile2);
		goto out_fd;
	}

	fd_install(fd1, newfile1);
	fd_install(fd2, newfile2);
	/* fd1 and fd2 may already refer to other descriptors by now.
	 * Not a kernel problem.
	 */

	err = put_user(fd1, &usockvec[0]);
	if (!err)
		err = put_user(fd2, &usockvec[1]);
	if (!err)
		return 0;

	/* The descriptors are installed already, so close them normally. */
	sys_close(fd2);
	sys_close(fd1);
	return err;

	/* Unwinding before any file or descriptor is outstanding. */
out_release_both:
	sock_release(sock2);
out_release_1:
	sock_release(sock1);
out:
	return err;

	/*
	 * Unwinding once files and descriptors exist.  Each label handles
	 * one still-unattached file/socket pair and falls through to the
	 * next; out_fd only returns the two descriptor numbers.
	 */
out_fd2:
	put_filp(newfile1);
	sock_release(sock1);
out_fd1:
	put_filp(newfile2);
	sock_release(sock2);
out_fd:
	put_unused_fd(fd1);
	put_unused_fd(fd2);
	goto out;
}
1283
1284/*
1285 *	Bind a name to a socket. Nothing much to do here since it's
1286 *	the protocol's responsibility to handle the local address.
1287 *
1288 *	We move the socket address to kernel space before we call
1289 *	the protocol layer (having also checked the address is ok).
1290 */
1291
1292asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1293{
1294	struct socket *sock;
1295	char address[MAX_SOCK_ADDR];
1296	int err, fput_needed;
1297
1298	sock = sockfd_lookup_light(fd, &err, &fput_needed);
1299	if(sock) {
1300		err = move_addr_to_kernel(umyaddr, addrlen, address);
1301		if (err >= 0) {
1302			err = security_socket_bind(sock,
1303						   (struct sockaddr *)address,
1304						   addrlen);
1305			if (!err)
1306				err = sock->ops->bind(sock,
1307						      (struct sockaddr *)
1308						      address, addrlen);
1309		}
1310		fput_light(sock->file, fput_needed);
1311	}
1312	return err;
1313}
1314
1315/*
1316 *	Perform a listen. Basically, we allow the protocol to do anything
1317 *	necessary for a listen, and if that works, we mark the socket as
1318 *	ready for listening.
1319 */
1320
1321int sysctl_somaxconn __read_mostly = SOMAXCONN;
1322
1323asmlinkage long sys_listen(int fd, int backlog)
1324{
1325	struct socket *sock;
1326	int err, fput_needed;
1327
1328	sock = sockfd_lookup_light(fd, &err, &fput_needed);
1329	if (sock) {
1330		if ((unsigned)backlog > sysctl_somaxconn)
1331			backlog = sysctl_somaxconn;
1332
1333		err = security_socket_listen(sock, backlog);
1334		if (!err)
1335			err = sock->ops->listen(sock, backlog);
1336
1337		fput_light(sock->file, fput_needed);
1338	}
1339	return err;
1340}
1341
/*
 *	For accept, we attempt to create a new socket, set up the link
 *	with the client, wake up the client, then return the new
 *	connected fd. We collect the address of the connector in kernel
 *	space and move it to user at the very end. This is unclean because
 *	we open the socket then return an error.
 *
 *	1003.1g adds the ability to recvmsg() to query connection pending
 *	status to recvmsg. We need to add that support in a way that's
 *	clean when we restructure accept also.
 *
 *	Returns the new descriptor on success.  If upeer_sockaddr is NULL
 *	the peer address is not looked up and upeer_addrlen is not used.
 */

asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
			   int __user *upeer_addrlen)
{
	struct socket *sock, *newsock;
	struct file *newfile;
	int err, len, newfd, fput_needed;
	char address[MAX_SOCK_ADDR];

	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (!sock)
		goto out;

	err = -ENFILE;
	if (!(newsock = sock_alloc()))
		goto out_put;

	/* The new socket starts out with the listener's type and ops. */
	newsock->type = sock->type;
	newsock->ops = sock->ops;

	/*
	 * We don't need try_module_get here, as the listening socket (sock)
	 * has the protocol module (sock->ops->owner) held.
	 */
	__module_get(newsock->ops->owner);

	newfd = sock_alloc_fd(&newfile);
	if (unlikely(newfd < 0)) {
		err = newfd;
		/* No file or descriptor exists yet; only the socket goes. */
		sock_release(newsock);
		goto out_put;
	}

	err = sock_attach_fd(newsock, newfile);
	if (err < 0)
		goto out_fd_simple;

	/* From here on newfile is attached: failures unwind via out_fd. */
	err = security_socket_accept(sock, newsock);
	if (err)
		goto out_fd;

	err = sock->ops->accept(sock, newsock, sock->file->f_flags);
	if (err < 0)
		goto out_fd;

	if (upeer_sockaddr) {
		/*
		 * NOTE(review): the meaning of the final argument (2) is
		 * defined by the protocol's getname(); not visible here.
		 */
		if (newsock->ops->getname(newsock, (struct sockaddr *)address,
					  &len, 2) < 0) {
			err = -ECONNABORTED;
			goto out_fd;
		}
		err = move_addr_to_user(address, len, upeer_sockaddr,
					upeer_addrlen);
		if (err < 0)
			goto out_fd;
	}

	/* File flags are not inherited via accept(), unlike on other OSes. */

	fd_install(newfd, newfile);
	err = newfd;

out_put:
	fput_light(sock->file, fput_needed);
out:
	return err;
out_fd_simple:
	/* Attach failed: socket, file and descriptor are undone one by one. */
	sock_release(newsock);
	put_filp(newfile);
	put_unused_fd(newfd);
	goto out_put;
out_fd:
	/*
	 * newfile is attached; newsock is not released separately here.
	 * NOTE(review): presumably fput() takes the socket down with the
	 * file - confirm.
	 */
	fput(newfile);
	put_unused_fd(newfd);
	goto out_put;
}
1429
1430/*
1431 *	Attempt to connect to a socket with the server address.  The address
1432 *	is in user space so we verify it is OK and move it to kernel space.
1433 *
1434 *	For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1435 *	break bindings
1436 *
1437 *	NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1438 *	other SEQPACKET protocols that take time to connect() as it doesn't
1439 *	include the -EINPROGRESS status for such sockets.
1440 */
1441
1442asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1443			    int addrlen)
1444{
1445	struct socket *sock;
1446	char address[MAX_SOCK_ADDR];
1447	int err, fput_needed;
1448
1449	sock = sockfd_lookup_light(fd, &err, &fput_needed);
1450	if (!sock)
1451		goto out;
1452	err = move_addr_to_kernel(uservaddr, addrlen, address);
1453	if (err < 0)
1454		goto out_put;
1455
1456	err =
1457	    security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1458	if (err)
1459		goto out_put;
1460
1461	err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1462				 sock->file->f_flags);
1463out_put:
1464	fput_light(sock->file, fput_needed);
1465out:
1466	return err;
1467}
1468
1469/*
1470 *	Get the local address ('name') of a socket object. Move the obtained
1471 *	name to user space.
1472 */
1473
1474asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1475				int __user *usockaddr_len)
1476{
1477	struct socket *sock;
1478	char address[MAX_SOCK_ADDR];
1479	int len, err, fput_needed;
1480
1481	sock = sockfd_lookup_light(fd, &err, &fput_needed);
1482	if (!sock)
1483		goto out;
1484
1485	err = security_socket_getsockname(sock);
1486	if (err)
1487		goto out_put;
1488
1489	err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1490	if (err)
1491		goto out_put;
1492	err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1493
1494out_put:
1495	fput_light(sock->file, fput_needed);
1496out:
1497	return err;
1498}
1499
1500/*
1501 *	Get the remote address ('name') of a socket object. Move the obtained
1502 *	name to user space.
1503 */
1504
1505asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1506				int __user *usockaddr_len)
1507{
1508	struct socket *sock;
1509	char address[MAX_SOCK_ADDR];
1510	int len, err, fput_needed;
1511
1512	sock = sockfd_lookup_light(fd, &err, &fput_needed);
1513	if (sock != NULL) {
1514		err = security_socket_getpeername(sock);
1515		if (err) {
1516			fput_light(sock->file, fput_needed);
1517			return err;
1518		}
1519
1520		err =
1521		    sock->ops->getname(sock, (struct sockaddr *)address, &len,
1522				       1);
1523		if (!err)
1524			err = move_addr_to_user(address, len, usockaddr,
1525						usockaddr_len);
1526		fput_light(sock->file, fput_needed);
1527	}
1528	return err;
1529}
1530
1531/*
1532 *	Send a datagram to a given address. We move the address into kernel
1533 *	space and check the user space data area is readable before invoking
1534 *	the protocol.
1535 */
1536
1537asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1538			   unsigned flags, struct sockaddr __user *addr,
1539			   int addr_len)
1540{
1541	struct socket *sock;
1542	char address[MAX_SOCK_ADDR];
1543	int err;
1544	struct msghdr msg;
1545	struct iovec iov;
1546	int fput_needed;
1547
1548	sock = sockfd_lookup_light(fd, &err, &fput_needed);
1549	if (!sock)
1550		goto out;
1551
1552	iov.iov_base = buff;
1553	iov.iov_len = len;
1554	msg.msg_name = NULL;
1555	msg.msg_iov = &iov;
1556	msg.msg_iovlen = 1;
1557	msg.msg_control = NULL;
1558	msg.msg_controllen = 0;
1559	msg.msg_namelen = 0;
1560	if (addr) {
1561		err = move_addr_to_kernel(addr, addr_len, address);
1562		if (err < 0)
1563			goto out_put;
1564		msg.msg_name = address;
1565		msg.msg_namelen = addr_len;
1566	}
1567	if (sock->file->f_flags & O_NONBLOCK)
1568		flags |= MSG_DONTWAIT;
1569	msg.msg_flags = flags;
1570	err = sock_sendmsg(sock, &msg, len);
1571
1572out_put:
1573	fput_light(sock->file, fput_needed);
1574out:
1575	return err;
1576}
1577
1578/*
1579 *	Send a datagram down a socket.
1580 */
1581
1582asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1583{
1584	return sys_sendto(fd, buff, len, flags, NULL, 0);
1585}
1586
1587/*
1588 *	Receive a frame from the socket and optionally record the address of the
1589 *	sender. We verify the buffers are writable and if needed move the
1590 *	sender address from kernel to user space.
1591 */
1592
1593asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1594			     unsigned flags, struct sockaddr __user *addr,
1595			     int __user *addr_len)
1596{
1597	struct socket *sock;
1598	struct iovec iov;
1599	struct msghdr msg;
1600	char address[MAX_SOCK_ADDR];
1601	int err, err2;
1602	int fput_needed;
1603
1604	sock = sockfd_lookup_light(fd, &err, &fput_needed);
1605	if (!sock)
1606		goto out;
1607
1608	msg.msg_control = NULL;
1609	msg.msg_controllen = 0;
1610	msg.msg_iovlen = 1;
1611	msg.msg_iov = &iov;
1612	iov.iov_len = size;
1613	iov.iov_base = ubuf;
1614	msg.msg_name = address;
1615	msg.msg_namelen = MAX_SOCK_ADDR;
1616	if (sock->file->f_flags & O_NONBLOCK)
1617		flags |= MSG_DONTWAIT;
1618	err = sock_recvmsg(sock, &msg, size, flags);
1619
1620	if (err >= 0 && addr != NULL) {
1621		err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1622		if (err2 < 0)
1623			err = err2;
1624	}
1625
1626	fput_light(sock->file, fput_needed);
1627out:
1628	return err;
1629}
1630
1631/*
1632 *	Receive a datagram from a socket.
1633 */
1634
1635asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1636			 unsigned flags)
1637{
1638	return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1639}
1640
1641/*
1642 *	Set a socket option. Because we don't know the option lengths we have
1643 *	to pass the user mode parameter for the protocols to sort out.
1644 */
1645
1646asmlinkage long sys_setsockopt(int fd, int level, int optname,
1647			       char __user *optval, int optlen)
1648{
1649	int err, fput_needed;
1650	struct socket *sock;
1651
1652	if (optlen < 0)
1653		return -EINVAL;
1654
1655	sock = sockfd_lookup_light(fd, &err, &fput_needed);
1656	if (sock != NULL) {
1657		err = security_socket_setsockopt(sock, level, optname);
1658		if (err)
1659			goto out_put;
1660
1661		if (level == SOL_SOCKET)
1662			err =
1663			    sock_setsockopt(sock, level, optname, optval,
1664					    optlen);
1665		else
1666			err =
1667			    sock->ops->setsockopt(sock, level, optname, optval,
1668						  optlen);
1669out_put:
1670		fput_light(sock->file, fput_needed);
1671	}
1672	return err;
1673}
1674
1675/*
1676 *	Get a socket option. Because we don't know the option lengths we have
1677 *	to pass a user mode parameter for the protocols to sort out.
1678 */
1679
1680asmlinkage long sys_getsockopt(int fd, int level, int optname,
1681			       char __user *optval, int __user *optlen)
1682{
1683	int err, fput_needed;
1684	struct socket *sock;
1685
1686	sock = sockfd_lookup_light(fd, &err, &fput_needed);
1687	if (sock != NULL) {
1688		err = security_socket_getsockopt(sock, level, optname);
1689		if (err)
1690			goto out_put;
1691
1692		if (level == SOL_SOCKET)
1693			err =
1694			    sock_getsockopt(sock, level, optname, optval,
1695					    optlen);
1696		else
1697			err =
1698			    sock->ops->getsockopt(sock, level, optname, optval,
1699						  optlen);
1700out_put:
1701		fput_light(sock->file, fput_needed);
1702	}
1703	return err;
1704}
1705
1706/*
1707 *	Shutdown a socket.
1708 */
1709
1710asmlinkage long sys_shutdown(int fd, int how)
1711{
1712	int err, fput_needed;
1713	struct socket *sock;
1714
1715	sock = sockfd_lookup_light(fd, &err, &fput_needed);
1716	if (sock != NULL) {
1717		err = security_socket_shutdown(sock, how);
1718		if (!err)
1719			err = sock->ops->shutdown(sock, how);
1720		fput_light(sock->file, fput_needed);
1721	}
1722	return err;
1723}
1724
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * COMPAT_MSG(msg, member) yields &msg_compat->member when MSG_CMSG_COMPAT
 * is set in 'flags' and &msg->member otherwise.  The macros therefore only
 * work where a variable named 'flags' and a pointer named '<msg>_compat'
 * are both in scope next to 'msg' itself.
 */
#define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1731
/*
 *	BSD sendmsg interface
 *
 *	Copies the user's msghdr (native or compat layout, chosen by
 *	MSG_CMSG_COMPAT in flags) into the kernel, brings the iovec array,
 *	destination address and control data into kernel memory, and hands
 *	the message to sock_sendmsg().
 *
 *	Returns the result of sock_sendmsg(), or an error from one of the
 *	preparation steps: -EFAULT, -EMSGSIZE, -ENOMEM, -ENOBUFS, or whatever
 *	the lookup / verify / compat helpers report.
 */

asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
{
	struct compat_msghdr __user *msg_compat =
	    (struct compat_msghdr __user *)msg;
	struct socket *sock;
	char address[MAX_SOCK_ADDR];
	/* Small iovec arrays live on the stack; larger ones are allocated. */
	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
	unsigned char ctl[sizeof(struct cmsghdr) + 20]
	    __attribute__ ((aligned(sizeof(__kernel_size_t))));
	/* 20 is size of ipv6_pktinfo */
	/* ctl_buf points at ctl unless a larger control buffer is needed. */
	unsigned char *ctl_buf = ctl;
	struct msghdr msg_sys;
	int err, ctl_len, iov_size, total_len;
	int fput_needed;

	err = -EFAULT;
	if (MSG_CMSG_COMPAT & flags) {
		if (get_compat_msghdr(&msg_sys, msg_compat))
			return -EFAULT;
	}
	else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
		return -EFAULT;

	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (!sock)
		goto out;

	/* do not move before msg_sys is valid */
	err = -EMSGSIZE;
	if (msg_sys.msg_iovlen > UIO_MAXIOV)
		goto out_put;

	/* Check whether to allocate the iovec area */
	err = -ENOMEM;
	iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
	if (msg_sys.msg_iovlen > UIO_FASTIOV) {
		iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
		if (!iov)
			goto out_put;
	}

	/* This will also move the address data into kernel space */
	if (MSG_CMSG_COMPAT & flags) {
		err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
	} else
		err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
	if (err < 0)
		goto out_freeiov;
	/* A non-negative result is used as the total payload length. */
	total_len = err;

	err = -ENOBUFS;

	/* ctl_len is an int, so larger control lengths are refused. */
	if (msg_sys.msg_controllen > INT_MAX)
		goto out_freeiov;
	ctl_len = msg_sys.msg_controllen;
	if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
		err =
		    cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
						     sizeof(ctl));
		if (err)
			goto out_freeiov;
		/*
		 * Pick up the buffer and length the helper left in msg_sys;
		 * if that buffer is not ctl it is freed at out_freectl.
		 */
		ctl_buf = msg_sys.msg_control;
		ctl_len = msg_sys.msg_controllen;
	} else if (ctl_len) {
		if (ctl_len > sizeof(ctl)) {
			/* err is still -ENOBUFS if this allocation fails. */
			ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
			if (ctl_buf == NULL)
				goto out_freeiov;
		}
		err = -EFAULT;
		/*
		 * Careful! Before this, msg_sys.msg_control contains a user pointer.
		 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
		 * checking falls down on this.
		 */
		if (copy_from_user(ctl_buf,
				   (void __user __force *)msg_sys.msg_control,
				   ctl_len))
			goto out_freectl;
		msg_sys.msg_control = ctl_buf;
	}
	msg_sys.msg_flags = flags;

	if (sock->file->f_flags & O_NONBLOCK)
		msg_sys.msg_flags |= MSG_DONTWAIT;
	err = sock_sendmsg(sock, &msg_sys, total_len);

	/* Only buffers that are not the on-stack defaults are freed. */
out_freectl:
	if (ctl_buf != ctl)
		sock_kfree_s(sock->sk, ctl_buf, ctl_len);
out_freeiov:
	if (iov != iovstack)
		sock_kfree_s(sock->sk, iov, iov_size);
out_put:
	fput_light(sock->file, fput_needed);
out:
	return err;
}
1834
/*
 *	BSD recvmsg interface
 *
 *	Copies the user's msghdr (native or compat layout, chosen by
 *	MSG_CMSG_COMPAT in flags) into the kernel, receives through
 *	sock_recvmsg(), and then writes the sender address, the resulting
 *	message flags and the control length back to the user's msghdr.
 *
 *	Returns the value sock_recvmsg() returned on success, otherwise an
 *	error from one of the steps.
 */

asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
			    unsigned int flags)
{
	struct compat_msghdr __user *msg_compat =
	    (struct compat_msghdr __user *)msg;
	struct socket *sock;
	/* Small iovec arrays live on the stack; larger ones are allocated. */
	struct iovec iovstack[UIO_FASTIOV];
	struct iovec *iov = iovstack;
	struct msghdr msg_sys;
	unsigned long cmsg_ptr;
	int err, iov_size, total_len, len;
	int fput_needed;

	/* kernel mode address */
	char addr[MAX_SOCK_ADDR];

	/* user mode address pointers */
	struct sockaddr __user *uaddr;
	int __user *uaddr_len;

	if (MSG_CMSG_COMPAT & flags) {
		if (get_compat_msghdr(&msg_sys, msg_compat))
			return -EFAULT;
	}
	else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
		return -EFAULT;

	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (!sock)
		goto out;

	err = -EMSGSIZE;
	if (msg_sys.msg_iovlen > UIO_MAXIOV)
		goto out_put;

	/* Check whether to allocate the iovec area */
	err = -ENOMEM;
	iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
	if (msg_sys.msg_iovlen > UIO_FASTIOV) {
		iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
		if (!iov)
			goto out_put;
	}

	/*
	 *      Save the user-mode address (verify_iovec will change the
	 *      kernel msghdr to use the kernel address space)
	 */

	uaddr = (__force void __user *)msg_sys.msg_name;
	uaddr_len = COMPAT_NAMELEN(msg);
	if (MSG_CMSG_COMPAT & flags) {
		err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
	} else
		err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
	if (err < 0)
		goto out_freeiov;
	/* A non-negative result is used as the total buffer length. */
	total_len = err;

	/*
	 * Remember where the control area started; the distance
	 * msg_control has moved is reported below as msg_controllen.
	 * NOTE(review): presumably the lower layers advance msg_control as
	 * they write control data - confirm.
	 */
	cmsg_ptr = (unsigned long)msg_sys.msg_control;
	msg_sys.msg_flags = 0;
	if (MSG_CMSG_COMPAT & flags)
		msg_sys.msg_flags = MSG_CMSG_COMPAT;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;
	err = sock_recvmsg(sock, &msg_sys, total_len, flags);
	if (err < 0)
		goto out_freeiov;
	/* Keep the receive result; err is reused by the copy-out steps. */
	len = err;

	if (uaddr != NULL) {
		err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
					uaddr_len);
		if (err < 0)
			goto out_freeiov;
	}
	/* MSG_CMSG_COMPAT is masked out of the flags written back. */
	err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
			 COMPAT_FLAGS(msg));
	if (err)
		goto out_freeiov;
	if (MSG_CMSG_COMPAT & flags)
		err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
				 &msg_compat->msg_controllen);
	else
		err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
				 &msg->msg_controllen);
	if (err)
		goto out_freeiov;
	err = len;

out_freeiov:
	if (iov != iovstack)
		sock_kfree_s(sock->sk, iov, iov_size);
out_put:
	fput_light(sock->file, fput_needed);
out:
	return err;
}
1938
1939#ifdef __ARCH_WANT_SYS_SOCKETCALL
1940
/*
 * Argument list sizes for sys_socketcall.
 *
 * nargs[call] is the size in bytes of the argument block that
 * sys_socketcall() copies from user space for call number 'call';
 * AL(n) stands for n unsigned longs.  Slot 0 is a placeholder, since
 * sys_socketcall() rejects call numbers below 1.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[18]={
	AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
	AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
	AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)
};

#undef AL
1950
/*
 *	System call vectors.
 *
 *	Argument checking cleaned up. Saved 20% in size.
 *	This function doesn't need to set the kernel lock because
 *	it is set by the callees.
 *
 *	@call: number of the socket call, valid from 1 to SYS_RECVMSG
 *	@args: user-space array holding that call's arguments as
 *	       unsigned longs
 *
 *	Returns -ENOSYS for an out-of-range @call, -EFAULT if the arguments
 *	cannot be copied in, a non-zero audit_socketcall() result, or
 *	otherwise whatever the selected sys_*() handler returns.
 */

asmlinkage long sys_socketcall(int call, unsigned long __user *args)
{
	unsigned long a[6];
	unsigned long a0, a1;
	int err;

	/* Also keeps 'call' inside the nargs[] table indexed below. */
	if (call < 1 || call > SYS_RECVMSG)
		return -ENOSYS;

	/* copy_from_user should be SMP safe. */
	/* Only the nargs[call] bytes this call needs are copied in. */
	if (copy_from_user(a, args, nargs[call]))
		return -EFAULT;

	/* nargs[] holds byte counts; the audit hook gets an argument count. */
	err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
	if (err)
		return err;

	a0 = a[0];
	a1 = a[1];

	/* Cast each raw word to the type its handler expects and dispatch. */
	switch (call) {
	case SYS_SOCKET:
		err = sys_socket(a0, a1, a[2]);
		break;
	case SYS_BIND:
		err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
		break;
	case SYS_CONNECT:
		err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
		break;
	case SYS_LISTEN:
		err = sys_listen(a0, a1);
		break;
	case SYS_ACCEPT:
		err =
		    sys_accept(a0, (struct sockaddr __user *)a1,
			       (int __user *)a[2]);
		break;
	case SYS_GETSOCKNAME:
		err =
		    sys_getsockname(a0, (struct sockaddr __user *)a1,
				    (int __user *)a[2]);
		break;
	case SYS_GETPEERNAME:
		err =
		    sys_getpeername(a0, (struct sockaddr __user *)a1,
				    (int __user *)a[2]);
		break;
	case SYS_SOCKETPAIR:
		err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
		break;
	case SYS_SEND:
		err = sys_send(a0, (void __user *)a1, a[2], a[3]);
		break;
	case SYS_SENDTO:
		err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
				 (struct sockaddr __user *)a[4], a[5]);
		break;
	case SYS_RECV:
		err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
		break;
	case SYS_RECVFROM:
		err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
				   (struct sockaddr __user *)a[4],
				   (int __user *)a[5]);
		break;
	case SYS_SHUTDOWN:
		err = sys_shutdown(a0, a1);
		break;
	case SYS_SETSOCKOPT:
		err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
		break;
	case SYS_GETSOCKOPT:
		err =
		    sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
				   (int __user *)a[4]);
		break;
	case SYS_SENDMSG:
		err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
		break;
	case SYS_RECVMSG:
		err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
		break;
	default:
		/* Taken only for an in-range call number with no case above. */
		err = -ENOSYS;
		break;
	}
	return err;
}
2048
2049#endif				/* __ARCH_WANT_SYS_SOCKETCALL */
2050
2051/**
2052 *	sock_register - add a socket protocol handler
2053 *	@ops: description of protocol
2054 *
2055 *	This function is called by a protocol handler that wants to
2056 *	advertise its address family, and have it linked into the
2057 *	socket interface. The value ops->family coresponds to the
2058 *	socket system call protocol family.
2059 */
2060int sock_register(const struct net_proto_family *ops)
2061{
2062	int err;
2063
2064	if (ops->family >= NPROTO) {
2065		printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2066		       NPROTO);
2067		return -ENOBUFS;
2068	}
2069
2070	spin_lock(&net_family_lock);
2071	if (net_families[ops->family])
2072		err = -EEXIST;
2073	else {
2074		net_families[ops->family] = ops;
2075		err = 0;
2076	}
2077	spin_unlock(&net_family_lock);
2078
2079	printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
2080	return err;
2081}
2082
2083/**
2084 *	sock_unregister - remove a protocol handler
2085 *	@family: protocol family to remove
2086 *
2087 *	Thiā€¦

Large files files are truncated, but you can click here to view the full file