/net/socket.c
C | 3152 lines | 2260 code | 462 blank | 430 comment | 356 complexity | 02555c78d8719a4950120d217d286e42 MD5 | raw file
Large files are truncated, but you can click here to view the full file
/*
 * NET		An implementation of the SOCKET network access protocol.
 *
 * Version:	@(#)socket.c	1.1.93	18/02/95
 *
 * Authors:	Orest Zborowski, <obz@Kodak.COM>
 *		Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *
 * Fixes:
 *		Anonymous	:	NOTSOCK/BADF cleanup. Error fix in
 *					shutdown()
 *		Alan Cox	:	verify_area() fixes
 *		Alan Cox	:	Removed DDI
 *		Jonathan Kamens	:	SOCK_DGRAM reconnect bug
 *		Alan Cox	:	Moved a load of checks to the very
 *					top level.
 *		Alan Cox	:	Move address structures to/from user
 *					mode above the protocol layers.
 *		Rob Janssen	:	Allow 0 length sends.
 *		Alan Cox	:	Asynchronous I/O support (cribbed from the
 *					tty drivers).
 *		Niibe Yutaka	:	Asynchronous I/O for writes (4.4BSD style)
 *		Jeff Uphoff	:	Made max number of sockets command-line
 *					configurable.
 *		Matti Aarnio	:	Made the number of sockets dynamic,
 *					to be allocated when needed, and mr.
 *					Uphoff's max is used as max to be
 *					allowed to allocate.
 *		Linus		:	Argh. removed all the socket allocation
 *					altogether: it's in the inode now.
 *		Alan Cox	:	Made sock_alloc()/sock_release() public
 *					for NetROM and future kernel nfsd type
 *					stuff.
 *		Alan Cox	:	sendmsg/recvmsg basics.
 *		Tom Dyas	:	Export net symbols.
 *		Marcin Dalecki	:	Fixed problems with CONFIG_NET="n".
 *		Alan Cox	:	Added thread locking to sys_* calls
 *					for sockets. May have errors at the
 *					moment.
 *		Kevin Buhr	:	Fixed the dumb errors in the above.
 *		Andi Kleen	:	Some small cleanups, optimizations,
 *					and fixed a copy_from_user() bug.
 *		Tigran Aivazian	:	sys_send(args) calls sys_sendto(args, NULL, 0)
 *		Tigran Aivazian	:	Made listen(2) backlog sanity checks
 *					protocol-independent
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *
 *	This module is effectively the top level interface to the BSD socket
 *	paradigm.
 *
 *	Based upon Swansea University Computer Society NET3.039
 */

#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/file.h>
#include <linux/net.h>
#include <linux/interrupt.h>
#include <linux/thread_info.h>
#include <linux/rcupdate.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mutex.h>
#include <linux/wanrouter.h>
#include <linux/if_bridge.h>
#include <linux/if_frad.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/cache.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/compat.h>
#include <linux/kmod.h>
#include <linux/audit.h>
#include <linux/wireless.h>
#include <linux/nsproxy.h>
#include <linux/magic.h>
#include <linux/slab.h>

#include <asm/uaccess.h>
#include <asm/unistd.h>

#include <net/compat.h>
#include <net/wext.h>

#include <net/sock.h>
#include <linux/netfilter.h>

#include <linux/if_tun.h>
#include <linux/ipv6_route.h>
#include <linux/route.h>
#include <linux/sockios.h>
#include <linux/atalk.h>

#ifdef CONFIG_UID_STAT
#include <linux/uid_stat.h>
#endif

/*
 * Forward declarations of the handlers that are wired into
 * socket_file_ops below, so the table can be defined ahead of the
 * function bodies.
 */
static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
			 unsigned long nr_segs, loff_t pos);
static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
			  unsigned long nr_segs, loff_t pos);
static int sock_mmap(struct file *file, struct vm_area_struct *vma);

static int sock_close(struct inode *inode, struct file *file);
static unsigned int sock_poll(struct file *file,
			      struct poll_table_struct *wait);
static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
#ifdef CONFIG_COMPAT
static long compat_sock_ioctl(struct file *file,
			      unsigned int cmd, unsigned long arg);
#endif
static int sock_fasync(int fd, struct file *filp, int on);
static ssize_t sock_sendpage(struct file *file, struct page *page,
			     int offset, size_t size, loff_t *ppos, int more);
static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
				struct pipe_inode_info *pipe, size_t len,
				unsigned int flags);

/*
 *	Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
 *	in the operation structures but are done directly via the socketcall() multiplexor.
 */

/*
 * File operations installed on every socket file.  sock_from_file()
 * also compares a file's f_op against this table to decide whether the
 * file is a socket.
 */
static const struct file_operations socket_file_ops = {
	.owner =	THIS_MODULE,
	.llseek =	no_llseek,
	.aio_read =	sock_aio_read,
	.aio_write =	sock_aio_write,
	.poll =		sock_poll,
	.unlocked_ioctl = sock_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = compat_sock_ioctl,
#endif
	.mmap =		sock_mmap,
	.open =		sock_no_open,	/* special open code to disallow open via /proc */
	.release =	sock_close,
	.fasync =	sock_fasync,
	.sendpage =	sock_sendpage,
	.splice_write = generic_splice_sendpage,
	.splice_read =	sock_splice_read,
};

/*
 *	The protocol list. Each protocol is registered in here.
159 */ 160 161static DEFINE_SPINLOCK(net_family_lock); 162static const struct net_proto_family *net_families[NPROTO] __read_mostly; 163 164/* 165 * Statistics counters of the socket lists 166 */ 167 168static DEFINE_PER_CPU(int, sockets_in_use) = 0; 169 170/* 171 * Support routines. 172 * Move socket addresses back and forth across the kernel/user 173 * divide and look after the messy bits. 174 */ 175 176#define MAX_SOCK_ADDR 128 /* 108 for Unix domain - 177 16 for IP, 16 for IPX, 178 24 for IPv6, 179 about 80 for AX.25 180 must be at least one bigger than 181 the AF_UNIX size (see net/unix/af_unix.c 182 :unix_mkname()). 183 */ 184 185/** 186 * move_addr_to_kernel - copy a socket address into kernel space 187 * @uaddr: Address in user space 188 * @kaddr: Address in kernel space 189 * @ulen: Length in user space 190 * 191 * The address is copied into kernel space. If the provided address is 192 * too long an error code of -EINVAL is returned. If the copy gives 193 * invalid addresses -EFAULT is returned. On a success 0 is returned. 194 */ 195 196int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr) 197{ 198 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage)) 199 return -EINVAL; 200 if (ulen == 0) 201 return 0; 202 if (copy_from_user(kaddr, uaddr, ulen)) 203 return -EFAULT; 204 return audit_sockaddr(ulen, kaddr); 205} 206 207/** 208 * move_addr_to_user - copy an address to user space 209 * @kaddr: kernel space address 210 * @klen: length of address in kernel 211 * @uaddr: user space address 212 * @ulen: pointer to user length field 213 * 214 * The value pointed to by ulen on entry is the buffer length available. 215 * This is overwritten with the buffer space used. -EINVAL is returned 216 * if an overlong buffer is specified or a negative buffer size. -EFAULT 217 * is returned if either the buffer or the length field are not 218 * accessible. 
219 * After copying the data up to the limit the user specifies, the true 220 * length of the data is written over the length limit the user 221 * specified. Zero is returned for a success. 222 */ 223 224int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr, 225 int __user *ulen) 226{ 227 int err; 228 int len; 229 230 err = get_user(len, ulen); 231 if (err) 232 return err; 233 if (len > klen) 234 len = klen; 235 if (len < 0 || len > sizeof(struct sockaddr_storage)) 236 return -EINVAL; 237 if (len) { 238 if (audit_sockaddr(klen, kaddr)) 239 return -ENOMEM; 240 if (copy_to_user(uaddr, kaddr, len)) 241 return -EFAULT; 242 } 243 /* 244 * "fromlen shall refer to the value before truncation.." 245 * 1003.1g 246 */ 247 return __put_user(klen, ulen); 248} 249 250static struct kmem_cache *sock_inode_cachep __read_mostly; 251 252static struct inode *sock_alloc_inode(struct super_block *sb) 253{ 254 struct socket_alloc *ei; 255 256 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); 257 if (!ei) 258 return NULL; 259 init_waitqueue_head(&ei->socket.wait); 260 261 ei->socket.fasync_list = NULL; 262 ei->socket.state = SS_UNCONNECTED; 263 ei->socket.flags = 0; 264 ei->socket.ops = NULL; 265 ei->socket.sk = NULL; 266 ei->socket.file = NULL; 267 268 return &ei->vfs_inode; 269} 270 271static void sock_destroy_inode(struct inode *inode) 272{ 273 kmem_cache_free(sock_inode_cachep, 274 container_of(inode, struct socket_alloc, vfs_inode)); 275} 276 277static void init_once(void *foo) 278{ 279 struct socket_alloc *ei = (struct socket_alloc *)foo; 280 281 inode_init_once(&ei->vfs_inode); 282} 283 284static int init_inodecache(void) 285{ 286 sock_inode_cachep = kmem_cache_create("sock_inode_cache", 287 sizeof(struct socket_alloc), 288 0, 289 (SLAB_HWCACHE_ALIGN | 290 SLAB_RECLAIM_ACCOUNT | 291 SLAB_MEM_SPREAD), 292 init_once); 293 if (sock_inode_cachep == NULL) 294 return -ENOMEM; 295 return 0; 296} 297 298static const struct super_operations sockfs_ops = { 299 
.alloc_inode = sock_alloc_inode, 300 .destroy_inode =sock_destroy_inode, 301 .statfs = simple_statfs, 302}; 303 304static int sockfs_get_sb(struct file_system_type *fs_type, 305 int flags, const char *dev_name, void *data, 306 struct vfsmount *mnt) 307{ 308 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, 309 mnt); 310} 311 312static struct vfsmount *sock_mnt __read_mostly; 313 314static struct file_system_type sock_fs_type = { 315 .name = "sockfs", 316 .get_sb = sockfs_get_sb, 317 .kill_sb = kill_anon_super, 318}; 319 320/* 321 * sockfs_dname() is called from d_path(). 322 */ 323static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) 324{ 325 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]", 326 dentry->d_inode->i_ino); 327} 328 329static const struct dentry_operations sockfs_dentry_operations = { 330 .d_dname = sockfs_dname, 331}; 332 333/* 334 * Obtains the first available file descriptor and sets it up for use. 335 * 336 * These functions create file structures and maps them to fd space 337 * of the current process. On success it returns file descriptor 338 * and file struct implicitly stored in sock->file. 339 * Note that another thread may close file descriptor before we return 340 * from this function. We use the fact that now we do not refer 341 * to socket after mapping. If one day we will need it, this 342 * function will increment ref. count on file by 1. 343 * 344 * In any case returned fd MAY BE not valid! 345 * This race condition is unavoidable 346 * with shared fd spaces, we cannot solve it inside kernel, 347 * but we take care of internal coherence yet. 
348 */ 349 350static int sock_alloc_file(struct socket *sock, struct file **f, int flags) 351{ 352 struct qstr name = { .name = "" }; 353 struct path path; 354 struct file *file; 355 int fd; 356 357 fd = get_unused_fd_flags(flags); 358 if (unlikely(fd < 0)) 359 return fd; 360 361 path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); 362 if (unlikely(!path.dentry)) { 363 put_unused_fd(fd); 364 return -ENOMEM; 365 } 366 path.mnt = mntget(sock_mnt); 367 368 path.dentry->d_op = &sockfs_dentry_operations; 369 d_instantiate(path.dentry, SOCK_INODE(sock)); 370 SOCK_INODE(sock)->i_fop = &socket_file_ops; 371 372 file = alloc_file(&path, FMODE_READ | FMODE_WRITE, 373 &socket_file_ops); 374 if (unlikely(!file)) { 375 /* drop dentry, keep inode */ 376 atomic_inc(&path.dentry->d_inode->i_count); 377 path_put(&path); 378 put_unused_fd(fd); 379 return -ENFILE; 380 } 381 382 sock->file = file; 383 file->f_flags = O_RDWR | (flags & O_NONBLOCK); 384 file->f_pos = 0; 385 file->private_data = sock; 386 387 *f = file; 388 return fd; 389} 390 391int sock_map_fd(struct socket *sock, int flags) 392{ 393 struct file *newfile; 394 int fd = sock_alloc_file(sock, &newfile, flags); 395 396 if (likely(fd >= 0)) 397 fd_install(fd, newfile); 398 399 return fd; 400} 401 402static struct socket *sock_from_file(struct file *file, int *err) 403{ 404 if (file->f_op == &socket_file_ops) 405 return file->private_data; /* set in sock_map_fd */ 406 407 *err = -ENOTSOCK; 408 return NULL; 409} 410 411/** 412 * sockfd_lookup - Go from a file number to its socket slot 413 * @fd: file handle 414 * @err: pointer to an error code return 415 * 416 * The file handle passed in is locked and the socket it is bound 417 * too is returned. If an error occurs the err pointer is overwritten 418 * with a negative errno code and NULL is returned. The function checks 419 * for both invalid handles and passing a handle which is not a socket. 420 * 421 * On a success the socket object pointer is returned. 
422 */ 423 424struct socket *sockfd_lookup(int fd, int *err) 425{ 426 struct file *file; 427 struct socket *sock; 428 429 file = fget(fd); 430 if (!file) { 431 *err = -EBADF; 432 return NULL; 433 } 434 435 sock = sock_from_file(file, err); 436 if (!sock) 437 fput(file); 438 return sock; 439} 440 441static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) 442{ 443 struct file *file; 444 struct socket *sock; 445 446 *err = -EBADF; 447 file = fget_light(fd, fput_needed); 448 if (file) { 449 sock = sock_from_file(file, err); 450 if (sock) 451 return sock; 452 fput_light(file, *fput_needed); 453 } 454 return NULL; 455} 456 457/** 458 * sock_alloc - allocate a socket 459 * 460 * Allocate a new inode and socket object. The two are bound together 461 * and initialised. The socket is then returned. If we are out of inodes 462 * NULL is returned. 463 */ 464 465static struct socket *sock_alloc(void) 466{ 467 struct inode *inode; 468 struct socket *sock; 469 470 inode = new_inode(sock_mnt->mnt_sb); 471 if (!inode) 472 return NULL; 473 474 sock = SOCKET_I(inode); 475 476 kmemcheck_annotate_bitfield(sock, type); 477 inode->i_mode = S_IFSOCK | S_IRWXUGO; 478 inode->i_uid = current_fsuid(); 479 inode->i_gid = current_fsgid(); 480 481 percpu_add(sockets_in_use, 1); 482 return sock; 483} 484 485/* 486 * In theory you can't get an open on this inode, but /proc provides 487 * a back door. Remember to keep it shut otherwise you'll let the 488 * creepy crawlies in. 489 */ 490 491static int sock_no_open(struct inode *irrelevant, struct file *dontcare) 492{ 493 return -ENXIO; 494} 495 496const struct file_operations bad_sock_fops = { 497 .owner = THIS_MODULE, 498 .open = sock_no_open, 499}; 500 501/** 502 * sock_release - close a socket 503 * @sock: socket to close 504 * 505 * The socket is released from the protocol stack if it has a release 506 * callback, and the inode is then released if the socket is bound to 507 * an inode not a file. 
508 */ 509 510void sock_release(struct socket *sock) 511{ 512 if (sock->ops) { 513 struct module *owner = sock->ops->owner; 514 515 sock->ops->release(sock); 516 sock->ops = NULL; 517 module_put(owner); 518 } 519 520 if (sock->fasync_list) 521 printk(KERN_ERR "sock_release: fasync list not empty!\n"); 522 523 percpu_sub(sockets_in_use, 1); 524 if (!sock->file) { 525 iput(SOCK_INODE(sock)); 526 return; 527 } 528 sock->file = NULL; 529} 530 531int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, 532 union skb_shared_tx *shtx) 533{ 534 shtx->flags = 0; 535 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) 536 shtx->hardware = 1; 537 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) 538 shtx->software = 1; 539 return 0; 540} 541EXPORT_SYMBOL(sock_tx_timestamp); 542 543static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, 544 struct msghdr *msg, size_t size) 545{ 546 struct sock_iocb *si = kiocb_to_siocb(iocb); 547 int err; 548 549 si->sock = sock; 550 si->scm = NULL; 551 si->msg = msg; 552 si->size = size; 553 554 err = security_socket_sendmsg(sock, msg, size); 555 if (err) 556 return err; 557 558 err = sock->ops->sendmsg(iocb, sock, msg, size); 559 return err; 560} 561 562int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) 563{ 564 struct kiocb iocb; 565 struct sock_iocb siocb; 566 int ret; 567 568 init_sync_kiocb(&iocb, NULL); 569 iocb.private = &siocb; 570 ret = __sock_sendmsg(&iocb, sock, msg, size); 571 if (-EIOCBQUEUED == ret) 572 ret = wait_on_sync_kiocb(&iocb); 573 return ret; 574} 575 576int kernel_sendmsg(struct socket *sock, struct msghdr *msg, 577 struct kvec *vec, size_t num, size_t size) 578{ 579 mm_segment_t oldfs = get_fs(); 580 int result; 581 582 set_fs(KERNEL_DS); 583 /* 584 * the following is safe, since for compiler definitions of kvec and 585 * iovec are identical, yielding the same in-core layout and alignment 586 */ 587 msg->msg_iov = (struct iovec *)vec; 588 msg->msg_iovlen = num; 589 result = 
sock_sendmsg(sock, msg, size); 590 set_fs(oldfs); 591 return result; 592} 593 594static int ktime2ts(ktime_t kt, struct timespec *ts) 595{ 596 if (kt.tv64) { 597 *ts = ktime_to_timespec(kt); 598 return 1; 599 } else { 600 return 0; 601 } 602} 603 604/* 605 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) 606 */ 607void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, 608 struct sk_buff *skb) 609{ 610 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); 611 struct timespec ts[3]; 612 int empty = 1; 613 struct skb_shared_hwtstamps *shhwtstamps = 614 skb_hwtstamps(skb); 615 616 /* Race occurred between timestamp enabling and packet 617 receiving. Fill in the current time for now. */ 618 if (need_software_tstamp && skb->tstamp.tv64 == 0) 619 __net_timestamp(skb); 620 621 if (need_software_tstamp) { 622 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { 623 struct timeval tv; 624 skb_get_timestamp(skb, &tv); 625 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, 626 sizeof(tv), &tv); 627 } else { 628 struct timespec ts; 629 skb_get_timestampns(skb, &ts); 630 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, 631 sizeof(ts), &ts); 632 } 633 } 634 635 636 memset(ts, 0, sizeof(ts)); 637 if (skb->tstamp.tv64 && 638 sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) { 639 skb_get_timestampns(skb, ts + 0); 640 empty = 0; 641 } 642 if (shhwtstamps) { 643 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) && 644 ktime2ts(shhwtstamps->syststamp, ts + 1)) 645 empty = 0; 646 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) && 647 ktime2ts(shhwtstamps->hwtstamp, ts + 2)) 648 empty = 0; 649 } 650 if (!empty) 651 put_cmsg(msg, SOL_SOCKET, 652 SCM_TIMESTAMPING, sizeof(ts), &ts); 653} 654 655EXPORT_SYMBOL_GPL(__sock_recv_timestamp); 656 657inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) 658{ 659 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount) 660 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, 661 sizeof(__u32), &skb->dropcount); 662} 663 664void 
sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, 665 struct sk_buff *skb) 666{ 667 sock_recv_timestamp(msg, sk, skb); 668 sock_recv_drops(msg, sk, skb); 669} 670EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops); 671 672static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, 673 struct msghdr *msg, size_t size, int flags) 674{ 675 int err; 676 struct sock_iocb *si = kiocb_to_siocb(iocb); 677 678 si->sock = sock; 679 si->scm = NULL; 680 si->msg = msg; 681 si->size = size; 682 si->flags = flags; 683 684 err = sock->ops->recvmsg(iocb, sock, msg, size, flags); 685 return err; 686} 687 688static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, 689 struct msghdr *msg, size_t size, int flags) 690{ 691 int err = security_socket_recvmsg(sock, msg, size, flags); 692 693 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags); 694} 695 696int sock_recvmsg(struct socket *sock, struct msghdr *msg, 697 size_t size, int flags) 698{ 699 struct kiocb iocb; 700 struct sock_iocb siocb; 701 int ret; 702 703 init_sync_kiocb(&iocb, NULL); 704 iocb.private = &siocb; 705 ret = __sock_recvmsg(&iocb, sock, msg, size, flags); 706 if (-EIOCBQUEUED == ret) 707 ret = wait_on_sync_kiocb(&iocb); 708 return ret; 709} 710 711static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, 712 size_t size, int flags) 713{ 714 struct kiocb iocb; 715 struct sock_iocb siocb; 716 int ret; 717 718 init_sync_kiocb(&iocb, NULL); 719 iocb.private = &siocb; 720 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags); 721 if (-EIOCBQUEUED == ret) 722 ret = wait_on_sync_kiocb(&iocb); 723 return ret; 724} 725 726int kernel_recvmsg(struct socket *sock, struct msghdr *msg, 727 struct kvec *vec, size_t num, size_t size, int flags) 728{ 729 mm_segment_t oldfs = get_fs(); 730 int result; 731 732 set_fs(KERNEL_DS); 733 /* 734 * the following is safe, since for compiler definitions of kvec and 735 * iovec are identical, yielding the same in-core layout and 
alignment 736 */ 737 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num; 738 result = sock_recvmsg(sock, msg, size, flags); 739 set_fs(oldfs); 740 return result; 741} 742 743static void sock_aio_dtor(struct kiocb *iocb) 744{ 745 kfree(iocb->private); 746} 747 748static ssize_t sock_sendpage(struct file *file, struct page *page, 749 int offset, size_t size, loff_t *ppos, int more) 750{ 751 struct socket *sock; 752 int flags; 753 754 sock = file->private_data; 755 756 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; 757 if (more) 758 flags |= MSG_MORE; 759 760 return kernel_sendpage(sock, page, offset, size, flags); 761} 762 763static ssize_t sock_splice_read(struct file *file, loff_t *ppos, 764 struct pipe_inode_info *pipe, size_t len, 765 unsigned int flags) 766{ 767 struct socket *sock = file->private_data; 768 769 if (unlikely(!sock->ops->splice_read)) 770 return -EINVAL; 771 772 return sock->ops->splice_read(sock, ppos, pipe, len, flags); 773} 774 775static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, 776 struct sock_iocb *siocb) 777{ 778 if (!is_sync_kiocb(iocb)) { 779 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); 780 if (!siocb) 781 return NULL; 782 iocb->ki_dtor = sock_aio_dtor; 783 } 784 785 siocb->kiocb = iocb; 786 iocb->private = siocb; 787 return siocb; 788} 789 790static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, 791 struct file *file, const struct iovec *iov, 792 unsigned long nr_segs) 793{ 794 struct socket *sock = file->private_data; 795 size_t size = 0; 796 int i; 797 798 for (i = 0; i < nr_segs; i++) 799 size += iov[i].iov_len; 800 801 msg->msg_name = NULL; 802 msg->msg_namelen = 0; 803 msg->msg_control = NULL; 804 msg->msg_controllen = 0; 805 msg->msg_iov = (struct iovec *)iov; 806 msg->msg_iovlen = nr_segs; 807 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? 
MSG_DONTWAIT : 0; 808 809 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags); 810} 811 812static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, 813 unsigned long nr_segs, loff_t pos) 814{ 815 struct sock_iocb siocb, *x; 816 817 if (pos != 0) 818 return -ESPIPE; 819 820 if (iocb->ki_left == 0) /* Match SYS5 behaviour */ 821 return 0; 822 823 824 x = alloc_sock_iocb(iocb, &siocb); 825 if (!x) 826 return -ENOMEM; 827 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); 828} 829 830static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, 831 struct file *file, const struct iovec *iov, 832 unsigned long nr_segs) 833{ 834 struct socket *sock = file->private_data; 835 size_t size = 0; 836 int i; 837 838 for (i = 0; i < nr_segs; i++) 839 size += iov[i].iov_len; 840 841 msg->msg_name = NULL; 842 msg->msg_namelen = 0; 843 msg->msg_control = NULL; 844 msg->msg_controllen = 0; 845 msg->msg_iov = (struct iovec *)iov; 846 msg->msg_iovlen = nr_segs; 847 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; 848 if (sock->type == SOCK_SEQPACKET) 849 msg->msg_flags |= MSG_EOR; 850 851 return __sock_sendmsg(iocb, sock, msg, size); 852} 853 854static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, 855 unsigned long nr_segs, loff_t pos) 856{ 857 struct sock_iocb siocb, *x; 858 859 if (pos != 0) 860 return -ESPIPE; 861 862 x = alloc_sock_iocb(iocb, &siocb); 863 if (!x) 864 return -ENOMEM; 865 866 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); 867} 868 869/* 870 * Atomic setting of ioctl hooks to avoid race 871 * with module unload. 
872 */ 873 874static DEFINE_MUTEX(br_ioctl_mutex); 875static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL; 876 877void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) 878{ 879 mutex_lock(&br_ioctl_mutex); 880 br_ioctl_hook = hook; 881 mutex_unlock(&br_ioctl_mutex); 882} 883 884EXPORT_SYMBOL(brioctl_set); 885 886static DEFINE_MUTEX(vlan_ioctl_mutex); 887static int (*vlan_ioctl_hook) (struct net *, void __user *arg); 888 889void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) 890{ 891 mutex_lock(&vlan_ioctl_mutex); 892 vlan_ioctl_hook = hook; 893 mutex_unlock(&vlan_ioctl_mutex); 894} 895 896EXPORT_SYMBOL(vlan_ioctl_set); 897 898static DEFINE_MUTEX(dlci_ioctl_mutex); 899static int (*dlci_ioctl_hook) (unsigned int, void __user *); 900 901void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) 902{ 903 mutex_lock(&dlci_ioctl_mutex); 904 dlci_ioctl_hook = hook; 905 mutex_unlock(&dlci_ioctl_mutex); 906} 907 908EXPORT_SYMBOL(dlci_ioctl_set); 909 910static long sock_do_ioctl(struct net *net, struct socket *sock, 911 unsigned int cmd, unsigned long arg) 912{ 913 int err; 914 void __user *argp = (void __user *)arg; 915 916 err = sock->ops->ioctl(sock, cmd, arg); 917 918 /* 919 * If this ioctl is unknown try to hand it down 920 * to the NIC driver. 921 */ 922 if (err == -ENOIOCTLCMD) 923 err = dev_ioctl(net, cmd, argp); 924 925 return err; 926} 927 928/* 929 * With an ioctl, arg may well be a user mode pointer, but we don't know 930 * what to do with it - that's up to the protocol still. 
931 */ 932 933static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) 934{ 935 struct socket *sock; 936 struct sock *sk; 937 void __user *argp = (void __user *)arg; 938 int pid, err; 939 struct net *net; 940 941 sock = file->private_data; 942 sk = sock->sk; 943 net = sock_net(sk); 944 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { 945 err = dev_ioctl(net, cmd, argp); 946 } else 947#ifdef CONFIG_WEXT_CORE 948 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { 949 err = dev_ioctl(net, cmd, argp); 950 } else 951#endif 952 switch (cmd) { 953 case FIOSETOWN: 954 case SIOCSPGRP: 955 err = -EFAULT; 956 if (get_user(pid, (int __user *)argp)) 957 break; 958 err = f_setown(sock->file, pid, 1); 959 break; 960 case FIOGETOWN: 961 case SIOCGPGRP: 962 err = put_user(f_getown(sock->file), 963 (int __user *)argp); 964 break; 965 case SIOCGIFBR: 966 case SIOCSIFBR: 967 case SIOCBRADDBR: 968 case SIOCBRDELBR: 969 err = -ENOPKG; 970 if (!br_ioctl_hook) 971 request_module("bridge"); 972 973 mutex_lock(&br_ioctl_mutex); 974 if (br_ioctl_hook) 975 err = br_ioctl_hook(net, cmd, argp); 976 mutex_unlock(&br_ioctl_mutex); 977 break; 978 case SIOCGIFVLAN: 979 case SIOCSIFVLAN: 980 err = -ENOPKG; 981 if (!vlan_ioctl_hook) 982 request_module("8021q"); 983 984 mutex_lock(&vlan_ioctl_mutex); 985 if (vlan_ioctl_hook) 986 err = vlan_ioctl_hook(net, argp); 987 mutex_unlock(&vlan_ioctl_mutex); 988 break; 989 case SIOCADDDLCI: 990 case SIOCDELDLCI: 991 err = -ENOPKG; 992 if (!dlci_ioctl_hook) 993 request_module("dlci"); 994 995 mutex_lock(&dlci_ioctl_mutex); 996 if (dlci_ioctl_hook) 997 err = dlci_ioctl_hook(cmd, argp); 998 mutex_unlock(&dlci_ioctl_mutex); 999 break; 1000 default: 1001 err = sock_do_ioctl(net, sock, cmd, arg); 1002 break; 1003 } 1004 return err; 1005} 1006 1007int sock_create_lite(int family, int type, int protocol, struct socket **res) 1008{ 1009 int err; 1010 struct socket *sock = NULL; 1011 1012 err = security_socket_create(family, type, protocol, 1); 
1013 if (err) 1014 goto out; 1015 1016 sock = sock_alloc(); 1017 if (!sock) { 1018 err = -ENOMEM; 1019 goto out; 1020 } 1021 1022 sock->type = type; 1023 err = security_socket_post_create(sock, family, type, protocol, 1); 1024 if (err) 1025 goto out_release; 1026 1027out: 1028 *res = sock; 1029 return err; 1030out_release: 1031 sock_release(sock); 1032 sock = NULL; 1033 goto out; 1034} 1035 1036/* No kernel lock held - perfect */ 1037static unsigned int sock_poll(struct file *file, poll_table *wait) 1038{ 1039 struct socket *sock; 1040 1041 /* 1042 * We can't return errors to poll, so it's either yes or no. 1043 */ 1044 sock = file->private_data; 1045 return sock->ops->poll(file, sock, wait); 1046} 1047 1048static int sock_mmap(struct file *file, struct vm_area_struct *vma) 1049{ 1050 struct socket *sock = file->private_data; 1051 1052 return sock->ops->mmap(file, sock, vma); 1053} 1054 1055static int sock_close(struct inode *inode, struct file *filp) 1056{ 1057 /* 1058 * It was possible the inode is NULL we were 1059 * closing an unfinished socket. 1060 */ 1061 1062 if (!inode) { 1063 printk(KERN_DEBUG "sock_close: NULL inode\n"); 1064 return 0; 1065 } 1066 sock_release(SOCKET_I(inode)); 1067 return 0; 1068} 1069 1070/* 1071 * Update the socket async list 1072 * 1073 * Fasync_list locking strategy. 1074 * 1075 * 1. fasync_list is modified only under process context socket lock 1076 * i.e. under semaphore. 1077 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock) 1078 * or under socket lock. 1079 * 3. fasync_list can be used from softirq context, so that 1080 * modification under socket lock have to be enhanced with 1081 * write_lock_bh(&sk->sk_callback_lock). 
1082 * --ANK (990710) 1083 */ 1084 1085static int sock_fasync(int fd, struct file *filp, int on) 1086{ 1087 struct fasync_struct *fa, *fna = NULL, **prev; 1088 struct socket *sock; 1089 struct sock *sk; 1090 1091 if (on) { 1092 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); 1093 if (fna == NULL) 1094 return -ENOMEM; 1095 } 1096 1097 sock = filp->private_data; 1098 1099 sk = sock->sk; 1100 if (sk == NULL) { 1101 kfree(fna); 1102 return -EINVAL; 1103 } 1104 1105 lock_sock(sk); 1106 1107 spin_lock(&filp->f_lock); 1108 if (on) 1109 filp->f_flags |= FASYNC; 1110 else 1111 filp->f_flags &= ~FASYNC; 1112 spin_unlock(&filp->f_lock); 1113 1114 prev = &(sock->fasync_list); 1115 1116 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) 1117 if (fa->fa_file == filp) 1118 break; 1119 1120 if (on) { 1121 if (fa != NULL) { 1122 write_lock_bh(&sk->sk_callback_lock); 1123 fa->fa_fd = fd; 1124 write_unlock_bh(&sk->sk_callback_lock); 1125 1126 kfree(fna); 1127 goto out; 1128 } 1129 fna->fa_file = filp; 1130 fna->fa_fd = fd; 1131 fna->magic = FASYNC_MAGIC; 1132 fna->fa_next = sock->fasync_list; 1133 write_lock_bh(&sk->sk_callback_lock); 1134 sock->fasync_list = fna; 1135 sock_set_flag(sk, SOCK_FASYNC); 1136 write_unlock_bh(&sk->sk_callback_lock); 1137 } else { 1138 if (fa != NULL) { 1139 write_lock_bh(&sk->sk_callback_lock); 1140 *prev = fa->fa_next; 1141 if (!sock->fasync_list) 1142 sock_reset_flag(sk, SOCK_FASYNC); 1143 write_unlock_bh(&sk->sk_callback_lock); 1144 kfree(fa); 1145 } 1146 } 1147 1148out: 1149 release_sock(sock->sk); 1150 return 0; 1151} 1152 1153/* This function may be called only under socket lock or callback_lock */ 1154 1155int sock_wake_async(struct socket *sock, int how, int band) 1156{ 1157 if (!sock || !sock->fasync_list) 1158 return -1; 1159 switch (how) { 1160 case SOCK_WAKE_WAITD: 1161 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) 1162 break; 1163 goto call_kill; 1164 case SOCK_WAKE_SPACE: 1165 if 
(!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags)) 1166 break; 1167 /* fall through */ 1168 case SOCK_WAKE_IO: 1169call_kill: 1170 __kill_fasync(sock->fasync_list, SIGIO, band); 1171 break; 1172 case SOCK_WAKE_URG: 1173 __kill_fasync(sock->fasync_list, SIGURG, band); 1174 } 1175 return 0; 1176} 1177 1178static int __sock_create(struct net *net, int family, int type, int protocol, 1179 struct socket **res, int kern) 1180{ 1181 int err; 1182 struct socket *sock; 1183 const struct net_proto_family *pf; 1184 1185 /* 1186 * Check protocol is in range 1187 */ 1188 if (family < 0 || family >= NPROTO) 1189 return -EAFNOSUPPORT; 1190 if (type < 0 || type >= SOCK_MAX) 1191 return -EINVAL; 1192 1193 /* Compatibility. 1194 1195 This uglymoron is moved from INET layer to here to avoid 1196 deadlock in module load. 1197 */ 1198 if (family == PF_INET && type == SOCK_PACKET) { 1199 static int warned; 1200 if (!warned) { 1201 warned = 1; 1202 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", 1203 current->comm); 1204 } 1205 family = PF_PACKET; 1206 } 1207 1208 err = security_socket_create(family, type, protocol, kern); 1209 if (err) 1210 return err; 1211 1212 /* 1213 * Allocate the socket and allow the family to set things up. if 1214 * the protocol is 0, the family is instructed to select an appropriate 1215 * default. 1216 */ 1217 sock = sock_alloc(); 1218 if (!sock) { 1219 if (net_ratelimit()) 1220 printk(KERN_WARNING "socket: no more sockets\n"); 1221 return -ENFILE; /* Not exactly a match, but its the 1222 closest posix thing */ 1223 } 1224 1225 sock->type = type; 1226 1227#ifdef CONFIG_MODULES 1228 /* Attempt to load a protocol module if the find failed. 1229 * 1230 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 1231 * requested real, full-featured networking support upon configuration. 1232 * Otherwise module support will break! 
1233 */ 1234 if (net_families[family] == NULL) 1235 request_module("net-pf-%d", family); 1236#endif 1237 1238 rcu_read_lock(); 1239 pf = rcu_dereference(net_families[family]); 1240 err = -EAFNOSUPPORT; 1241 if (!pf) 1242 goto out_release; 1243 1244 /* 1245 * We will call the ->create function, that possibly is in a loadable 1246 * module, so we have to bump that loadable module refcnt first. 1247 */ 1248 if (!try_module_get(pf->owner)) 1249 goto out_release; 1250 1251 /* Now protected by module ref count */ 1252 rcu_read_unlock(); 1253 1254 err = pf->create(net, sock, protocol, kern); 1255 if (err < 0) 1256 goto out_module_put; 1257 1258 /* 1259 * Now to bump the refcnt of the [loadable] module that owns this 1260 * socket at sock_release time we decrement its refcnt. 1261 */ 1262 if (!try_module_get(sock->ops->owner)) 1263 goto out_module_busy; 1264 1265 /* 1266 * Now that we're done with the ->create function, the [loadable] 1267 * module can have its refcnt decremented 1268 */ 1269 module_put(pf->owner); 1270 err = security_socket_post_create(sock, family, type, protocol, kern); 1271 if (err) 1272 goto out_sock_release; 1273 *res = sock; 1274 1275 return 0; 1276 1277out_module_busy: 1278 err = -EAFNOSUPPORT; 1279out_module_put: 1280 sock->ops = NULL; 1281 module_put(pf->owner); 1282out_sock_release: 1283 sock_release(sock); 1284 return err; 1285 1286out_release: 1287 rcu_read_unlock(); 1288 goto out_sock_release; 1289} 1290 1291int sock_create(int family, int type, int protocol, struct socket **res) 1292{ 1293 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); 1294} 1295 1296int sock_create_kern(int family, int type, int protocol, struct socket **res) 1297{ 1298 return __sock_create(&init_net, family, type, protocol, res, 1); 1299} 1300 1301SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) 1302{ 1303 int retval; 1304 struct socket *sock; 1305 int flags; 1306 1307 /* Check the SOCK_* constants for consistency. 
*/ 1308 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); 1309 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK); 1310 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK); 1311 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK); 1312 1313 flags = type & ~SOCK_TYPE_MASK; 1314 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1315 return -EINVAL; 1316 type &= SOCK_TYPE_MASK; 1317 1318 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1319 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1320 1321 retval = sock_create(family, type, protocol, &sock); 1322 if (retval < 0) 1323 goto out; 1324 1325 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); 1326 if (retval < 0) 1327 goto out_release; 1328 1329out: 1330 /* It may be already another descriptor 8) Not kernel problem. */ 1331 return retval; 1332 1333out_release: 1334 sock_release(sock); 1335 return retval; 1336} 1337 1338/* 1339 * Create a pair of connected sockets. 1340 */ 1341 1342SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, 1343 int __user *, usockvec) 1344{ 1345 struct socket *sock1, *sock2; 1346 int fd1, fd2, err; 1347 struct file *newfile1, *newfile2; 1348 int flags; 1349 1350 flags = type & ~SOCK_TYPE_MASK; 1351 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1352 return -EINVAL; 1353 type &= SOCK_TYPE_MASK; 1354 1355 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1356 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1357 1358 /* 1359 * Obtain the first socket and check if the underlying protocol 1360 * supports the socketpair call. 
1361 */ 1362 1363 err = sock_create(family, type, protocol, &sock1); 1364 if (err < 0) 1365 goto out; 1366 1367 err = sock_create(family, type, protocol, &sock2); 1368 if (err < 0) 1369 goto out_release_1; 1370 1371 err = sock1->ops->socketpair(sock1, sock2); 1372 if (err < 0) 1373 goto out_release_both; 1374 1375 fd1 = sock_alloc_file(sock1, &newfile1, flags); 1376 if (unlikely(fd1 < 0)) { 1377 err = fd1; 1378 goto out_release_both; 1379 } 1380 1381 fd2 = sock_alloc_file(sock2, &newfile2, flags); 1382 if (unlikely(fd2 < 0)) { 1383 err = fd2; 1384 fput(newfile1); 1385 put_unused_fd(fd1); 1386 sock_release(sock2); 1387 goto out; 1388 } 1389 1390 audit_fd_pair(fd1, fd2); 1391 fd_install(fd1, newfile1); 1392 fd_install(fd2, newfile2); 1393 /* fd1 and fd2 may be already another descriptors. 1394 * Not kernel problem. 1395 */ 1396 1397 err = put_user(fd1, &usockvec[0]); 1398 if (!err) 1399 err = put_user(fd2, &usockvec[1]); 1400 if (!err) 1401 return 0; 1402 1403 sys_close(fd2); 1404 sys_close(fd1); 1405 return err; 1406 1407out_release_both: 1408 sock_release(sock2); 1409out_release_1: 1410 sock_release(sock1); 1411out: 1412 return err; 1413} 1414 1415/* 1416 * Bind a name to a socket. Nothing much to do here since it's 1417 * the protocol's responsibility to handle the local address. 1418 * 1419 * We move the socket address to kernel space before we call 1420 * the protocol layer (having also checked the address is ok). 
1421 */ 1422 1423SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen) 1424{ 1425 struct socket *sock; 1426 struct sockaddr_storage address; 1427 int err, fput_needed; 1428 1429 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1430 if (sock) { 1431 err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address); 1432 if (err >= 0) { 1433 err = security_socket_bind(sock, 1434 (struct sockaddr *)&address, 1435 addrlen); 1436 if (!err) 1437 err = sock->ops->bind(sock, 1438 (struct sockaddr *) 1439 &address, addrlen); 1440 } 1441 fput_light(sock->file, fput_needed); 1442 } 1443 return err; 1444} 1445 1446/* 1447 * Perform a listen. Basically, we allow the protocol to do anything 1448 * necessary for a listen, and if that works, we mark the socket as 1449 * ready for listening. 1450 */ 1451 1452SYSCALL_DEFINE2(listen, int, fd, int, backlog) 1453{ 1454 struct socket *sock; 1455 int err, fput_needed; 1456 int somaxconn; 1457 1458 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1459 if (sock) { 1460 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn; 1461 if ((unsigned)backlog > somaxconn) 1462 backlog = somaxconn; 1463 1464 err = security_socket_listen(sock, backlog); 1465 if (!err) 1466 err = sock->ops->listen(sock, backlog); 1467 1468 fput_light(sock->file, fput_needed); 1469 } 1470 return err; 1471} 1472 1473/* 1474 * For accept, we attempt to create a new socket, set up the link 1475 * with the client, wake up the client, then return the new 1476 * connected fd. We collect the address of the connector in kernel 1477 * space and move it to user at the very end. This is unclean because 1478 * we open the socket then return an error. 1479 * 1480 * 1003.1g adds the ability to recvmsg() to query connection pending 1481 * status to recvmsg. We need to add that support in a way thats 1482 * clean when we restucture accept also. 
1483 */ 1484 1485SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, 1486 int __user *, upeer_addrlen, int, flags) 1487{ 1488 struct socket *sock, *newsock; 1489 struct file *newfile; 1490 int err, len, newfd, fput_needed; 1491 struct sockaddr_storage address; 1492 1493 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1494 return -EINVAL; 1495 1496 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1497 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1498 1499 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1500 if (!sock) 1501 goto out; 1502 1503 err = -ENFILE; 1504 if (!(newsock = sock_alloc())) 1505 goto out_put; 1506 1507 newsock->type = sock->type; 1508 newsock->ops = sock->ops; 1509 1510 /* 1511 * We don't need try_module_get here, as the listening socket (sock) 1512 * has the protocol module (sock->ops->owner) held. 1513 */ 1514 __module_get(newsock->ops->owner); 1515 1516 newfd = sock_alloc_file(newsock, &newfile, flags); 1517 if (unlikely(newfd < 0)) { 1518 err = newfd; 1519 sock_release(newsock); 1520 goto out_put; 1521 } 1522 1523 err = security_socket_accept(sock, newsock); 1524 if (err) 1525 goto out_fd; 1526 1527 err = sock->ops->accept(sock, newsock, sock->file->f_flags); 1528 if (err < 0) 1529 goto out_fd; 1530 1531 if (upeer_sockaddr) { 1532 if (newsock->ops->getname(newsock, (struct sockaddr *)&address, 1533 &len, 2) < 0) { 1534 err = -ECONNABORTED; 1535 goto out_fd; 1536 } 1537 err = move_addr_to_user((struct sockaddr *)&address, 1538 len, upeer_sockaddr, upeer_addrlen); 1539 if (err < 0) 1540 goto out_fd; 1541 } 1542 1543 /* File flags are not inherited via accept() unlike another OSes. 
*/ 1544 1545 fd_install(newfd, newfile); 1546 err = newfd; 1547 1548out_put: 1549 fput_light(sock->file, fput_needed); 1550out: 1551 return err; 1552out_fd: 1553 fput(newfile); 1554 put_unused_fd(newfd); 1555 goto out_put; 1556} 1557 1558SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr, 1559 int __user *, upeer_addrlen) 1560{ 1561 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0); 1562} 1563 1564/* 1565 * Attempt to connect to a socket with the server address. The address 1566 * is in user space so we verify it is OK and move it to kernel space. 1567 * 1568 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to 1569 * break bindings 1570 * 1571 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and 1572 * other SEQPACKET protocols that take time to connect() as it doesn't 1573 * include the -EINPROGRESS status for such sockets. 1574 */ 1575 1576SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr, 1577 int, addrlen) 1578{ 1579 struct socket *sock; 1580 struct sockaddr_storage address; 1581 int err, fput_needed; 1582 1583 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1584 if (!sock) 1585 goto out; 1586 err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address); 1587 if (err < 0) 1588 goto out_put; 1589 1590 err = 1591 security_socket_connect(sock, (struct sockaddr *)&address, addrlen); 1592 if (err) 1593 goto out_put; 1594 1595 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen, 1596 sock->file->f_flags); 1597out_put: 1598 fput_light(sock->file, fput_needed); 1599out: 1600 return err; 1601} 1602 1603/* 1604 * Get the local address ('name') of a socket object. Move the obtained 1605 * name to user space. 
1606 */ 1607 1608SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr, 1609 int __user *, usockaddr_len) 1610{ 1611 struct socket *sock; 1612 struct sockaddr_storage address; 1613 int len, err, fput_needed; 1614 1615 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1616 if (!sock) 1617 goto out; 1618 1619 err = security_socket_getsockname(sock); 1620 if (err) 1621 goto out_put; 1622 1623 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0); 1624 if (err) 1625 goto out_put; 1626 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len); 1627 1628out_put: 1629 fput_light(sock->file, fput_needed); 1630out: 1631 return err; 1632} 1633 1634/* 1635 * Get the remote address ('name') of a socket object. Move the obtained 1636 * name to user space. 1637 */ 1638 1639SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr, 1640 int __user *, usockaddr_len) 1641{ 1642 struct socket *sock; 1643 struct sockaddr_storage address; 1644 int len, err, fput_needed; 1645 1646 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1647 if (sock != NULL) { 1648 err = security_socket_getpeername(sock); 1649 if (err) { 1650 fput_light(sock->file, fput_needed); 1651 return err; 1652 } 1653 1654 err = 1655 sock->ops->getname(sock, (struct sockaddr *)&address, &len, 1656 1); 1657 if (!err) 1658 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, 1659 usockaddr_len); 1660 fput_light(sock->file, fput_needed); 1661 } 1662 return err; 1663} 1664 1665/* 1666 * Send a datagram to a given address. We move the address into kernel 1667 * space and check the user space data area is readable before invoking 1668 * the protocol. 
1669 */ 1670 1671SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, 1672 unsigned, flags, struct sockaddr __user *, addr, 1673 int, addr_len) 1674{ 1675 struct socket *sock; 1676 struct sockaddr_storage address; 1677 int err; 1678 struct msghdr msg; 1679 struct iovec iov; 1680 int fput_needed; 1681 1682 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1683 if (!sock) 1684 goto out; 1685 1686 iov.iov_base = buff; 1687 iov.iov_len = len; 1688 msg.msg_name = NULL; 1689 msg.msg_iov = &iov; 1690 msg.msg_iovlen = 1; 1691 msg.msg_control = NULL; 1692 msg.msg_controllen = 0; 1693 msg.msg_namelen = 0; 1694 if (addr) { 1695 err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address); 1696 if (err < 0) 1697 goto out_put; 1698 msg.msg_name = (struct sockaddr *)&address; 1699 msg.msg_namelen = addr_len; 1700 } 1701 if (sock->file->f_flags & O_NONBLOCK) 1702 flags |= MSG_DONTWAIT; 1703 msg.msg_flags = flags; 1704 err = sock_sendmsg(sock, &msg, len); 1705 1706out_put: 1707 fput_light(sock->file, fput_needed); 1708out: 1709 return err; 1710} 1711 1712/* 1713 * Send a datagram down a socket. 1714 */ 1715 1716SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len, 1717 unsigned, flags) 1718{ 1719 return sys_sendto(fd, buff, len, flags, NULL, 0); 1720} 1721 1722/* 1723 * Receive a frame from the socket and optionally record the address of the 1724 * sender. We verify the buffers are writable and if needed move the 1725 * sender address from kernel to user space. 
1726 */ 1727 1728SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, 1729 unsigned, flags, struct sockaddr __user *, addr, 1730 int __user *, addr_len) 1731{ 1732 struct socket *sock; 1733 struct iovec iov; 1734 struct msghdr msg; 1735 struct sockaddr_storage address; 1736 int err, err2; 1737 int fput_needed; 1738 1739 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1740 if (!sock) 1741 goto out; 1742 1743 msg.msg_control = NULL; 1744 msg.msg_controllen = 0; 1745 msg.msg_iovlen = 1; 1746 msg.msg_iov = &iov; 1747 iov.iov_len = size; 1748 iov.iov_base = ubuf; 1749 msg.msg_name = (struct sockaddr *)&address; 1750 msg.msg_namelen = sizeof(address); 1751 if (sock->file->f_flags & O_NONBLOCK) 1752 flags |= MSG_DONTWAIT; 1753 err = sock_recvmsg(sock, &msg, size, flags); 1754 1755 if (err >= 0 && addr != NULL) { 1756 err2 = move_addr_to_user((struct sockaddr *)&address, 1757 msg.msg_namelen, addr, addr_len); 1758 if (err2 < 0) 1759 err = err2; 1760 } 1761 1762 fput_light(sock->file, fput_needed); 1763out: 1764 return err; 1765} 1766 1767/* 1768 * Receive a datagram from a socket. 1769 */ 1770 1771asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, 1772 unsigned flags) 1773{ 1774 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); 1775} 1776 1777/* 1778 * Set a socket option. Because we don't know the option lengths we have 1779 * to pass the user mode parameter for the protocols to sort out. 
1780 */ 1781 1782SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, 1783 char __user *, optval, int, optlen) 1784{ 1785 int err, fput_needed; 1786 struct socket *sock; 1787 1788 if (optlen < 0) 1789 return -EINVAL; 1790 1791 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1792 if (sock != NULL) { 1793 err = security_socket_setsockopt(sock, level, optname); 1794 if (err) 1795 goto out_put; 1796 1797 if (level == SOL_SOCKET) 1798 err = 1799 sock_setsockopt(sock, level, optname, optval, 1800 optlen); 1801 else 1802 err = 1803 sock->ops->setsockopt(sock, level, optname, optval, 1804 optlen); 1805out_put: 1806 fput_light(sock->file, fput_needed); 1807 } 1808 return err; 1809} 1810 1811/* 1812 * Get a socket option. Because we don't know the option lengths we have 1813 * to pass a user mode parameter for the protocols to sort out. 1814 */ 1815 1816SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, 1817 char __user *, optval, int __user *, optlen) 1818{ 1819 int err, fput_needed; 1820 struct socket *sock; 1821 1822 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1823 if (sock != NULL) { 1824 err = security_socket_getsockopt(sock, level, optname); 1825 if (err) 1826 goto out_put; 1827 1828 if (level == SOL_SOCKET) 1829 err = 1830 sock_getsockopt(sock, level, optname, optval, 1831 optlen); 1832 else 1833 err = 1834 sock->ops->getsockopt(sock, level, optname, optval, 1835 optlen); 1836out_put: 1837 fput_light(sock->file, fput_needed); 1838 } 1839 return err; 1840} 1841 1842/* 1843 * Shutdown a socket. 
1844 */ 1845 1846SYSCALL_DEFINE2(shutdown, int, fd, int, how) 1847{ 1848 int err, fput_needed; 1849 struct socket *sock; 1850 1851 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1852 if (sock != NULL) { 1853 err = security_socket_shutdown(sock, how); 1854 if (!err) 1855 err = sock->ops->shutdown(sock, how); 1856 fput_light(sock->file, fput_needed); 1857 } 1858 return err; 1859} 1860 1861/* A couple of helpful macros for getting the address of the 32/64 bit 1862 * fields which are the same type (int / unsigned) on our platforms. 1863 */ 1864#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member) 1865#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) 1866#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) 1867 1868/* 1869 * BSD sendmsg interface 1870 */ 1871 1872SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) 1873{ 1874 struct compat_msghdr __user *msg_compat = 1875 (struct compat_msghdr __user *)msg; 1876 struct socket *sock; 1877 struct sockaddr_storage address; 1878 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; 1879 unsigned char ctl[sizeof(struct cmsghdr) + 20] 1880 __attribute__ ((aligned(sizeof(__kernel_size_t)))); 1881 /* 20 is size of ipv6_pktinfo */ 1882 unsigned char *ctl_buf = ctl; 1883 struct msghdr msg_sys; 1884 int err, ctl_len, iov_size, total_len; 1885 int fput_needed; 1886 1887 err = -EFAULT; 1888 if (MSG_CMSG_COMPAT & flags) { 1889 if (get_compat_msghdr(&msg_sys, msg_compat)) 1890 return -EFAULT; 1891 } 1892 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) 1893 return -EFAULT; 1894 1895 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1896 if (!sock) 1897 goto out; 1898 1899 /* do not move before msg_sys is valid */ 1900 err = -EMSGSIZE; 1901 if (msg_sys.msg_iovlen > UIO_MAXIOV) 1902 goto out_put; 1903 1904 /* Check whether to allocate the iovec area */ 1905 err = -ENOMEM; 1906 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); 1907 if 
(msg_sys.msg_iovlen > UIO_FASTIOV) { 1908 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); 1909 if (!iov) 1910 goto out_put; 1911 } 1912 1913 /* This will also move the address data into kernel space */ 1914 if (MSG_CMSG_COMPAT & flags) { 1915 err = verify_compat_iovec(&msg_sys, iov, 1916 (struct sockaddr *)&address, 1917 VERIFY_READ); 1918 } else 1919 err = verify_iovec(&msg_sys, iov, 1920 (struct sockaddr *)&address, 1921 VERIFY_READ); 1922 if (err < 0) 1923 goto out_freeiov; 1924 total_len = err; 1925 1926 err = -ENOBUFS; 1927 1928 if (msg_sys.msg_controllen > INT_MAX) 1929 goto out_freeiov; 1930 ctl_len = msg_sys.msg_controllen; 1931 if ((MSG_CMSG_COMPAT & flags) && ctl_len) { 1932 err = 1933 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, 1934 sizeof(ctl)); 1935 if (err) 1936 goto out_freeiov; 1937 ctl_buf = msg_sys.msg_control; 1938 ctl_len = msg_sys.msg_controllen; 1939 } else if (ctl_len) { 1940 if (ctl_len > sizeof(ctl)) { 1941 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); 1942 if (ctl_buf == NULL) 1943 goto out_freeiov; 1944 } 1945 err = -EFAULT; 1946 /* 1947 * Careful! Before this, msg_sys.msg_control contains a user pointer. 1948 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted 1949 * checking falls down on this. 
1950 */ 1951 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, 1952 ctl_len)) 1953 goto out_freectl; 1954 msg_sys.msg_control = ctl_buf; 1955 } 1956 msg_sys.msg_flags = flags; 1957 1958 if (sock->file->f_flags & O_NONBLOCK) 1959 msg_sys.msg_flags |= MSG_DONTWAIT; 1960 err = sock_sendmsg(sock, &msg_sys, total_len); 1961 1962out_freectl: 1963 if (ctl_buf != ctl) 1964 sock_kfree_s(sock->sk, ctl_buf, ctl_len); 1965out_freeiov: 1966 if (iov != iovstack) 1967 sock_kfree_s(sock->sk, iov, iov_size); 1968out_put: 1969 fput_light(sock->file, fput_needed); 1970out: 1971 return err; 1972} 1973 1974static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, 1975 struct msghdr *msg_sys, unsigned flags, int nosec) 1976{ 1977 struct compat_msghdr __user *msg_compat = 1978 (struct compat_msghdr __user *)msg; 1979 struct iovec iovstack[UIO_FASTIOV]; 1980 struct iovec *iov = iovstack; 1981 unsigned long cmsg_ptr; 1982 int err, iov_size, total_len, len; 1983 1984 /* kernel mode address */ 1985 struct sockaddr_storage addr; 1986 1987 /* user mode address pointers */ 1988 struct sockaddr __user *uaddr; 1989 int __user *uaddr_len; 1990 1991 if (MSG_CMSG_COMPAT & flags) { 1992 if (get_compat_msghdr(msg_sys, msg_compat)) 1993 return -EFAULT; 1994 } 1995 else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) 1996 return -EFAULT; 1997 1998 err = -EMSGSIZE; 1999 if (msg_sys->msg_iovlen > UIO_MAXIOV) 2000 goto out; 2001 2002 /* Check whether to allocate the iovec area */ 2003 err = -ENOMEM; 2004 iov_size = msg_sys->msg_iovlen * sizeof(struct iovec); 2005 if (msg_sys->msg_iovlen > UIO_FASTIOV) { 2006 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); 2007 if (!iov) 2008 goto out; 2009 } 2010 2011 /* 2012 * Save the user-mode address (verify_iovec will change the 2013 * kernel msghdr to use the kernel address space) 2014 */ 2015 2016 uaddr = (__force void __user *)msg_sys->msg_name; 2017 uaddr_len = COMPAT_NAMELEN(msg); 2018 if (MSG_CMSG_COMPAT & flags) { 2019 
err = verify_compat_iovec(msg_sys, iov, 2020 (struct sockaddr *)&addr, 2021 VERIFY_WRITE); 2022 } else 2023 err = verify_iovec(msg_sys, iov, 2024 (struct sockaddr *)&addr, 2025 VERIFY_WRITE); 2026 if (err < 0) 2027 goto out_freeiov; 2028 total_len = err; 2029 2030 cmsg_ptr = (unsigned long)msg_sys->msg_control; 2031 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); 2032 2033 if (sock->file->f_flags & O_NONBLOCK) 2034 flags |= MSG_DONTWAIT; 2035 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sy…
Large files are truncated, but you can click here to view the full file